# In [5]:
"""profile_numericalanalysis.py"""

"""This script contains the methods to perform the analysis on the numerical values of the data frame."""
import numpy as np
import pandas as pd
import pandas_profiling as pp
import operator
from IPython.display import display,HTML

def numerical_analysis(df_column):
    """
    Classify a dataframe column and, when it is numeric, build a profile report for it.

    The classification is delegated to ``find_type``; the dataframe it returns
    is the one that gets profiled.
    Args:
        df_column (pandas dataframe): Input pandas dataframe
    Returns:
        dict (dict): Always holds the keys 'isNumeric' and 'isHybrid'; the key
        'analysisReport' is added only when the data was found to be numeric.
    """
    df_column, numeric_flag, hybrid_flag = find_type(df_column)
    summary = {'isNumeric': numeric_flag, 'isHybrid': hybrid_flag}
    # Nothing to profile unless the data is numeric.
    if not numeric_flag:
        return summary
    summary['analysisReport'] = pp.ProfileReport(df_column)
    return summary

def find_type(df_column):
    """
    Classify the first column of a dataframe as numeric, hybrid, or neither.

    The column is numeric when its dtype is already numeric or when
    ``checkIfNumericTypeCast`` manages to convert it. Only when it is not
    numeric is ``checkIfHybridType`` consulted.
    Args:
        df_column (pandas dataframe): Input pandas dataframe; only its first
            column is inspected.
    Returns:
        (dataframe,bool,bool): Tuple of (dataframe, isNumeric, isHybrid). The
        dataframe is the one handed back by ``checkIfNumericTypeCast`` when a
        cast was attempted, otherwise the input itself.
    Raises:
        IndexError: If the dataframe has no columns.
    """
    # Index.get_values() no longer exists in pandas >= 1.0; take the first
    # label positionally instead.
    col_title = df_column.columns[0]
    isHybrid = False
    # is_numeric_dtype recognises every numeric dtype, whereas comparing a
    # dtype with the abstract np.number class does not.
    isNumeric = pd.api.types.is_numeric_dtype(df_column[col_title])
    if not isNumeric:
        # Attempt the cast once and reuse both of its results.
        df_column, isNumeric = checkIfNumericTypeCast(df_column)
    if not isNumeric:
        isHybrid = checkIfHybridType(df_column)
    return (df_column, isNumeric, isHybrid)
        
def checkIfNumericTypeCast(df_column):
    """
    Try to convert the first column of the dataframe to a numeric dtype.

    On success the converted values are written back into the first column of
    the dataframe that was passed in; on failure the dataframe is left
    untouched.
    Args:
        df_column (pandas dataframe): Input pandas dataframe; only its first
            column is converted.
    Returns:
        (df_column,isNumeric): Tuple of the same pandas dataframe object and a
        bool telling whether the conversion succeeded.
    Raises:
        IndexError: If the dataframe has no columns.
    """
    # Index.get_values() no longer exists in pandas >= 1.0; take the first
    # label positionally instead.
    col_title = df_column.columns[0]
    try:
        converted = pd.to_numeric(df_column[col_title], errors='raise')
    except (ValueError, TypeError):
        # Only a failed conversion means "not numeric"; anything else
        # (KeyboardInterrupt, programming errors, ...) should propagate.
        return (df_column, False)
    df_column[col_title] = converted
    return (df_column, True)

def checkIfHybridType(df_column):
    """
    Returns whether the first column of the dataframe mixes numeric and non-numeric values.

    A value counts as numeric when ``pd.to_numeric`` can convert it. Missing
    values are ignored: they count neither as numeric nor as non-numeric.
    Args:
        df_column (pandas dataframe): Input pandas dataframe; only its first
            column is inspected.
    Returns:
        bool: True only when at least one value converts to a number and at
        least one non-missing value does not.
    Raises:
        IndexError: If the dataframe has no columns.
    """
    # Index.get_values() no longer exists in pandas >= 1.0; take the first
    # label positionally instead.
    column = df_column[df_column.columns[0]]
    # Unparseable entries become NaN under errors='coerce'.
    coerced = pd.to_numeric(column, errors='coerce')
    has_numeric = coerced.notna().any()
    # Non-numeric means: present in the original but lost by the coercion.
    has_non_numeric = (column.notna() & coerced.isna()).any()
    return bool(has_numeric and has_non_numeric)