## Helper Functions for Getting a Better Picture of Columns with Null Values

In [1]:
import numpy as np
from sklearn.metrics import mean_squared_error


def col_obj_list(dataframe, index_of_cols_in, list_of_cols_out_obj, list_of_cols_out_num):
    '''
    Splits column names by dtype, appending in place to the caller's lists.

    dataframe:            dataframe whose column dtypes are inspected
    index_of_cols_in:     column labels to sort; must offer `.values.tolist()`
                          (e.g. `dataframe.columns`)
    list_of_cols_out_obj: receives the names whose dtype == 'object'
    list_of_cols_out_num: receives every other name

    Returns None.
    '''
    for name in index_of_cols_in.values.tolist():
        is_object = dataframe[name].dtype == 'object'
        # pick the destination list once, then append to it
        bucket = list_of_cols_out_obj if is_object else list_of_cols_out_num
        bucket.append(name)

#######################################################################################



def nan_col_list(dataframe, list_of_cols_in, list_of_cols_out):
    '''
    Collects the names of columns that contain at least one null value.

    dataframe:        dataframe to inspect
    list_of_cols_in:  column names to check, in order
    list_of_cols_out: list extended in place with the matching names

    Returns list_of_cols_out (the same object that was passed in).
    '''
    def _has_nulls(name):
        # summing the boolean null mask counts the null entries
        return np.sum(dataframe[name].isnull()) > 0

    list_of_cols_out.extend(name for name in list_of_cols_in if _has_nulls(name))
    return list_of_cols_out

#######################################################################################



def cols_nan_unique(dataframe, list_of_cols_in, data_dict, data_null):
    '''
    Fills two caller-supplied dictionaries, keyed by column name
        data_dict: list of the column's distinct non-null values
        data_null: number of null entries in the column
    Existing keys are overwritten; nothing is returned.
    '''
    for name in list_of_cols_in:
        column = dataframe[name]

        # drop the nulls first so they never show up among the distinct values
        present = column[column.notnull()]
        distinct = list(present.unique())
        missing = np.sum(column.isna())

        data_dict[name] = distinct
        data_null[name] = missing
        
#######################################################################################



def nan_col_obj_list_fillna(dataframe, list_of_cols_in):
    '''
    Fills null values in place, picking the filler from the column's dtype
        'object' columns get 'DNE'
        all other columns get 0
    Returns None.
    '''
    for name in list_of_cols_in:
        column = dataframe[name]
        filler = 'DNE' if column.dtype == 'object' else 0
        dataframe[name] = column.fillna(filler)
            
#########################################################################################


def fill_masvnrtype(df, a_list_mvt):
    '''
    Replaces the 0 values in 'MasVnrArea' with the mode of 'MasVnrArea'
    taken over the rows that share the same 'MasVnrType'.
    The dataframe is modified in place.

    df:         dataframe holding the 'MasVnrType' and 'MasVnrArea' columns
    a_list_mvt: the 'MasVnrType' values to process, chosen by the user

    If a type has several modes, their average is used. A type with no
    rows, or with only null areas, has no mode and is skipped.

    Returns None. As an initial check, prints the mode positions and the
    fill value computed for the last type visited.
    '''
    # defaults so the closing check still works when a_list_mvt is empty
    i_collector = []
    avg_mode = 0

    for mvt_type in a_list_mvt:
        is_type = df['MasVnrType'] == mvt_type

        # Series.mode() ignores nulls and returns every tied value.
        # NOTE: zeros take part in the mode too, so a type whose most common
        # area is 0 ends up unchanged.
        x_collector = list(df.loc[is_type, 'MasVnrArea'].mode())
        i_collector = list(range(len(x_collector)))
        avg_mode = 0

        if not x_collector:
            # nothing to take a mode of; indexing the empty result would raise
            continue

        # if there are multiple modes, average them
        if len(x_collector) > 1:
            avg_mode = sum(x_collector) / len(x_collector)
        else:
            avg_mode = x_collector[0]

        # set the 'MasVnrArea' that is 0 for this type to the mode
        # (or to the average of the modes)
        df.loc[is_type & (df['MasVnrArea'] == 0), 'MasVnrArea'] = avg_mode

    # as an initial check
    print(i_collector)
    print(avg_mode)
            
#########################################################################################


def rmse(model, actual, features):
    '''
    Root mean squared error of a model's predictions.

    model:    object exposing `best_estimator_`, whose `predict` is called
    actual:   the true target values
    features: the inputs handed to `predict`

    Returns the square root of sklearn's mean_squared_error.
    '''
    predicted = model.best_estimator_.predict(features)
    mse = mean_squared_error(actual, predicted)
    return np.sqrt(mse)