## Write a Function for a Better Picture of Columns with Null Values

In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error

#######################################################################################


def nan_count(dataframe):
    '''
    Tabulate how many null values each column of ``dataframe`` holds.

    Only columns containing at least one null are listed. The result is a
    DataFrame indexed by column name with a single 'Null_Count' column.
    '''
    # column name -> number of nulls, restricted to columns that have any
    tally = {
        name: np.sum(dataframe[name].isna())
        for name in dataframe.columns
        if dataframe[name].isnull().any()
    }

    # orient='index' turns the dict keys into the row labels
    return pd.DataFrame.from_dict(tally, orient='index', columns=['Null_Count'])

#######################################################################################


def nan_col_obj_list_fillna(dataframe):
    '''
    Fill null values in place, picking the fill value from the column's
    data type.

    * object columns: nulls receive the most frequent value. When the most
      frequent value is the string 'None', the second most frequent value
      is used instead; if 'None' is the only value present it is used as is.
    * all other columns: nulls receive the column mean.

    The 'LotFrontage' column, when present, is never touched. Columns with
    no nulls, or with nothing but nulls, are left unchanged.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame to fill; it is modified in place.

    Returns
    -------
    None
    '''
    # LotFrontage is skipped so KNNImputer can be used on it later on;
    # FireplaceQu is not skipped bc it is ordinal
    targets = [name for name in dataframe.columns if name != 'LotFrontage']

    for colname in targets:
        series = dataframe[colname]
        missing = series.isnull()

        # nothing to fill, or no observed value to fill with
        if not missing.any() or missing.all():
            continue

        if series.dtype == 'object':
            # value_counts is sorted by frequency, most common first
            counts = series.value_counts()
            if counts.index[0] == 'None':
                # avoid filling with 'None' whenever another value exists
                fill_value = counts.index[1] if len(counts) > 1 else 'None'
            else:
                fill_value = series.mode()[0]
        else:
            fill_value = series.mean()

        dataframe.loc[missing, colname] = fill_value
            
#########################################################################################


def mvarea_1_to_0(dataframe):
    '''
    Reset 'MasVnrArea' from 1 to 0, in place, on rows without a veneer
    type, i.e. where 'MasVnrType' is null or the string 'None'.
    '''
    veneer_type = dataframe['MasVnrType']

    # an area of 1 most likely means 0 when no veneer type is recorded
    no_veneer = veneer_type.isnull() | (veneer_type == 'None')
    area_is_one = dataframe['MasVnrArea'] == 1

    dataframe.loc[no_veneer & area_is_one, 'MasVnrArea'] = 0

#########################################################################################


def mvarea_0_to_mean(dataframe):
    '''
    Replace a 'MasVnrArea' of 0 with the mean 'MasVnrArea' of the rows that
    share the same 'MasVnrType'. The frame is modified in place.

    Rows whose 'MasVnrType' is null or the string 'None' are left alone.
    The mean is taken over every row of the type, including the rows whose
    area is 0.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain the 'MasVnrType' and 'MasVnrArea' columns.

    Returns
    -------
    None
    '''
    # real veneer types only: drop nulls and the 'None' label (which may or
    # may not be present in the data)
    vnr_types = [vt for vt in dataframe['MasVnrType'].dropna().unique()
                 if vt != 'None']

    # if MasVnrType is not None but MasVnrArea==0, then use mean value to impute
    for vt in vnr_types:
        same_type = dataframe['MasVnrType'] == vt
        # scalar mean taken straight from the column, rather than through a
        # positional [0] lookup on a label-indexed result
        type_mean = dataframe.loc[same_type, 'MasVnrArea'].mean()
        dataframe.loc[same_type & (dataframe['MasVnrArea'] == 0),
                      'MasVnrArea'] = type_mean

#########################################################################################
    
    
def mvtype_none_to_mode(dataframe):
    '''
    Replace a 'MasVnrType' that is null or the string 'None' with the most
    common veneer type, on rows whose 'MasVnrArea' is greater than 1. The
    frame is modified in place.

    When the most common type is itself 'None', the second most common type
    is used instead. If no usable type exists (the column is entirely null,
    or 'None' is the only value present) the frame is left unchanged.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain the 'MasVnrType' and 'MasVnrArea' columns.

    Returns
    -------
    None
    '''
    veneer_type = dataframe['MasVnrType']

    # value_counts is sorted by frequency, most common first, nulls excluded
    counts = veneer_type.value_counts()
    if counts.empty:
        return

    if counts.index[0] == 'None':
        # getting the 2nd highest value in case the mode is 'None'
        if len(counts) < 2:
            return
        my_mode = counts.index[1]
    else:
        my_mode = veneer_type.mode()[0]

    # an area above 1 implies a veneer exists, so a missing type gets the mode
    untyped = veneer_type.isnull() | (veneer_type == 'None')
    has_area = dataframe['MasVnrArea'] > 1
    dataframe.loc[untyped & has_area, 'MasVnrType'] = my_mode
          
#########################################################################################


def rmse(model, actual, features):
    '''
    Root mean squared error of ``model.best_estimator_`` on ``features``,
    measured against ``actual`` with sklearn's mean_squared_error.
    '''
    predicted = model.best_estimator_.predict(features)
    mse = mean_squared_error(actual, predicted)
    return np.sqrt(mse)

#########################################################################################


# Category -> integer code lookup tables, keyed by the column each table was
# written for. 'DNE' is the extra label some tables carry (presumably "does
# not exist", for an absent feature - confirm against the preprocessing code).
_CODES_BY_COLUMN = {
    'MSZoning': {
        'A': 1, 'C': 2, 'FV': 3, 'I': 4, 'RH': 5, 'RL': 6, 'RP': 7, 'RM': 8,
    },
    'Street': {
        'Grvl': 1, 'Pave': 2,
    },
    'Alley': {
        'Grvl': 1, 'Pave': 2, 'DNE': 0,
    },
    'LotShape': {
        'Reg': 1, 'IR1': 2, 'IR2': 3, 'IR3': 4,
    },
    'LandContour': {
        'Lvl': 1, 'Bnk': 2, 'HLS': 3, 'Low': 4,
    },
    'Utilities': {
        'AllPub': 1, 'NoSewr': 2, 'NoSeWa': 3, 'ELO': 4,
    },
    'LotConfig': {
        'Inside': 1, 'Corner': 2, 'CulDSac': 3, 'FR2': 4, 'FR3': 5,
    },
    'LandSlope': {
        'Gtl': 1, 'Mod': 2, 'Sev': 3,
    },
    'Neighborhood': {
        'Blmngtn': 1, 'Blueste': 2, 'BrDale': 3, 'BrkSide': 4, 'ClearCr': 5,
        'CollgCr': 6, 'Crawfor': 7, 'Edwards': 8, 'Gilbert': 9, 'IDOTRR': 10,
        'MeadowV': 11, 'Mitchel': 12, 'NAmes': 13, 'NoRidge': 14, 'NPkVill': 15,
        'NridgHt': 16, 'NWAmes': 17, 'OldTown': 18, 'SWISU': 19, 'Sawyer': 20,
        'SawyerW': 21, 'Somerst': 22, 'StoneBr': 23, 'Timber': 24, 'Veenker': 25,
    },
    'Condition1': {
        'Artery': 1, 'Feedr': 2, 'Norm': 3, 'RRNn': 4, 'RRAn': 5,
        'PosN': 6, 'PosA': 7, 'RRNe': 8, 'RRAe': 9,
    },
    'Condition2': {
        'Artery': 1, 'Feedr': 2, 'Norm': 3, 'RRNn': 4, 'RRAn': 5,
        'PosN': 6, 'PosA': 7, 'RRNe': 8, 'RRAe': 9,
    },
    'BldgType': {
        '1Fam': 1, '2FmCon': 2, 'Duplx': 3, 'TwnhsE': 4, 'TwnhsI': 5,
    },
    'HouseStyle': {
        '1Story': 1, '1.5Fin': 2, '1.5Unf': 3, '2Story': 4,
        '2.5Fin': 5, '2.5Unf': 6, 'SFoyer': 7, 'SLvl': 8,
    },
    'RoofStyle': {
        'Flat': 1, 'Gable': 2, 'Gambrel': 3, 'Hip': 4, 'Mansard': 5, 'Shed': 6,
    },
    'RoofMatl': {
        'ClyTile': 1, 'CompShg': 2, 'Membran': 3, 'Metal': 4,
        'Roll': 5, 'Tar&Grv': 6, 'WdShake': 7, 'WdShngl': 8,
    },
    'Exterior1st': {
        'AsbShng': 1, 'AsphShn': 2, 'BrkComm': 3, 'BrkFace': 4, 'CBlock': 5,
        'CemntBd': 6, 'HdBoard': 7, 'ImStucc': 8, 'MetalSd': 9, 'Other': 10,
        'Plywood': 11, 'PreCast': 12, 'Stone': 13, 'Stucco': 14, 'VinylSd': 15,
        'Wd Sdng': 16, 'WdShing': 17,
    },
    'Exterior2nd': {
        'AsbShng': 1, 'AsphShn': 2, 'BrkComm': 3, 'BrkFace': 4, 'CBlock': 5,
        'CemntBd': 6, 'HdBoard': 7, 'ImStucc': 8, 'MetalSd': 9, 'Other': 10,
        'Plywood': 11, 'PreCast': 12, 'Stone': 13, 'Stucco': 14, 'VinylSd': 15,
        'Wd Sdng': 16, 'WdShing': 17,
    },
    'MasVnrType': {
        'BrkCmn': 1, 'BrkFace': 2, 'CBlock': 3, 'None': 4, 'Stone': 5, 'DNE': 0,
    },
    'ExterQual': {
        'Ex': 1, 'Gd': 2, 'TA': 3, 'Fa': 4, 'Po': 5,
    },
    'ExterCond': {
        'Ex': 1, 'Gd': 2, 'TA': 3, 'Fa': 4, 'Po': 5,
    },
    'Foundation': {
        'BrkTil': 1, 'CBlock': 2, 'PConc': 3, 'Slab': 4, 'Stone': 5, 'Wood': 6,
    },
    'BsmtQual': {
        'Ex': 1, 'Gd': 2, 'TA': 3, 'Fa': 4, 'Po': 5, 'DNE': 0,
    },
    'BsmtCond': {
        'Ex': 1, 'Gd': 2, 'TA': 3, 'Fa': 4, 'Po': 5, 'DNE': 0,
    },
    'BsmtExposure': {
        'Gd': 1, 'Av': 2, 'Mn': 3, 'No': 4, 'DNE': 0,
    },
    'BsmtFinType1': {
        'GLQ': 1, 'ALQ': 2, 'BLQ': 3, 'Rec': 4, 'LwQ': 5, 'Unf': 6, 'DNE': 0,
    },
    'BsmtFinType2': {
        'GLQ': 1, 'ALQ': 2, 'BLQ': 3, 'Rec': 4, 'LwQ': 5, 'Unf': 6, 'DNE': 0,
    },
    'Heating': {
        'Floor': 1, 'GasA': 2, 'GasW': 3, 'Grav': 4, 'OthW': 5, 'Wall': 6,
    },
    'HeatingQC': {
        'Ex': 1, 'Gd': 2, 'TA': 3, 'Fa': 4, 'Po': 5,
    },
    'CentralAir': {
        'N': 0, 'Y': 1,
    },
    'Electrical': {
        'SBrkr': 1, 'FuseA': 2, 'FuseF': 3, 'FuseP': 4, 'Mix': 5, 'DNE': 0,
    },
    'KitchenQual': {
        'Ex': 1, 'Gd': 2, 'TA': 3, 'Fa': 4, 'Po': 5,
    },
    'Functional': {
        'Typ': 1, 'Min1': 2, 'Min2': 3, 'Mod': 4,
        'Maj1': 5, 'Maj2': 6, 'Sev': 7, 'Sal': 8,
    },
    'FireplaceQu': {
        'Ex': 1, 'Gd': 2, 'TA': 3, 'Fa': 4, 'Po': 5, 'DNE': 0,
    },
    'GarageType': {
        '2Types': 1, 'Attchd': 2, 'Basment': 3, 'BuiltIn': 4,
        'CarPort': 5, 'Detchd': 6, 'DNE': 0,
    },
    # NOTE(review): 'DNE' is 4 here but 0 in every other table - confirm
    # this is intended
    'GarageFinish': {
        'Fin': 1, 'RFn': 2, 'Unf': 3, 'DNE': 4,
    },
    'GarageQual': {
        'Ex': 1, 'Gd': 2, 'TA': 3, 'Fa': 4, 'Po': 5, 'DNE': 0,
    },
    'GarageCond': {
        'Ex': 1, 'Gd': 2, 'TA': 3, 'Fa': 4, 'Po': 5, 'DNE': 0,
    },
    'PavedDrive': {
        'Y': 1, 'P': 2, 'N': 0,
    },
    'PoolQC': {
        'Ex': 1, 'Gd': 2, 'TA': 3, 'Fa': 4, 'DNE': 0,
    },
    'Fence': {
        'GdPrv': 1, 'MnPrv': 2, 'GdWo': 3, 'MnWw': 4, 'DNE': 0,
    },
    'MiscFeature': {
        'Elev': 1, 'Gar2': 2, 'Othr': 3, 'Shed': 4, 'TenC': 5, 'DNE': 0,
    },
    'SaleType': {
        'WD': 1, 'CWD': 2, 'VWD': 3, 'New': 4, 'COD': 5,
        'Con': 6, 'ConLw': 7, 'ConLI': 8, 'ConLD': 9, 'Oth': 10,
    },
    'SaleCondition': {
        'Normal': 1, 'Abnorml': 2, 'AdjLand': 3,
        'Alloca': 4, 'Family': 5, 'Partial': 6,
    },
}

# plain list of the tables, in the order they are declared above
conversion = list(_CODES_BY_COLUMN.values())