## Write a Function for Better Picture of Columns with Null Values

# In [1]:
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)

import sklearn
from sklearn import model_selection as ms
sklearn.set_config(print_changed_only=False)

from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import Ridge, Lasso, ElasticNet, LinearRegression
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error
from sklearn import linear_model as lm

import xgboost

import copy

import itertools

import re

import import_ipynb

# Raw strings keep the Windows-style relative paths byte-for-byte identical while
# avoiding the invalid "\h" escape sequence that newer Python versions warn about.
hp_train = pd.read_csv(r'..\hp_2a_ranked_edited_train.csv', index_col=0)
hp_saleprice = pd.read_csv(r'..\hp_1a_no_imputation_saleprice.csv')
#######################################################################################


def nan_count(df):
    '''
    Summarise the missing values of df.

    Returns a DataFrame indexed by the names of the columns that contain at
    least one null, with the number of nulls in the single column
    'Null_Count'. Columns without nulls are left out.
    '''
    # one boolean null-mask per column, evaluated lazily
    masks = ((name, df[name].isnull()) for name in df.columns)
    per_column = {name: mask.sum() for name, mask in masks if mask.any()}

    return pd.DataFrame.from_dict(per_column, orient='index', columns=['Null_Count'])
            
#########################################################################################


def general_fillna(df):
    '''
    Fill the null values of every column of df in place.

    Object-dtype columns are filled with their most frequent value; when
    that value is the string 'None', the second most frequent value is used
    instead. Every other column is filled with its mean.

    Columns without nulls are skipped. A column that offers no usable fill
    value (an object column that is entirely null, or whose only value is
    'None') is left untouched instead of raising.

    Returns None; df is modified in place.
    '''
    for colname in list(df.columns):
        missing = df[colname].isnull()
        if not missing.any():
            continue

        if df[colname].dtype == 'O':
            # value_counts() ignores nulls and lists the most frequent value first
            counts = df[colname].value_counts()
            if counts.empty:
                continue
            if counts.index[0] != 'None':
                fill_value = df[colname].mode()[0]
            elif len(counts) > 1:
                # the mode is 'None': fall back to the runner-up
                fill_value = counts.index[1]
            else:
                continue
        else:
            fill_value = df[colname].mean()

        df.loc[missing, colname] = fill_value

#########################################################################################


def _mode_skipping_none(values, fallback=None):
    '''
    Most frequent non-null label of values, avoiding the literal string 'None'.

    If 'None' is the most frequent label, the runner-up is returned instead.
    fallback is returned when values has no non-null entries or when 'None'
    is the only label present.
    '''
    # value_counts() ignores nulls and lists the most frequent label first
    counts = values.value_counts()
    if counts.empty:
        return fallback
    if counts.index[0] != 'None':
        return values.mode()[0]
    if len(counts) > 1:
        return counts.index[1]
    return fallback


def my_fillna(df, null_col, cond_col=None, group=None):
    '''
    Impute the nulls of null_col in place.

    df == dataframe
    null_col == the column where Null values will be imputed
    cond_col == the column that will be used as the condition for imputation
                (should not be the same as null_col); when given, a null is
                only filled if the cond_col value in the same row is not
                null and is neither 0 nor 1
    group == the column whose values will determine the grouping of the data;
             when given, each null is filled with the value computed for the
             group its row belongs to

    Fill values:
      * numeric null_col: the median (of the whole column, or of the group)
      * object null_col, no group: the most frequent value, skipping the
        string 'None' in favour of the runner-up
      * object null_col, group only: the plain mode of the group
      * object null_col, group and cond_col: the group's most frequent value,
        skipping 'None'; if the group holds nothing but 'None' (or nothing at
        all) the column-wide value is used instead

    An object group without any non-null value falls back to the column-wide
    value. If no fill value can be derived at all, the nulls stay null.

    Returns None; df is modified in place.
    '''
    is_object = df[null_col].dtype == 'O'
    missing = df[null_col].isnull()

    # column-wide fill value (also the fallback for groups with nothing usable)
    if is_object:
        my_mode = _mode_skipping_none(df[null_col])
    else:
        my_mode = df[null_col].median()

    # rows that are allowed to be filled
    if cond_col is None:
        target = missing
    else:
        cond = df[cond_col]
        target = missing & (cond != 0) & (cond != 1) & cond.notnull()

    if group is None:
        df.loc[target, null_col] = my_mode
        return

    # {group value: fill value}
    n_dict = {}
    for n in df[group].unique():
        in_group = df.loc[df[group] == n, null_col]
        if not is_object:
            n_dict[n] = in_group.median()
        elif cond_col is not None:
            n_dict[n] = _mode_skipping_none(in_group, fallback=my_mode)
        else:
            modes = in_group.mode()
            n_dict[n] = modes[0] if len(modes) else my_mode

    # the mapped Series is aligned on the index, so each row gets its own group's value
    df.loc[target, null_col] = df[group].map(n_dict)

#########################################################################################


class Dataset_Editor():
    '''
    Container for the two objects passed at construction.

    NOTE(review): the original definition stopped at an unfinished __init__
    signature (no colon, no body), which made the whole module a syntax
    error. The constructor now simply stores its arguments - presumably the
    module-level functions of the same names; confirm the intended design.
    '''

    def __init__(self, conditions_edit, nbrhd_rank):
        self.conditions_edit = conditions_edit
        self.nbrhd_rank = nbrhd_rank
    
'''
the following functions are used for feature engineering and dropping columns
'''

def conditions_edit(df):
    '''
    Collapse the railroad categories of 'Condition1' and 'Condition2' in place.

    Any value containing 'RRN' becomes 'RRN' and any value containing 'RRA'
    becomes 'RRA'; nulls and all other values are left as they are.
    '''
    for column in ('Condition1', 'Condition2'):
        for railroad in ('RRN', 'RRA'):
            matches = df[column].str.contains(railroad, na=False)
            df.loc[matches, column] = railroad

def nbrhd_rank(df):
    '''
    Add the column 'NbMedianRank' to df (in place).

    For every neighborhood found in df['Neighborhood'], a median is taken
    from the module-level hp_saleprice frame, using the rows whose index
    labels belong to the hp_train rows of that neighborhood. Neighborhoods
    are then ranked by that median, 1 being the highest.

    Returns None.
    '''
    # {neighborhood: median}
    n_dict_median = {}
    for n in df['Neighborhood'].unique():
        train_rows = hp_train[hp_train['Neighborhood'] == n].index
        # median() yields one value per column; take the first one by position
        n_dict_median[n] = hp_saleprice.loc[train_rows].median().iloc[0]

    # highest median first; ties keep their order of first appearance
    ordered = sorted(n_dict_median, key=n_dict_median.get, reverse=True)
    n_dict_median_ranking = {n: rank for rank, n in enumerate(ordered, start=1)}

    # every value of df['Neighborhood'] is a key of the ranking by construction,
    # so map() swaps each name for its rank and yields a numeric column directly
    df['NbMedianRank'] = df['Neighborhood'].map(n_dict_median_ranking)

def drop_nbrhd(df):
    '''
    Remove the 'Neighborhood' column from df in place; do nothing if absent.
    '''
    if 'Neighborhood' not in df.columns:
        return
    df.drop(columns='Neighborhood', inplace=True)

def totallivsf_add(df):
    '''
    Insert 'TotalLivSF' as the first column of df:
    GrLivArea + BsmtFinSF1 + BsmtFinSF2.
    '''
    above_ground = df['GrLivArea']
    finished_bsmt_1 = df['BsmtFinSF1']
    finished_bsmt_2 = df['BsmtFinSF2']
    total_living = above_ground + finished_bsmt_1 + finished_bsmt_2
    df.insert(0, 'TotalLivSF', total_living)

def drop_grlivarea(df):
    '''
    Remove the 'GrLivArea' column from df in place; do nothing if absent.
    '''
    if 'GrLivArea' not in df.columns:
        return
    df.drop(columns=['GrLivArea'], inplace=True)

def bsmtfin_add(df):
    '''
    Insert 'BsmtFin' as the first column of df: 1 where
    BsmtFinSF1 + BsmtFinSF2 is positive, otherwise the sum itself.
    '''
    df.insert(0, 'BsmtFin', df['BsmtFinSF1'].add(df['BsmtFinSF2']))
    # any positive finished area collapses to the flag value 1
    has_finished_area = df['BsmtFin'].gt(0)
    df.loc[has_finished_area, 'BsmtFin'] = 1

def drop_bsmtfinsf12(df):
    '''
    Remove 'BsmtFinSF1' and 'BsmtFinSF2' from df in place, but only when
    both columns are present.
    '''
    targets = ['BsmtFinSF1', 'BsmtFinSF2']
    if all(name in df.columns for name in targets):
        df.drop(columns=targets, inplace=True)

def totalporchsf_add(df):
    '''
    Insert 'TotalPorchSF' as the first column of df: the sum of
    OpenPorchSF, EnclosedPorch, 3SsnPorch and ScreenPorch.
    '''
    porch_columns = ['OpenPorchSF', 'EnclosedPorch', '3SsnPorch', 'ScreenPorch']
    # accumulate left to right, one porch column at a time
    total = df[porch_columns[0]]
    for name in porch_columns[1:]:
        total = total + df[name]
    df.insert(0, 'TotalPorchSF', total)

def drop_porches(df):
    '''
    Remove the four porch area columns from df in place, but only when all
    of them are present.
    '''
    targets = ['OpenPorchSF', 'EnclosedPorch', '3SsnPorch', 'ScreenPorch']
    if all(name in df.columns for name in targets):
        df.drop(columns=targets, inplace=True)

def yrsremodtosold_add(df):
    '''
    Insert 'YrsRemodToSold' as the first column of df:
    YrSold - YearRemodAdd.
    '''
    years_elapsed = df['YrSold'].sub(df['YearRemodAdd'])
    df.insert(0, 'YrsRemodToSold', years_elapsed)

def drop_yrsoldremod(df):
    '''
    Remove 'YrSold' and 'YearRemodAdd' from df in place, but only when both
    columns are present.
    '''
    targets = ['YrSold', 'YearRemodAdd']
    if all(name in df.columns for name in targets):
        df.drop(columns=targets, inplace=True)

def fireplace_yes(df):
    '''
    Insert 'Fireplace' as the first column of df: 1 where 'Fireplaces' is
    greater than 0, else 0.
    '''
    flags = [int(count > 0) for count in df['Fireplaces']]
    df.insert(0, 'Fireplace', flags)

def drop_fireplaces(df):
    '''
    Remove the 'Fireplaces' column from df in place; do nothing if absent.
    '''
    if 'Fireplaces' not in df.columns:
        return
    df.drop(columns=['Fireplaces'], inplace=True)

def pool_yes(df):
    '''
    Insert 'Pool' as the first column of df: 1 where 'PoolArea' is greater
    than 0, else 0.
    '''
    flags = [int(area > 0) for area in df['PoolArea']]
    df.insert(0, 'Pool', flags)

def drop_poolarea(df):
    '''
    Remove the 'PoolArea' column from df in place; do nothing if absent.
    '''
    if 'PoolArea' not in df.columns:
        return
    df.drop(columns=['PoolArea'], inplace=True)

def totalbaths_add(df):
    '''
    Insert 'TotalBaths' as the first column of df:
    FullBath plus half of HalfBath.
    '''
    half_bath_share = df['HalfBath'] * 0.5
    df.insert(0, 'TotalBaths', df['FullBath'] + half_bath_share)

def drop_baths(df):
    '''
    Remove 'FullBath' and 'HalfBath' from df in place, but only when both
    columns are present.
    '''
    targets = ['FullBath', 'HalfBath']
    if all(name in df.columns for name in targets):
        df.drop(columns=targets, inplace=True)
    
def totalbsmtbaths_add(df):
    '''
    Insert 'TotalBsmtBaths' as the first column of df:
    BsmtFullBath plus half of BsmtHalfBath.
    '''
    half_bath_share = df['BsmtHalfBath'] * 0.5
    df.insert(0, 'TotalBsmtBaths', df['BsmtFullBath'] + half_bath_share)
    
def drop_bsmtbaths(df):
    '''
    Remove 'BsmtFullBath' and 'BsmtHalfBath' from df in place, but only when
    both columns are present.
    '''
    targets = ['BsmtFullBath', 'BsmtHalfBath']
    if all(name in df.columns for name in targets):
        df.drop(columns=targets, inplace=True)

#########################################################################################


def dum_scale(df):
    '''
    dummify and scale the input dataset

    df is one-hot encoded with pd.get_dummies(drop_first=True) and every
    resulting column is then min-max scaled on its own. The scaled frame is
    returned.
    '''
    # dummify the variables
    hp_dum = pd.get_dummies(df, drop_first=True)

    # scale the dataset; fit_transform() refits the scaler on each column
    scaler = MinMaxScaler()
    for col in list(hp_dum.columns):
        # the scaler works on 2-D input, hence the reshape to a single feature column
        hp_dum[col] = scaler.fit_transform(np.array(hp_dum[col]).reshape(-1, 1))

    return hp_dum

#########################################################################################


def mlr_model(x_trn, y_trn, x_tst, y_tst):
    '''
    Grid-search a LinearRegression over fit_intercept and record its test RMSE.

    The search is fitted on (x_trn, y_trn); the refit best estimator is then
    scored on (x_tst, y_tst) with rmse() and the result is stored on the
    function object as mlr_model.test_rmse. Nothing is returned.

    NOTE(review): n_folds is read from module scope and is not defined in the
    visible part of this file - confirm it exists before calling.
    '''
    linreg = LinearRegression()
    param_grid = {'fit_intercept': [True, False]}
    search = ms.GridSearchCV(
        linreg,
        param_grid,
        cv=n_folds,
        refit=True,
        n_jobs=-1,
        scoring='neg_root_mean_squared_error',
        return_train_score=True,
    )
    search.fit(x_trn, y_trn)
    mlr_model.test_rmse = rmse(search, y_tst, x_tst)
    
def lasso_model(x_trn, y_trn, x_tst, y_tst):
    '''
    Grid-search a Lasso over alpha, fit_intercept and selection and record
    its test RMSE.

    The search is fitted on (x_trn, y_trn); the refit best estimator is then
    scored on (x_tst, y_tst) with rmse() and the result is stored on the
    function object as lasso_model.test_rmse. Nothing is returned.

    NOTE(review): n_folds is read from module scope and is not defined in the
    visible part of this file - confirm it exists before calling.
    '''
    estimator = Lasso(max_iter=1000000)
    param_grid = {
        'alpha': [0.0001, 0.000112, 0.000124, 0.000136, 0.000148],
        'fit_intercept': [True, False],
        'selection': ['cyclic', 'random'],
    }
    search = ms.GridSearchCV(
        estimator,
        param_grid,
        cv=n_folds,
        refit=True,
        n_jobs=-1,
        scoring='neg_root_mean_squared_error',
        return_train_score=True,
    )
    search.fit(x_trn, y_trn)
    lasso_model.test_rmse = rmse(search, y_tst, x_tst)

def ridge_model(x_trn, y_trn, x_tst, y_tst):
    '''
    Grid-search a Ridge over solver and fit_intercept and record its test RMSE.

    The search is fitted on (x_trn, y_trn); the refit best estimator is then
    scored on (x_tst, y_tst) with rmse() and the result is stored on the
    function object as ridge_model.test_rmse. Nothing is returned.

    NOTE(review): n_folds and state are read from module scope and are not
    defined at module level in the visible part of this file - confirm they
    exist before calling.
    '''
    ridge = Ridge(random_state=state, max_iter=10000, fit_intercept=True)
    # parameters to use
    # 'lbfgs' is left out on purpose: Ridge only accepts it together with
    # positive=True, so every such candidate failed to fit and could never be
    # selected as the best estimator
    solvers = ['auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga']
    fitintercept = [True, False]
    gparam_ridge_1 = {'solver': solvers,
                      'fit_intercept': fitintercept,
                      'random_state': [state]}
    # setting parameters
    gs_ridge_1 = ms.GridSearchCV(ridge, gparam_ridge_1, cv=n_folds, refit=True, n_jobs=-1,
                                 scoring='neg_root_mean_squared_error',
                                 return_train_score=True)
    gs_ridge_1.fit(x_trn, y_trn)
    ridge_model.test_rmse = rmse(gs_ridge_1, y_tst, x_tst)
    
def enet_model(x_trn, y_trn, x_tst, y_tst):
    '''
    Grid-search an ElasticNet over alpha, l1_ratio, precompute, warm_start
    and fit_intercept and record its test RMSE.

    The search is fitted on (x_trn, y_trn); the refit best estimator is then
    scored on (x_tst, y_tst) with rmse() and the result is stored on the
    function object as enet_model.test_rmse. Nothing is returned.

    NOTE(review): n_folds is read from module scope and is not defined in the
    visible part of this file - confirm it exists before calling.
    '''
    estimator = ElasticNet(max_iter=10000000, selection='random')
    on_off = [True, False]
    param_grid = {
        'alpha': [1e-4, 2.5e-4, 5e-4],
        'l1_ratio': [0.5, 0.7, 1],
        'precompute': list(on_off),
        'warm_start': list(on_off),
        'fit_intercept': list(on_off),
    }
    search = ms.GridSearchCV(
        estimator,
        param_grid,
        cv=n_folds,
        refit=True,
        n_jobs=-1,
        scoring='neg_root_mean_squared_error',
        return_train_score=True,
    )
    search.fit(x_trn, y_trn)
    enet_model.test_rmse = rmse(search, y_tst, x_tst)
    
def rfr_model(x_trn, y_trn, x_tst, y_tst):
    '''
    Cross-validate a default RandomForestRegressor (empty parameter grid)
    and record its test RMSE.

    The search is fitted on (x_trn, y_trn); the refit estimator is then
    scored on (x_tst, y_tst) with rmse() and the result is stored on the
    function object as rfr_model.test_rmse. Nothing is returned.

    NOTE(review): n_folds is read from module scope and is not defined in the
    visible part of this file - confirm it exists before calling.
    '''
    forest = RandomForestRegressor()
    no_params = {}
    search = ms.GridSearchCV(
        forest,
        no_params,
        cv=n_folds,
        refit=True,
        n_jobs=-1,
        scoring='neg_root_mean_squared_error',
        return_train_score=True,
    )
    search.fit(x_trn, y_trn)
    rfr_model.test_rmse = rmse(search, y_tst, x_tst)
    
def gbm_model(x_trn, y_trn, x_tst, y_tst):
    '''
    Cross-validate a default GradientBoostingRegressor (empty parameter
    grid) and record its test RMSE.

    The search is fitted on (x_trn, y_trn); the refit estimator is then
    scored on (x_tst, y_tst) with rmse() and the result is stored on the
    function object as gbm_model.test_rmse. Nothing is returned.

    NOTE(review): n_folds is read from module scope and is not defined in the
    visible part of this file - confirm it exists before calling.
    '''
    booster = GradientBoostingRegressor()
    no_params = {}
    search = ms.GridSearchCV(
        booster,
        no_params,
        cv=n_folds,
        refit=True,
        n_jobs=-1,
        scoring='neg_root_mean_squared_error',
        return_train_score=True,
    )
    search.fit(x_trn, y_trn)
    gbm_model.test_rmse = rmse(search, y_tst, x_tst)
    
def xgb_model(x_trn, y_trn, x_tst, y_tst):
    '''
    Cross-validate a default xgboost.XGBRegressor (empty parameter grid) and
    record its test RMSE.

    The search is fitted on (x_trn, y_trn); the refit estimator is then
    scored on (x_tst, y_tst) with rmse() and the result is stored on the
    function object as xgb_model.test_rmse. Nothing is returned.

    NOTE(review): n_folds is read from module scope and is not defined in the
    visible part of this file - confirm it exists before calling.
    '''
    booster = xgboost.XGBRegressor()
    no_params = {}
    search = ms.GridSearchCV(
        booster,
        no_params,
        cv=n_folds,
        refit=True,
        n_jobs=-1,
        scoring='neg_root_mean_squared_error',
        return_train_score=True,
    )
    search.fit(x_trn, y_trn)
    xgb_model.test_rmse = rmse(search, y_tst, x_tst)

#########################################################################################


def use_og_data(df):
    '''
    Identity step: hand df back unchanged.

    It exists so that the "no changes" case has a function name that can
    serve as a dictionary key, like the other dataset-editing functions.
    '''
    return df

def comb_list_generator(list_add, list_drop):
    '''
    Build the list of function-name recipes to try.

    For every size r, each r-sized combination of list_add is paired with
    each r-sized combination of list_drop (list_add names first, list_drop
    names second). Only equal-sized subsets are paired, so the result is not
    the full product of both powersets; sizes larger than the shorter list
    produce nothing.

    list_add == list of callables
    list_drop == list of callables

    Returns a list of one-element lists. The element is the names of the
    selected callables joined by ', ', or 'use_og_data' for the empty
    selection. Duplicate recipes are not filtered out.
    '''
    #https://stackoverflow.com/questions/464864/how-to-get-all-possible-combinations-of
    #-a-list-s-elements
    #https://stackoverflow.com/questions/61313027/python-executing-all-permutations-of
    #-list-of-functions
    comb_list = []

    largest_size = min(len(list_add), len(list_drop))
    for size in range(largest_size + 1):
        add_subsets = itertools.combinations(list_add, size)
        drop_subsets = itertools.combinations(list_drop, size)

        for add_funcs, drop_funcs in itertools.product(add_subsets, drop_subsets):
            # use the callables' own names instead of parsing their repr()
            names = [getattr(func, '__name__', str(func))
                     for func in add_funcs + drop_funcs]
            # making the dict key sensible for the empty selection
            comb_list.append([', '.join(names) if names else 'use_og_data'])

    return comb_list

#########################################################################################


def apply_comb_list(com_list, df_nonedit, target, model_list, df_index):
    '''
    Apply each recipe of function names to a fresh copy of the dataset, fit
    every model on the result and collect the test scores.

    com_list == list of recipes as produced by comb_list_generator; each
                recipe holds function names separated by ', '
    df_nonedit == the unedited dataframe; it is deep-copied for every recipe
    target == the target values passed to train_test_split together with
              the prepared features
    model_list == list of model functions; each is called as
                  model(xtrain, ytrain, xtest, ytest) and must leave its
                  score in the attribute model.test_rmse
    df_index == index labels selecting the rows used for modelling after
                dummification and scaling

    Returns a dict {model position (1-based): {recipe string: test_rmse}}.
    Keys 1 to 6 always exist; more are added when model_list is longer.
    '''
    state = 0
    counter = 0

    # one score dictionary per model, keyed by its 1-based position in model_list
    funcs_scores = {i: {} for i in range(1, max(6, len(model_list)) + 1)}

    for cb_list in com_list:

        df = copy.deepcopy(df_nonedit)

        # converting any numbers to numeric type
        for col in df.columns:
            df[col] = df[col].astype('float', errors='ignore')

        # splitting the recipe into the individual function names
        entries = [cb_list] if isinstance(cb_list, str) else cb_list
        cb_split = [name for entry in entries for name in str(entry).split(', ')]
        # converting the list of strings to a single string
        funcs_used = ', '.join(cb_split)

        # calling all of the functions within the list
        for cb in cb_split:
            # NOTE(review): eval() executes whatever expression the string holds;
            # only pass recipes built from trusted function names
            eval(cb)(df)

        # converting these 2 cols to string bc difficulties wi/ OneHotEncoding using Dask.
        # Done after the functions above so that arithmetic on the numeric
        # values (e.g. YrSold - YearRemodAdd) still works.
        for my in ('MoSold', 'YrSold'):
            if my in df.columns:
                df[[my]] = df[[my]].astype(str)

        # getting the train dataset for modeling after dummification
        hp_touse = dum_scale(df).loc[df_index]

        # setting up train and test sets
        xtrain, xtest, ytrain, ytest = ms.train_test_split(hp_touse, target,
                                                           test_size=0.2,
                                                           random_state=state)
        # Must flatten to fit
        ytrain = ytrain.values.flatten()

        # run the models; each model has a designated number (its order in model_list)
        for i, model in enumerate(model_list, start=1):
            model(xtrain, ytrain, xtest, ytest)
            # add scores to the dictionary of dictionaries
            funcs_scores[i][funcs_used] = model.test_rmse

        # keeping track of rounds
        counter += 1
        print(f'Completed round {counter}')
    return funcs_scores


#########################################################################################


def rmse(model, actual, features):
    '''
    Root mean squared error between actual and the predictions that
    model.best_estimator_ makes for features, computed with sklearn's
    mean_squared_error.
    '''
    predictions = model.best_estimator_.predict(features)
    mse = mean_squared_error(actual, predictions)
    return np.sqrt(mse)

#########################################################################################



# conversion = [
#     # MSZoning
#     {'A': 1, 'C': 2, 'FV': 3, 'I': 4, 'RH': 5, 'RL': 6, 'RP': 7, 'RM': 8}, 
    
#     # Street
#     {'Grvl': 1, 'Pave': 2},
    
#     # Alley
#     {'Grvl': 1, 'Pave': 2, 'DNE': 0},
    
#     # LotShape
#     {'Reg': 1, 'IR1': 2, 'IR2': 3, 'IR3': 4}, 
    
#     # LandContour
#     {'Lvl': 1, 'Bnk': 2, 'HLS': 3, 'Low': 4},
    
#     # Utilities
#     {'AllPub': 1, 'NoSewr': 2, 'NoSeWa': 3, 'ELO': 4},
    
#     # LotConfig
#     {'Inside': 1, 'Corner': 2, 'CulDSac': 3, 'FR2': 4, 'FR3': 5}, 
    
#     # LandSlope
#     {'Gtl': 1, 'Mod': 2, 'Sev': 3},
    
#     # Neighborhood
#     {'Blmngtn': 1, 'Blueste': 2, 'BrDale': 3, 'BrkSide': 4, 'ClearCr': 5, 'CollgCr': 6,
#      'Crawfor': 7, 'Edwards': 8, 'Gilbert': 9, 'IDOTRR': 10, 'MeadowV': 11, 'Mitchel': 12,
#      'NAmes': 13, 'NoRidge': 14, 'NPkVill': 15, 'NridgHt': 16, 'NWAmes': 17, 'OldTown': 18,
#      'SWISU': 19, 'Sawyer': 20, 'SawyerW': 21, 'Somerst': 22, 'StoneBr': 23, 'Timber': 24,
#      'Veenker': 25},
    
#     # Condition1
#     {'Artery': 1, 'Feedr': 2, 'Norm': 3, 'RRNn': 4, 'RRAn': 5,
#      'PosN': 6, 'PosA': 7, 'RRNe': 8, 'RRAe': 9},
    
#     # Condition2
#     {'Artery': 1, 'Feedr': 2, 'Norm': 3, 'RRNn': 4, 'RRAn': 5,
#      'PosN': 6, 'PosA': 7, 'RRNe': 8, 'RRAe': 9},
    
#     # BldgType
#     {'1Fam': 1, '2FmCon': 2, 'Duplx': 3, 'TwnhsE': 4, 'TwnhsI': 5},
    
#     # HouseStyle
#     {'1Story': 1, '1.5Fin': 2, '1.5Unf': 3, '2Story': 4, '2.5Fin': 5, '2.5Unf': 6, 'SFoyer': 7,
#      'SLvl': 8},
    
#     # RoofStyle 
#     {'Flat': 1, 'Gable': 2, 'Gambrel': 3, 'Hip': 4, 'Mansard': 5, 'Shed' :6},
    
#     # RoofMatl
#     {'ClyTile': 1, 'CompShg': 2, 'Membran': 3, 'Metal': 4, 'Roll': 5, 'Tar&Grv': 6, 'WdShake': 7,
#      'WdShngl': 8},
    
#     # Exterior1st
#     {'AsbShng': 1, 'AsphShn': 2, 'BrkComm': 3, 'BrkFace': 4, 'CBlock': 5, 'CemntBd': 6, 'HdBoard': 7,
#      'ImStucc': 8, 'MetalSd': 9, 'Other': 10, 'Plywood': 11, 'PreCast': 12, 'Stone': 13, 'Stucco': 14,
#      'VinylSd': 15, 'Wd Sdng': 16, 'WdShing': 17},
    
#     # Exterior2nd
#     {'AsbShng': 1, 'AsphShn': 2, 'BrkComm': 3, 'BrkFace': 4, 'CBlock': 5, 'CemntBd': 6, 'HdBoard': 7,
#      'ImStucc': 8, 'MetalSd': 9, 'Other': 10, 'Plywood': 11, 'PreCast': 12, 'Stone': 13, 'Stucco': 14,
#      'VinylSd': 15, 'Wd Sdng': 16, 'WdShing': 17},
    
#     # MasVnrType
#     {'BrkCmn': 1, 'BrkFace': 2, 'CBlock': 3, 'None': 4, 'Stone': 5, 'DNE': 0},
    
#     # ExterQual
#     {'Ex': 1, 'Gd': 2, 'TA': 3, 'Fa': 4, 'Po': 5},
    
#     # ExterCond
#     {'Ex': 1, 'Gd': 2, 'TA': 3, 'Fa': 4, 'Po': 5},
    
#     # Foundation
#     {'BrkTil': 1, 'CBlock': 2, 'PConc': 3, 'Slab': 4, 'Stone': 5, 'Wood': 6},
    
#     # BsmtQual
#     {'Ex': 1, 'Gd': 2, 'TA': 3, 'Fa': 4, 'Po': 5, 'DNE': 0},
    
#     # BsmtCond
#     {'Ex': 1, 'Gd': 2, 'TA': 3, 'Fa': 4, 'Po': 5, 'DNE': 0},
    
#     # BsmtExposure
#     {'Gd': 1, 'Av': 2, 'Mn': 3, 'No': 4,'DNE': 0},
    
#     # BsmtFinType1
#     {'GLQ': 1, 'ALQ': 2, 'BLQ': 3, 'Rec': 4, 'LwQ': 5, 'Unf': 6, 'DNE': 0},
    
#     # BsmtFinType2
#     {'GLQ': 1, 'ALQ': 2, 'BLQ': 3, 'Rec': 4, 'LwQ': 5, 'Unf': 6, 'DNE': 0},
    
#     # Heating
#     {'Floor': 1, 'GasA': 2, 'GasW': 3, 'Grav': 4, 'OthW': 5, 'Wall': 6},
    
#     # HeatingQC
#     {'Ex': 1, 'Gd': 2, 'TA': 3, 'Fa': 4, 'Po': 5},
    
#     # CentralAir
#     {'N': 0, 'Y': 1},
    
#     # Electrical
#     {'SBrkr': 1, 'FuseA': 2, 'FuseF': 3, 'FuseP': 4, 'Mix': 5, 'DNE': 0},
    
#     # KitchenQual
#     {'Ex': 1,'Gd': 2,'TA': 3,'Fa': 4,'Po': 5},
    
#     # Functional 
#     {'Typ': 1, 'Min1': 2, 'Min2': 3, 'Mod': 4, 'Maj1': 5, 'Maj2': 6, 'Sev': 7, 'Sal': 8}, 
    
#     # FireplaceQU
#     {'Ex': 1, 'Gd': 2, 'TA': 3, 'Fa': 4, 'Po': 5, 'DNE': 0},
    
#     # GarageType
#     {'2Types': 1, 'Attchd': 2, 'Basment': 3, 'BuiltIn': 4, 'CarPort': 5, 'Detchd': 6, 'DNE': 0}, 
    
#     # GarageFinish
#     {'Fin': 1, 'RFn': 2, 'Unf': 3, 'DNE': 4},
    
#     # GarageQual
#     {'Ex': 1,'Gd': 2,'TA': 3,'Fa': 4,'Po': 5, 'DNE':0},
    
#     # GarageCond
#     {'Ex': 1,'Gd': 2,'TA': 3,'Fa': 4,'Po': 5, 'DNE':0},
    
#     # PavedDrive
#     {'Y': 1, 'P': 2, 'N': 0}, 
    
#     # PoolQC
#     {'Ex': 1, 'Gd': 2, 'TA': 3, 'Fa': 4, 'DNE': 0},
    
#     # Fence
#     {'GdPrv': 1, 'MnPrv': 2, 'GdWo': 3, 'MnWw': 4, 'DNE': 0},

#     # MiscFeature
#     {'Elev': 1, 'Gar2': 2, 'Othr': 3, 'Shed': 4, 'TenC': 5, 'DNE': 0},
    
#     # SaleType
#     {'WD': 1, 'CWD': 2, 'VWD': 3, 'New': 4, 'COD': 5, 'Con': 6, 'ConLw': 7,
#      'ConLI': 8, 'ConLD': 9, 'Oth': 10},
    
#     # SaleCondition
#     {'Normal': 1, 'Abnorml': 2, 'AdjLand': 3, 'Alloca': 4, 'Family': 5, 'Partial': 6}]