# Data Cleaning -Test Data

Data cleaning for training set is done in a separate notebook.Test dataset will also undergo the same cleaning and preprocessing steps as that of training data.

In [1]:
#imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline
import seaborn as sns

In [2]:
#Read the csv
test_df = pd.read_csv('datasets/test.csv')

In [3]:
test_df.head()

Unnamed: 0,Id,PID,MS SubClass,MS Zoning,Lot Frontage,Lot Area,Street,Alley,Lot Shape,Land Contour,...,3Ssn Porch,Screen Porch,Pool Area,Pool QC,Fence,Misc Feature,Misc Val,Mo Sold,Yr Sold,Sale Type
0,2658,902301120,190,RM,69.0,9142,Pave,Grvl,Reg,Lvl,...,0,0,0,,,,0,4,2006,WD
1,2718,905108090,90,RL,,9662,Pave,,IR1,Lvl,...,0,0,0,,,,0,8,2006,WD
2,2414,528218130,60,RL,58.0,17104,Pave,,IR1,Lvl,...,0,0,0,,,,0,9,2006,New
3,1989,902207150,30,RM,60.0,8520,Pave,,Reg,Lvl,...,0,0,0,,,,0,7,2007,WD
4,625,535105100,20,RL,,9500,Pave,,IR1,Lvl,...,0,185,0,,,,0,7,2009,WD


In [4]:
#Check for missing values of test dataset
test_df.isnull().sum().sort_values(ascending = False).head(30)

Pool QC           875
Misc Feature      838
Alley             821
Fence             707
Fireplace Qu      422
Lot Frontage      160
Garage Cond        45
Garage Qual        45
Garage Yr Blt      45
Garage Finish      45
Garage Type        44
Bsmt Exposure      25
BsmtFin Type 1     25
Bsmt Qual          25
BsmtFin Type 2     25
Bsmt Cond          25
Mas Vnr Area        1
Mas Vnr Type        1
Electrical          1
Overall Cond        0
Exter Qual          0
Exter Cond          0
Foundation          0
Exterior 2nd        0
Exterior 1st        0
Roof Matl           0
Roof Style          0
Year Remod/Add      0
Year Built          0
Sale Type           0
dtype: int64

### Meaningful Missing

In [5]:
#From above we could see that some of the categorical features whose missing values are meaningful by looking at the description of data.
# For example missing value in 'PoolQC' represents that there is no Pool in the property
#All the null values in Bsmt related varaibles are missing because there is no basement. 
#Therefore will replace all the missing values with 0 or None
# BsmtFin Type 2     55
# Bsmt Exposure      55
# Bsmt Cond          55
# Bsmt Qual          55
# BsmtFin Type 1     55
meaningful_missing = ["Alley", "Bsmt Qual", "Bsmt Cond", "Bsmt Exposure", "BsmtFin Type 1", 
                    "BsmtFin Type 2", "Fireplace Qu", "Garage Type", "Garage Finish", "Garage Qual", 
                    "Garage Cond", "Fence","Pool QC", "Misc Feature"]

In [6]:
#Fill the null values of meaningful missing columns with 'None'
for i in meaningful_missing:
    test_df[i].fillna("None", inplace=True)

In [7]:
# LotFrontage: Linear feet of street connected to property
# For LotFrontage, we will assume that if it is NaN, then there is 0 feet of property connected to the street
test_df['Lot Frontage'].fillna(0, inplace=True)

In [8]:
# Impute GarageYrBlt: wherever GarageYrBlt is null or 0, replace it with the year the house was built
test_df.loc[test_df['Garage Yr Blt'].isnull(), 'Garage Yr Blt']= test_df['Year Built']

In [9]:
#Garage Area and Garage Cars is null when Garage cond is None(meaning no garage)
#Therefore will impute all other null values with 0
test_df['Garage Cars'].fillna(0, inplace =True)
test_df['Garage Area'].fillna(0, inplace =True)

In [10]:
#Create new column as Garage age
test_df['Garage Age'] = 2021 - test_df['Garage Yr Blt']

In [11]:
test_df.drop(columns=['Garage Yr Blt'],inplace = True)

### Masonry

In [12]:
#Fill missing values for MasVnrType with "none"
test_df['Mas Vnr Type'].fillna("None", inplace=True)
#Fill missing values for MasVnrArea with 0
test_df['Mas Vnr Area'].fillna(0, inplace=True)

### Basement Baths

In [13]:
#we could see that Bsmt Full Bath and Bsmt Half Bath is null for no basement(Bsmt Cond is none, meaning no basement)
#Garage Area and Garage Cars is null when Garage cond is None(meaning no garage)
#Therefore will impute all other null values will None and 0

test_df['Bsmt Full Bath'].fillna(0, inplace =True)
test_df['Bsmt Half Bath'].fillna(0, inplace =True)
test_df['Total Bsmt SF'].fillna(0, inplace =True)
test_df['BsmtFin SF 2'].fillna(0, inplace =True)
test_df['Bsmt Unf SF'].fillna(0, inplace =True)
test_df['BsmtFin SF 1'].fillna(0, inplace =True)

### Electrical

In [14]:
# pd.set_option('display.max_rows', 100)
test_df['Electrical'].unique()

array(['FuseP', 'SBrkr', 'FuseA', 'FuseF', nan], dtype=object)

In [15]:
test_df['Electrical'] = test_df['Electrical'].fillna('None')

### Porch Area

In [16]:
#As there are many features indicating the porch area, will combine all ceate a new variable
test_df['zero'] = 0
test_df['HasPorch'] = 0

test_df['TotalPorchArea'] = test_df['Wood Deck SF'] + test_df['Open Porch SF'] + test_df['3Ssn Porch'] +test_df['Screen Porch'] + test_df['Enclosed Porch']

### Property_age

In [17]:
#Create new column as Property age
test_df['Property_age'] = 2021 - test_df['Year Built']

In [18]:
test_df.drop(columns = ['Year Built'], inplace =True)

In [19]:
#Create new column as Remod age
test_df['Remod Age'] = 2021 - test_df['Year Remod/Add']

In [20]:
test_df.drop(columns = ['Year Remod/Add'], inplace =True)

In [21]:
test_df = test_df.drop(['PID','zero'],axis = 1)

In [22]:
test_df.set_index('Id',inplace=True)

In [23]:
#Convert 'MSSubClass' column to type(str)
test_df['MS SubClass'] = test_df['MS SubClass'].astype('str')

In [24]:
numerical_features = test_df.select_dtypes(include = ['int64', 'float64']).columns.to_list()
numerical_features

['Lot Frontage',
 'Lot Area',
 'Overall Qual',
 'Overall Cond',
 'Mas Vnr Area',
 'BsmtFin SF 1',
 'BsmtFin SF 2',
 'Bsmt Unf SF',
 'Total Bsmt SF',
 '1st Flr SF',
 '2nd Flr SF',
 'Low Qual Fin SF',
 'Gr Liv Area',
 'Bsmt Full Bath',
 'Bsmt Half Bath',
 'Full Bath',
 'Half Bath',
 'Bedroom AbvGr',
 'Kitchen AbvGr',
 'TotRms AbvGrd',
 'Fireplaces',
 'Garage Cars',
 'Garage Area',
 'Wood Deck SF',
 'Open Porch SF',
 'Enclosed Porch',
 '3Ssn Porch',
 'Screen Porch',
 'Pool Area',
 'Misc Val',
 'Mo Sold',
 'Yr Sold',
 'Garage Age',
 'HasPorch',
 'TotalPorchArea',
 'Property_age',
 'Remod Age']

In [25]:
categorical_features = test_df.select_dtypes(include = 'object').columns.to_list()
categorical_features

['MS SubClass',
 'MS Zoning',
 'Street',
 'Alley',
 'Lot Shape',
 'Land Contour',
 'Utilities',
 'Lot Config',
 'Land Slope',
 'Neighborhood',
 'Condition 1',
 'Condition 2',
 'Bldg Type',
 'House Style',
 'Roof Style',
 'Roof Matl',
 'Exterior 1st',
 'Exterior 2nd',
 'Mas Vnr Type',
 'Exter Qual',
 'Exter Cond',
 'Foundation',
 'Bsmt Qual',
 'Bsmt Cond',
 'Bsmt Exposure',
 'BsmtFin Type 1',
 'BsmtFin Type 2',
 'Heating',
 'Heating QC',
 'Central Air',
 'Electrical',
 'Kitchen Qual',
 'Functional',
 'Fireplace Qu',
 'Garage Type',
 'Garage Finish',
 'Garage Qual',
 'Garage Cond',
 'Paved Drive',
 'Pool QC',
 'Fence',
 'Misc Feature',
 'Sale Type']

In [26]:
ordinal_features = [
 'Lot Shape',
 'Land Slope',
 'Exter Qual',
 'Exter Cond',
 'Bsmt Qual',
 'Bsmt Cond',
 'Bsmt Exposure',
 'BsmtFin Type 1',
 'BsmtFin Type 2',
 'Heating QC',
 'Electrical',
 'Kitchen Qual',
 'Functional',
 'Fireplace Qu',
 'Garage Finish',
 'Garage Qual',
 'Garage Cond',
 'Paved Drive',
 'Pool QC',
 'Fence','Utilities']

In [27]:
for feature in ordinal_features:
    
    print(test_df[feature].value_counts())
    print('-------------------------')

Reg    564
IR1    287
IR2     21
IR3      7
Name: Lot Shape, dtype: int64
-------------------------
Gtl    836
Mod     37
Sev      6
Name: Land Slope, dtype: int64
-------------------------
TA    552
Gd    292
Ex     26
Fa      9
Name: Exter Qual, dtype: int64
-------------------------
TA    771
Gd     84
Fa     18
Ex      5
Po      1
Name: Exter Cond, dtype: int64
-------------------------
TA      396
Gd      355
Ex       74
Fa       28
None     25
Po        1
Name: Bsmt Qual, dtype: int64
-------------------------
TA      782
Fa       39
Gd       33
None     25
Name: Bsmt Cond, dtype: int64
-------------------------
No      567
Av      130
Gd       81
Mn       76
None     25
Name: Bsmt Exposure, dtype: int64
-------------------------
Unf     248
GLQ     244
ALQ     136
Rec     105
BLQ      69
LwQ      52
None     25
Name: BsmtFin Type 1, dtype: int64
-------------------------
Unf     750
LwQ      29
Rec      26
None     25
BLQ      20
ALQ      18
GLQ      11
Name: BsmtFin Type 2, dty

In [28]:
test_df.replace(to_replace = {
    'Land Slope': {'Gtl': 2, 'Mod':1, 'Sev': 0},
    'Lot Shape': {'Reg':3, 'IR1':2, 'IR2': 1, 'IR3': 0},
    'Paved Drive': {'Y': 2, 'P':1, 'N': 0},
    'Pool QC': {'Ex': 4, 'Gd': 3, 'TA':2, 'Fa':1, 'None': 0},
    'Bsmt Cond': {'Ex': 5, 'Gd': 4, 'TA':3, 'Fa':2, 'Po': 1, 'None': 0},
    'Bsmt Exposure': {'Gd': 4, 'Av':3, 'Mn':2, 'No': 1, 'None': 0},
    'Bsmt Qual': {'Ex': 5, 'Gd': 4, 'TA':3, 'Fa':2, 'Po': 1, 'None': 0},
    'BsmtFin Type 1': {'GLQ': 6, 'ALQ': 5, 'BLQ': 4, 'Rec': 3, 'LwQ': 2, 'Unf': 1, 'None': 0},
    'BsmtFin Type 2': {'GLQ': 6, 'ALQ': 5, 'BLQ': 4, 'Rec': 3, 'LwQ': 2, 'Unf': 1, 'None': 0},
    'Electrical': {'SBrkr': 4, 'FuseA': 3, 'FuseF':2, 'FuseP':1, 'Mix': 0},
    'Exter Cond': {'Ex': 4, 'Gd': 3, 'TA':2, 'Fa':1, 'Po': 0},
    'Exter Qual': {'Ex': 4, 'Gd': 3, 'TA':2, 'Fa':1, 'Po': 0},
    'Fence': {'GdPrv': 4, 'MnPrv': 3, 'GdWo': 2, 'MnWw': 1, 'None': 0},
    'Fireplace Qu': {'Ex': 5, 'Gd': 4, 'TA':3, 'Fa':2, 'Po': 1, 'None': 0},
    'Functional': {'Typ': 7, 'Min1': 6, 'Min2': 5, 'Mod': 4, 'Maj1': 3, 'Maj2': 2, 'Sev': 1, 'Sal': 0},
    'Garage Cond': {'Ex': 5, 'Gd': 4, 'TA':3, 'Fa':2, 'Po': 1, 'None': 0},
    'Garage Finish': {'Fin':3, 'RFn':2, 'Unf': 1, 'None': 0},
    'Garage Qual': {'Ex': 5, 'Gd': 4, 'TA':3, 'Fa':2, 'Po': 1,'None': 0},
    'Heating QC': {'Ex': 4, 'Gd': 3, 'TA':2, 'Fa':1, 'Po': 0},
    'Kitchen Qual': {'Ex': 4, 'Gd': 3, 'TA':2, 'Fa':1, 'Po': 0},
    
}, inplace = True)

In [29]:
nominal_features = test_df.select_dtypes(include = 'object').columns.to_list()

nominal_features

['MS SubClass',
 'MS Zoning',
 'Street',
 'Alley',
 'Land Contour',
 'Utilities',
 'Lot Config',
 'Neighborhood',
 'Condition 1',
 'Condition 2',
 'Bldg Type',
 'House Style',
 'Roof Style',
 'Roof Matl',
 'Exterior 1st',
 'Exterior 2nd',
 'Mas Vnr Type',
 'Foundation',
 'Heating',
 'Central Air',
 'Electrical',
 'Garage Type',
 'Misc Feature',
 'Sale Type']

In [30]:
test_prototype = pd.get_dummies(test_df, columns = nominal_features)

test_prototype.shape

(879, 229)

In [31]:
test_prototype.to_csv('datasets/test_prototype.csv')

In [32]:
test_df.drop(columns = ['Low Qual Fin SF'], inplace = True)

In [33]:
# Since 'BsmtFin SF 2 have low correlation with saleprice, will drop it.
test_df.drop(columns = ['BsmtFin SF 2'], inplace = True)

In [34]:
test_df.drop(columns = ['Garage Cars'], inplace = True)

In [35]:
#Drop these variables as they can be represent with total porch area which also has got high correaltion
test_df = test_df.drop(['Wood Deck SF', 'Open Porch SF', '3Ssn Porch', 'Screen Porch', 'Enclosed Porch','HasPorch'], axis = 1)

In [36]:
#Low correaltion and dropped
test_df.drop(columns = ['Pool Area','Misc Val'], inplace = True)

In [37]:
#Low correlation and dropped
test_df.drop(columns = ['Mo Sold', 'Yr Sold'], inplace = True)

In [38]:
test_df.drop(columns = ['Bsmt Half Bath'], inplace = True)

In [39]:
#Some variables exhibit skewness(freq > 2000). Therefore will drop these variables
test_df.drop(columns = ['Street','Utilities','Condition 2','Roof Matl','Heating'], inplace = True)
test_df.shape

(879, 61)

In [40]:
nominal_features = test_df.select_dtypes(include = 'object').columns.to_list()
nominal_features

['MS SubClass',
 'MS Zoning',
 'Alley',
 'Land Contour',
 'Lot Config',
 'Neighborhood',
 'Condition 1',
 'Bldg Type',
 'House Style',
 'Roof Style',
 'Exterior 1st',
 'Exterior 2nd',
 'Mas Vnr Type',
 'Foundation',
 'Central Air',
 'Electrical',
 'Garage Type',
 'Misc Feature',
 'Sale Type']

In [41]:
test_df = pd.get_dummies(data = test_df, columns = nominal_features)
test_df.shape

(879, 197)

In [42]:
pd.set_option('display.max_columns', 300)
test_df.head()

Unnamed: 0_level_0,Lot Frontage,Lot Area,Lot Shape,Land Slope,Overall Qual,Overall Cond,Mas Vnr Area,Exter Qual,Exter Cond,Bsmt Qual,Bsmt Cond,Bsmt Exposure,BsmtFin Type 1,BsmtFin SF 1,BsmtFin Type 2,Bsmt Unf SF,Total Bsmt SF,Heating QC,1st Flr SF,2nd Flr SF,Gr Liv Area,Bsmt Full Bath,Full Bath,Half Bath,Bedroom AbvGr,Kitchen AbvGr,Kitchen Qual,TotRms AbvGrd,Functional,Fireplaces,Fireplace Qu,Garage Finish,Garage Area,Garage Qual,Garage Cond,Paved Drive,Pool QC,Fence,Garage Age,TotalPorchArea,Property_age,Remod Age,MS SubClass_120,MS SubClass_160,MS SubClass_180,MS SubClass_190,MS SubClass_20,MS SubClass_30,MS SubClass_40,MS SubClass_45,MS SubClass_50,MS SubClass_60,MS SubClass_70,MS SubClass_75,MS SubClass_80,MS SubClass_85,MS SubClass_90,MS Zoning_C (all),MS Zoning_FV,MS Zoning_I (all),MS Zoning_RH,MS Zoning_RL,MS Zoning_RM,Alley_Grvl,Alley_None,Alley_Pave,Land Contour_Bnk,Land Contour_HLS,Land Contour_Low,Land Contour_Lvl,Lot Config_Corner,Lot Config_CulDSac,Lot Config_FR2,Lot Config_FR3,Lot Config_Inside,Neighborhood_Blmngtn,Neighborhood_Blueste,Neighborhood_BrDale,Neighborhood_BrkSide,Neighborhood_ClearCr,Neighborhood_CollgCr,Neighborhood_Crawfor,Neighborhood_Edwards,Neighborhood_Gilbert,Neighborhood_Greens,Neighborhood_IDOTRR,Neighborhood_MeadowV,Neighborhood_Mitchel,Neighborhood_NAmes,Neighborhood_NPkVill,Neighborhood_NWAmes,Neighborhood_NoRidge,Neighborhood_NridgHt,Neighborhood_OldTown,Neighborhood_SWISU,Neighborhood_Sawyer,Neighborhood_SawyerW,Neighborhood_Somerst,Neighborhood_StoneBr,Neighborhood_Timber,Neighborhood_Veenker,Condition 1_Artery,Condition 1_Feedr,Condition 1_Norm,Condition 1_PosA,Condition 1_PosN,Condition 1_RRAe,Condition 1_RRAn,Condition 1_RRNe,Condition 1_RRNn,Bldg Type_1Fam,Bldg Type_2fmCon,Bldg Type_Duplex,Bldg Type_Twnhs,Bldg Type_TwnhsE,House Style_1.5Fin,House Style_1.5Unf,House Style_1Story,House Style_2.5Fin,House Style_2.5Unf,House Style_2Story,House Style_SFoyer,House Style_SLvl,Roof Style_Flat,Roof Style_Gable,Roof Style_Gambrel,Roof Style_Hip,Roof Style_Mansard,Roof Style_Shed,Exterior 1st_AsbShng,Exterior 1st_AsphShn,Exterior 1st_BrkComm,Exterior 1st_BrkFace,Exterior 1st_CemntBd,Exterior 1st_HdBoard,Exterior 1st_MetalSd,Exterior 1st_Plywood,Exterior 1st_PreCast,Exterior 1st_Stucco,Exterior 1st_VinylSd,Exterior 1st_Wd Sdng,Exterior 1st_WdShing,Exterior 2nd_AsbShng,Exterior 2nd_AsphShn,Exterior 2nd_Brk Cmn,Exterior 2nd_BrkFace,Exterior 2nd_CBlock,Exterior 2nd_CmentBd,Exterior 2nd_HdBoard,Exterior 2nd_ImStucc,Exterior 2nd_MetalSd,Exterior 2nd_Other,Exterior 2nd_Plywood,Exterior 2nd_PreCast,Exterior 2nd_Stucco,Exterior 2nd_VinylSd,Exterior 2nd_Wd Sdng,Exterior 2nd_Wd Shng,Mas Vnr Type_BrkCmn,Mas Vnr Type_BrkFace,Mas Vnr Type_CBlock,Mas Vnr Type_None,Mas Vnr Type_Stone,Foundation_BrkTil,Foundation_CBlock,Foundation_PConc,Foundation_Slab,Foundation_Stone,Foundation_Wood,Central Air_N,Central Air_Y,Electrical_1,Electrical_2,Electrical_3,Electrical_4,Electrical_None,Garage Type_2Types,Garage Type_Attchd,Garage Type_Basment,Garage Type_BuiltIn,Garage Type_CarPort,Garage Type_Detchd,Garage Type_None,Misc Feature_Gar2,Misc Feature_None,Misc Feature_Othr,Misc Feature_Shed,Sale Type_COD,Sale Type_CWD,Sale Type_Con,Sale Type_ConLD,Sale Type_ConLI,Sale Type_ConLw,Sale Type_New,Sale Type_Oth,Sale Type_VWD,Sale Type_WD
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1,Unnamed: 146_level_1,Unnamed: 147_level_1,Unnamed: 148_level_1,Unnamed: 149_level_1,Unnamed: 150_level_1,Unnamed: 151_level_1,Unnamed: 152_level_1,Unnamed: 153_level_1,Unnamed: 154_level_1,Unnamed: 155_level_1,Unnamed: 156_level_1,Unnamed: 157_level_1,Unnamed: 158_level_1,Unnamed: 159_level_1,Unnamed: 160_level_1,Unnamed: 161_level_1,Unnamed: 162_level_1,Unnamed: 163_level_1,Unnamed: 164_level_1,Unnamed: 165_level_1,Unnamed: 166_level_1,Unnamed: 167_level_1,Unnamed: 168_level_1,Unnamed: 169_level_1,Unnamed: 170_level_1,Unnamed: 171_level_1,Unnamed: 172_level_1,Unnamed: 173_level_1,Unnamed: 174_level_1,Unnamed: 175_level_1,Unnamed: 176_level_1,Unnamed: 177_level_1,Unnamed: 178_level_1,Unnamed: 179_level_1,Unnamed: 180_level_1,Unnamed: 181_level_1,Unnamed: 182_level_1,Unnamed: 183_level_1,Unnamed: 184_level_1,Unnamed: 185_level_1,Unnamed: 186_level_1,Unnamed: 187_level_1,Unnamed: 188_level_1,Unnamed: 189_level_1,Unnamed: 190_level_1,Unnamed: 191_level_1,Unnamed: 192_level_1,Unnamed: 193_level_1,Unnamed: 194_level_1,Unnamed: 195_level_1,Unnamed: 196_level_1,Unnamed: 197_level_1
2658,69.0,9142,3,2,6,8,0.0,2,1,2,3,1,1,0,1,1020,1020,3,908,1020,1928,0,2,0,4,2,1,9,7,0,0,1,440,1,1,2,0,0,111.0,172,111,71,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1
2718,0.0,9662,2,2,5,4,0.0,2,2,4,3,1,1,0,1,1967,1967,2,1967,0,1967,0,2,0,6,2,2,10,7,0,0,3,580,3,3,2,0,0,44.0,170,44,44,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1
2414,58.0,17104,2,2,7,5,0.0,3,2,4,4,3,6,554,1,100,654,4,664,832,1496,1,2,1,3,1,3,7,7,1,4,2,426,3,3,2,0,0,15.0,124,15,15,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0
1989,60.0,8520,3,2,5,6,0.0,3,2,3,3,1,1,0,1,968,968,2,968,0,968,0,1,0,2,1,2,5,7,0,0,1,480,2,3,0,0,0,86.0,184,98,15,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1
625,0.0,9500,2,2,6,5,247.0,2,2,4,3,1,4,609,1,785,1394,3,1394,0,1394,1,1,1,3,1,2,6,7,2,4,2,514,3,3,2,0,0,58.0,261,58,58,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1


In [43]:
#Export the dataset
test_df.to_csv('datasets/test_clean.csv')