# Part 5. Kaggle Competition Submission Code

## Importing libraries and setting global variables

In [1]:
# Import usual library
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Modeling library
from sklearn.linear_model import LinearRegression
from sklearn import metrics
from sklearn.model_selection import train_test_split, cross_val_score

# Load model library
import pickle

## Importing datasets

In [2]:
X = pd.read_csv('../datasets/test_cleaned.csv')

In [3]:
# View head
X.head(5)

Unnamed: 0,ms_zoning,lot_frontage,street,lot_shape,lot_contour,utilities,lot_config,lot_slope,neighborhood,condition_1,...,sale_type,sale_condition,has_2nd_floor_sf,total_sf,has_basement_sf,total_bath,has_open_porch,has_fireplace,has_wood_deck,log_lot_area
0,Res,60.0,1,1,1,AllPub,Inside,1,CollgCr,N,...,W,N,0,1980.0,1,2.0,0,0,0,8.995909
1,Res,60.0,1,0,0,AllPub,Corner,1,BrkSide,N,...,W,N,1,3176.0,1,1.5,0,1,0,8.960981
2,Res,40.0,1,0,1,AllPub,Inside,1,NridgHt,N,...,N,P,0,2736.0,1,2.0,1,1,1,8.823501
3,Res,44.0,1,0,1,AllPub,Inside,1,NridgHt,N,...,N,P,0,2716.0,1,3.0,1,1,1,8.759512
4,Res,70.0,1,0,1,AllPub,Inside,1,SawyerW,N,...,W,N,1,2778.0,1,2.5,1,0,1,9.024493


## Loading the model

In [4]:
filename = '../model/all_num_10_cat.sav'
model = pickle.load(open(filename, 'rb'))

### Predicting y

In [5]:
# Select all quality related categorical columns
cat_cols = ['external_quality','basement_quality','heating_quality','kitchen_quality','fireplace_quality',
           'garage_quality','garage_fin','building_type','house_style','sale_type','sale_condition',
            'condition_1','masonry_type','roof_style']

In [6]:
# Selecting all numerical columns
num_cols = ['overall_quality','total_sf','garage_cars','total_bath','year_built',
            'has_fireplace','total_rooms_above_ground','has_open_porch','masonry_area','log_lot_area',
            'lot_frontage','has_wood_deck','central_air','has_basement_sf','bedroom_above_ground','functional',
            'street','has_2nd_floor_sf','month_sold','year_sold','lot_contour','lot_slope','overall_condition',
            'kitchen_above_ground','lot_shape']

In [7]:
# Combine columns
cols = num_cols + cat_cols

In [8]:
# Selecting X and y for model training
X = X[cols]

In [9]:
# Get dummies
X = pd.get_dummies(columns=list(X.select_dtypes(include='object').columns), data = X, drop_first=True)

In [10]:
# predict
y_pred = model.predict(X)

In [11]:
# transform back
y_pred_exp = np.exp(y_pred)

In [12]:
# get Order column
test = pd.read_csv('../datasets/test.csv')

In [13]:
# generate dataframe
pred_df = pd.DataFrame({'Order':test['Order'], 'SalePrice':y_pred_exp})

In [14]:
# write to submission.csv
pred_df.to_csv('../datasets/submission.csv', index=False)

In [15]:
# display X columns for kaggle report
X.columns

Index(['overall_quality', 'total_sf', 'garage_cars', 'total_bath',
       'year_built', 'has_fireplace', 'total_rooms_above_ground',
       'has_open_porch', 'masonry_area', 'log_lot_area', 'lot_frontage',
       'has_wood_deck', 'central_air', 'has_basement_sf',
       'bedroom_above_ground', 'functional', 'street', 'has_2nd_floor_sf',
       'month_sold', 'year_sold', 'lot_contour', 'lot_slope',
       'overall_condition', 'kitchen_above_ground', 'lot_shape',
       'external_quality_Fa', 'external_quality_Gd', 'external_quality_TA',
       'basement_quality_E', 'basement_quality_G', 'basement_quality_O',
       'heating_quality_Fa', 'heating_quality_Gd', 'heating_quality_TA',
       'kitchen_quality_Gd', 'kitchen_quality_Ta', 'fireplace_quality_Fa',
       'fireplace_quality_Gd', 'fireplace_quality_None',
       'fireplace_quality_Po', 'fireplace_quality_TA', 'garage_quality_G',
       'garage_quality_O', 'garage_fin_None', 'garage_fin_RFn',
       'garage_fin_Unf', 'building_type_2

----