In [117]:
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.impute import KNNImputer
from sklearn.preprocessing import RobustScaler
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
import matplotlib.pyplot as plt

In [2]:
#Importing training data
df_train = pd.read_csv('train.csv', index_col = 'Id')

In [3]:
#Importing test data
df_test = pd.read_csv('test.csv', index_col = 'Id')

In [4]:
#Create train and target data
y_train = pd.DataFrame(df_train['SalePrice'])
df_train = df_train.drop('SalePrice', axis = 1)

In [5]:
#Join train and test data to match column count after encoding
joined = pd.concat([df_train, df_test])

In [6]:
#Check for null values
joined[joined.columns[joined.isnull().sum() > 0]].isna().mean()*100

MSZoning         0.137033
LotFrontage     16.649538
Alley           93.216855
Utilities        0.068517
Exterior1st      0.034258
Exterior2nd      0.034258
MasVnrType       0.822199
MasVnrArea       0.787941
BsmtQual         2.774923
BsmtCond         2.809181
BsmtExposure     2.809181
BsmtFinType1     2.706406
BsmtFinSF1       0.034258
BsmtFinType2     2.740665
BsmtFinSF2       0.034258
BsmtUnfSF        0.034258
TotalBsmtSF      0.034258
Electrical       0.034258
BsmtFullBath     0.068517
BsmtHalfBath     0.068517
KitchenQual      0.034258
Functional       0.068517
FireplaceQu     48.646797
GarageType       5.378554
GarageYrBlt      5.447071
GarageFinish     5.447071
GarageCars       0.034258
GarageArea       0.034258
GarageQual       5.447071
GarageCond       5.447071
PoolQC          99.657417
Fence           80.438506
MiscFeature     96.402878
SaleType         0.034258
dtype: float64

In [7]:
#Drop columns with much null values
joined = joined.drop(['Alley', 'FireplaceQu', 'PoolQC', 'Fence', 'MiscFeature'], axis = 1)

In [8]:
#Handling MSSubClass column which identifies the type of dwelling involved in the sale and has integer values
joined['MSSubClass'] = joined['MSSubClass'].astype(str)

In [9]:
#Encoding and handling null values
joined_enc = pd.get_dummies(joined)
imputer = KNNImputer()
joined_final = pd.DataFrame(imputer.fit_transform(joined_enc))
joined_final.columns = joined_enc.columns

In [10]:
#Scaling
scaler = RobustScaler()
target_scaler = RobustScaler()
joined_final_copy = joined_final.copy()
joined_final = pd.DataFrame(scaler.fit_transform(joined_final))
joined_final.columns = joined_final_copy.columns
y_train_copy = y_train.copy()
y_train = pd.DataFrame(target_scaler.fit_transform(y_train))
y_train.columns = y_train_copy.columns

In [11]:
#Separate train and test data
x_train = joined_final.iloc[0:1460, :]
x_test = joined_final.iloc[1460:, :]

In [73]:
#Choose regression models
models = [LinearRegression(), KNeighborsRegressor(), SVR(), DecisionTreeRegressor()]

In [120]:
#Fit and predict the models
results = []
for model in models:
    model.fit(x_train, y_train)
    pred = model.predict(x_test)
    results.append(str(model).replace("()", "") + ": " + str(target_scaler.inverse_transform([[pred.mean()]])))
results

  return f(**kwargs)


['LinearRegression: [[179127.98768654]]',
 'KNeighborsRegressor: [[175570.99040439]]',
 'SVR: [[164580.86971219]]',
 'DecisionTreeRegressor: [[178323.7868403]]']

In [113]:
#Sadly no target test data for evaluation, can't compare results :(