In [None]:
# Basic Libraries
import numpy as np
import pandas as pd
import seaborn as sb
import matplotlib.pyplot as plt # we only need pyplot
sb.set() # set the default Seaborn style for graphics
from scipy import stats
import math
from sklearn.metrics import mean_squared_error, mean_absolute_error
from statistics import mean, median, mode, stdev
from sklearn import preprocessing
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures
from scipy.optimize import minimize
from sklearn.model_selection import KFold

from sklearn.metrics import r2_score, mean_squared_error

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

## Import dataset

In [None]:
train = pd.read_csv('train.csv')

Cleaning nulls + one-hot encoding (OHE)

In [None]:
# Handle missing values in the columns listed below: a column whose share of
# nulls exceeds `treshold` is dropped entirely; for the remaining columns the
# rows containing nulls are removed instead.
null_columns = ['LotFrontage', 'MasVnrArea', 'Electrical', 'GarageYrBlt']

# Count the nulls from the loaded frame instead of hard-coding them, and skip
# columns that are no longer present so that re-running this cell does not
# raise a KeyError on an already-dropped column.
nullData = [[col, int(train[col].isna().sum())] for col in null_columns if col in train.columns]
n = len(train)
treshold = 0.1  # arbitrary threshold: 10% of rows
drop = []

print('Drop feature - too many nulls:')
for col, null_count in nullData:
    if null_count / n > treshold:
        print(col)
        train.drop(columns=[col], inplace=True)
    else:
        # Few enough nulls: keep the feature, remove the affected rows below.
        drop.append(col)

print('Remove data point:')
print(drop)
train.dropna(subset=drop, inplace=True)

In [None]:
# One-hot encoding of every categorical feature.
categoricalcolumns = [
    'MSSubClass', 'MSZoning', 'Street', 'Alley', 'LotShape', 'LandContour',
    'Utilities', 'LotConfig', 'LandSlope', 'Neighborhood', 'Condition1',
    'Condition2', 'BldgType', 'HouseStyle', 'OverallQual', 'OverallCond',
    'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd', 'MasVnrType',
    'ExterQual', 'ExterCond', 'Foundation', 'BsmtQual', 'BsmtCond',
    'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2', 'Heating', 'HeatingQC',
    'CentralAir', 'Electrical', 'KitchenQual', 'Functional', 'FireplaceQu',
    'GarageType', 'GarageFinish', 'GarageQual', 'GarageCond', 'PavedDrive',
    'PoolQC', 'Fence', 'MiscFeature', 'SaleType', 'SaleCondition',
]
# Pin the dummy dtype to a numeric type: newer pandas releases default to
# boolean dummy columns, and a frame mixing bool with numeric columns converts
# to an object array, which Keras cannot turn into a tensor.
train0 = pd.get_dummies(
    train,
    columns=categoricalcolumns,
    prefix=categoricalcolumns,
    dtype=np.uint8,
)

In [None]:
# Features are every column except 'Id' and the 'SalePrice' target.
X = train0.drop(columns=['Id', 'SalePrice'])
y = train0['SalePrice']

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
train_X0, test_X0, train_Y0, test_Y0 = train_test_split(
    X, y, test_size=0.2, random_state=0
)

## Adam
Note: Nadam, Adamax, and Ftrl give better scores than Adam on the bare (un-preprocessed) set. <br>
Nadam was the best of these, so it is the only alternative kept below.


In [None]:
# Baseline: one Dense unit on the bare feature set, optimised with Adam.
model = tf.keras.models.Sequential()
# `shape` must be a tuple: `(n)` is just the int `n`, which newer Keras
# versions reject, so the trailing comma is required.
model.add(tf.keras.Input(shape=(train_X0.shape[1],)))
# NOTE(review): ReLU on the single regression output clamps predictions at 0;
# left unchanged so results stay comparable across sections - confirm intended.
model.add(tf.keras.layers.Dense(1, activation='relu'))
opt = keras.optimizers.Adam()
model.compile(opt, loss='mse')
# No epochs/batch_size are given, so Keras' defaults apply.
model.fit(train_X0, train_Y0)

In [None]:
# Score the fitted model on the training split.
y_pred = model.predict(train_X0)
print('Train mse loss\t:',
      mean_squared_error(y_true=train_Y0, y_pred=np.squeeze(y_pred)))
print('Train R2 score\t:',
      r2_score(y_true=train_Y0, y_pred=np.squeeze(y_pred)))

# Score the same model on the held-out split.
y_pred = model.predict(test_X0)
print('Test mse loss\t:',
      mean_squared_error(y_true=test_Y0, y_pred=np.squeeze(y_pred)))
print('Test R2 score\t:',
      r2_score(y_true=test_Y0, y_pred=np.squeeze(y_pred)))

## Nadam

In [None]:
# Same single-unit model as the Adam baseline, but optimised with Nadam.
model = tf.keras.models.Sequential()
# `shape` must be a tuple: `(n)` is just the int `n`, which newer Keras
# versions reject, so the trailing comma is required.
model.add(tf.keras.Input(shape=(train_X0.shape[1],)))
# NOTE(review): ReLU on the single regression output clamps predictions at 0;
# left unchanged so results stay comparable across sections - confirm intended.
model.add(tf.keras.layers.Dense(1, activation='relu'))
opt = keras.optimizers.Nadam()
model.compile(opt, loss='mse')
# No epochs/batch_size are given, so Keras' defaults apply.
model.fit(train_X0, train_Y0)

In [None]:
# Training-split metrics for the Nadam model.
y_pred = model.predict(train_X0)
print('Train mse loss\t:',
      mean_squared_error(y_true=train_Y0, y_pred=np.squeeze(y_pred)))
print('Train R2 score\t:',
      r2_score(y_true=train_Y0, y_pred=np.squeeze(y_pred)))

# Held-out metrics for the Nadam model.
y_pred = model.predict(test_X0)
print('Test mse loss\t:',
      mean_squared_error(y_true=test_Y0, y_pred=np.squeeze(y_pred)))
print('Test R2 score\t:',
      r2_score(y_true=test_Y0, y_pred=np.squeeze(y_pred)))

### Try fit to quadratic curve

In [None]:
# Expand the features with all polynomial terms up to degree `d`.
d = 2
poly = PolynomialFeatures(d)
# Fit the transformer on the training split only ...
train_X0_2 = poly.fit_transform(train_X0)
# ... and reuse the fitted transformer for the test split; re-fitting on test
# data is never appropriate for a preprocessing step.
test_X0_2 = poly.transform(test_X0)

In [None]:
# Single Dense unit trained with Adam on the degree-2 polynomial features.
model = tf.keras.models.Sequential()
# `shape` must be a tuple: `(n)` is just the int `n`, which newer Keras
# versions reject, so the trailing comma is required.
model.add(tf.keras.Input(shape=(train_X0_2.shape[1],)))
# NOTE(review): ReLU on the single regression output clamps predictions at 0;
# left unchanged so results stay comparable across sections - confirm intended.
model.add(tf.keras.layers.Dense(1, activation='relu'))
opt = keras.optimizers.Adam()
model.compile(opt, loss='mse')
# No epochs/batch_size are given, so Keras' defaults apply.
model.fit(train_X0_2, train_Y0)

# Metrics on the training split.
y_pred = model.predict(train_X0_2)
print('Train mse loss\t:', mean_squared_error(train_Y0, np.squeeze(y_pred)))
print('Train R2 score\t:', r2_score(train_Y0, np.squeeze(y_pred)))

# Metrics on the held-out split.
y_pred = model.predict(test_X0_2)
print('Test mse loss\t:', mean_squared_error(test_Y0, np.squeeze(y_pred)))
print('Test R2 score\t:', r2_score(test_Y0, np.squeeze(y_pred)))

## Try with pre-processing

### train_ONE

In [None]:
# Load the first pre-processed variant of the training data.
train1 = pd.read_csv('train_ONE.csv')

# Features are every column except 'Id' and the 'SalePrice' target.
X = train1.drop(columns=['Id', 'SalePrice'])
y = train1['SalePrice']

# Same 80/20 split and seed as used for the other data sets in this notebook.
train_X1, test_X1, train_Y1, test_Y1 = train_test_split(
    X, y, test_size=0.2, random_state=0
)

In [None]:
# Single Dense unit trained with Adam on the train_ONE features.
model = tf.keras.models.Sequential()
# `shape` must be a tuple: `(n)` is just the int `n`, which newer Keras
# versions reject, so the trailing comma is required.
model.add(tf.keras.Input(shape=(train_X1.shape[1],)))
# NOTE(review): ReLU on the single regression output clamps predictions at 0;
# left unchanged so results stay comparable across sections - confirm intended.
model.add(tf.keras.layers.Dense(1, activation='relu'))
opt = keras.optimizers.Adam()
model.compile(opt, loss='mse')
# No epochs/batch_size are given, so Keras' defaults apply.
model.fit(train_X1, train_Y1)

In [None]:
# Training-split metrics for the train_ONE model.
y_pred = model.predict(train_X1)
print('Train mse loss\t:',
      mean_squared_error(y_true=train_Y1, y_pred=np.squeeze(y_pred)))
print('Train R2 score\t:',
      r2_score(y_true=train_Y1, y_pred=np.squeeze(y_pred)))

# Held-out metrics for the train_ONE model.
y_pred = model.predict(test_X1)
print('Test mse loss\t:',
      mean_squared_error(y_true=test_Y1, y_pred=np.squeeze(y_pred)))
print('Test R2 score\t:',
      r2_score(y_true=test_Y1, y_pred=np.squeeze(y_pred)))

### train_TWO

In [None]:
# Load the second pre-processed variant of the training data.
train2 = pd.read_csv('train_TWO.csv')

# Features are every column except 'Id' and the 'SalePrice' target.
X = train2.drop(columns=['Id', 'SalePrice'])
y = train2['SalePrice']

# Same 80/20 split and seed as used for the other data sets in this notebook.
train_X2, test_X2, train_Y2, test_Y2 = train_test_split(
    X, y, test_size=0.2, random_state=0
)

In [None]:
# Single Dense unit trained with Adam on the train_TWO features.
model = tf.keras.models.Sequential()
# `shape` must be a tuple: `(n)` is just the int `n`, which newer Keras
# versions reject, so the trailing comma is required.
model.add(tf.keras.Input(shape=(train_X2.shape[1],)))
# NOTE(review): ReLU on the single regression output clamps predictions at 0;
# left unchanged so results stay comparable across sections - confirm intended.
model.add(tf.keras.layers.Dense(1, activation='relu'))
opt = keras.optimizers.Adam()
model.compile(opt, loss='mse')
# No epochs/batch_size are given, so Keras' defaults apply.
model.fit(train_X2, train_Y2)

In [None]:
# Training-split metrics for the train_TWO model.
y_pred = model.predict(train_X2)
print('Train mse loss\t:',
      mean_squared_error(y_true=train_Y2, y_pred=np.squeeze(y_pred)))
print('Train R2 score\t:',
      r2_score(y_true=train_Y2, y_pred=np.squeeze(y_pred)))

# Held-out metrics for the train_TWO model.
y_pred = model.predict(test_X2)
print('Test mse loss\t:',
      mean_squared_error(y_true=test_Y2, y_pred=np.squeeze(y_pred)))
print('Test R2 score\t:',
      r2_score(y_true=test_Y2, y_pred=np.squeeze(y_pred)))

### train_THREE

In [None]:
# Load the third pre-processed variant. The original cell re-read
# 'train_ONE.csv' here, which merely repeated the train_ONE experiment.
# NOTE(review): file name inferred from the train_ONE/train_TWO naming
# pattern - confirm that 'train_THREE.csv' is the intended file.
train3 = pd.read_csv('train_THREE.csv')

# 'SalePrice' is the target; features are every column except it and 'Id'.
y = train3['SalePrice']
X = train3.drop(['Id', 'SalePrice'], axis=1)

train_X3, test_X3, train_Y3, test_Y3 = train_test_split(X, y, test_size=0.2, random_state=0)

In [None]:
# Single Dense unit trained with Adam on the train_THREE features.
model = tf.keras.models.Sequential()
# `shape` must be a tuple: `(n)` is just the int `n`, which newer Keras
# versions reject, so the trailing comma is required.
model.add(tf.keras.Input(shape=(train_X3.shape[1],)))
# NOTE(review): ReLU on the single regression output clamps predictions at 0;
# left unchanged so results stay comparable across sections - confirm intended.
model.add(tf.keras.layers.Dense(1, activation='relu'))
opt = keras.optimizers.Adam()
model.compile(opt, loss='mse')
# No epochs/batch_size are given, so Keras' defaults apply.
model.fit(train_X3, train_Y3)

In [None]:
# Training-split metrics for the train_THREE model.
y_pred = model.predict(train_X3)
print('Train mse loss\t:',
      mean_squared_error(y_true=train_Y3, y_pred=np.squeeze(y_pred)))
print('Train R2 score\t:',
      r2_score(y_true=train_Y3, y_pred=np.squeeze(y_pred)))

# Held-out metrics for the train_THREE model.
y_pred = model.predict(test_X3)
print('Test mse loss\t:',
      mean_squared_error(y_true=test_Y3, y_pred=np.squeeze(y_pred)))
print('Test R2 score\t:',
      r2_score(y_true=test_Y3, y_pred=np.squeeze(y_pred)))

### train_FOUR

In [None]:
# Load the fourth pre-processed variant. The original cell re-read
# 'train_ONE.csv' here, which merely repeated the train_ONE experiment.
# NOTE(review): file name inferred from the train_ONE/train_TWO naming
# pattern - confirm that 'train_FOUR.csv' is the intended file.
train4 = pd.read_csv('train_FOUR.csv')

# 'SalePrice' is the target; features are every column except it and 'Id'.
y = train4['SalePrice']
X = train4.drop(['Id', 'SalePrice'], axis=1)

train_X4, test_X4, train_Y4, test_Y4 = train_test_split(X, y, test_size=0.2, random_state=0)

In [None]:
# Single Dense unit trained with Adam on the train_FOUR features.
model = tf.keras.models.Sequential()
# `shape` must be a tuple: `(n)` is just the int `n`, which newer Keras
# versions reject, so the trailing comma is required.
model.add(tf.keras.Input(shape=(train_X4.shape[1],)))
# NOTE(review): ReLU on the single regression output clamps predictions at 0;
# left unchanged so results stay comparable across sections - confirm intended.
model.add(tf.keras.layers.Dense(1, activation='relu'))
opt = keras.optimizers.Adam()
model.compile(opt, loss='mse')
# No epochs/batch_size are given, so Keras' defaults apply.
model.fit(train_X4, train_Y4)

In [None]:
# Training-split metrics for the train_FOUR model.
y_pred = model.predict(train_X4)
print('Train mse loss\t:',
      mean_squared_error(y_true=train_Y4, y_pred=np.squeeze(y_pred)))
print('Train R2 score\t:',
      r2_score(y_true=train_Y4, y_pred=np.squeeze(y_pred)))

# Held-out metrics for the train_FOUR model.
y_pred = model.predict(test_X4)
print('Test mse loss\t:',
      mean_squared_error(y_true=test_Y4, y_pred=np.squeeze(y_pred)))
print('Test R2 score\t:',
      r2_score(y_true=test_Y4, y_pred=np.squeeze(y_pred)))