# Artificial Neural Network

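An artificial neural network with the configuration below reached a root-mean-squared error (RMSE) of 0.166. Note that this figure is not on the same scale as the hold-out RMSE printed later in the notebook (about 27,914), which is computed on the raw target values.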

In [1]:
import pandas as pd
import numpy as np

In [2]:
import tensorflow as tf

In [3]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error

In [4]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

In [5]:
# Load the three CSV inputs in one pass:
#   train    - rows whose last column is used as the regression target
#   testset  - rows to predict on (feature columns only are used later)
#   test_all - only its 'Id' column is used, when writing the predictions file
train, testset, test_all = (
    pd.read_csv(csv_name)
    for csv_name in ('dataset.csv', 'testset.csv', 'test.csv')
)

In [6]:
# Remove the identifier column so it is not used as a feature.
# `train` is rebound to its own result, so a second run of this cell would
# previously fail with KeyError ('Id' already gone); errors='ignore' makes the
# cell safe to re-run without restarting the kernel.
train = train.drop(columns=['Id'], errors='ignore')
# `test` is derived from the untouched `testset`, so this line is already idempotent.
test = testset.drop(columns=['Id'])

In [7]:
# Every column except the last is a feature; the last column is the target.
X, y = train.iloc[:, :-1].values, train.iloc[:, -1].values
# The prediction set has no target column, so the whole frame is features.
test_data = test.values

In [8]:
def transform_columns(X, test_data, columns):
    """One-hot encode selected columns of both feature sets.

    The encoder is fitted on ``X`` only and then re-used for ``test_data``,
    so both outputs share the same column layout. Columns not listed in
    ``columns`` are passed through unchanged.

    Args:
        X: Training feature matrix.
        test_data: Feature matrix of the rows to predict on.
        columns: Column selection handed to ``ColumnTransformer`` for the
            ``OneHotEncoder`` step.

    Returns:
        Tuple ``(X_encoded, test_data_encoded)`` exactly as returned by the
        transformer.
    """
    one_hot_step = ('encoder', OneHotEncoder(), columns)
    transformer = ColumnTransformer(transformers=[one_hot_step], remainder='passthrough')

    encoded_train = transformer.fit_transform(X)
    encoded_test = transformer.transform(test_data)
    return encoded_train, encoded_test

def standardize(X_train, X_test, test_data):
    """Scale three feature matrices with one scaler fitted on the training split.

    The scaler is created with ``with_mean=False`` and fitted on ``X_train``
    only; the same fitted scaler is then applied to all three inputs.

    Args:
        X_train: Training split; the scaler is fitted on this matrix.
        X_test: Validation split, transformed with the fitted scaler.
        test_data: Rows to predict on, transformed with the fitted scaler.

    Returns:
        Tuple ``(X_train, X_test, test_data)`` of the scaled matrices.
    """
    scaler = StandardScaler(with_mean=False)
    scaler.fit(X_train)

    scaled_train, scaled_holdout, scaled_submission = (
        scaler.transform(matrix) for matrix in (X_train, X_test, test_data)
    )
    return scaled_train, scaled_holdout, scaled_submission
    
def preds_to_file(test_all, preds, filename):
    """Write predictions to a two-column CSV (``Id``, ``SalePrice``).

    Args:
        test_all: DataFrame providing the ``Id`` column, one row per prediction.
        preds: Predictions as an ``(n, 1)`` column array, a flat ``(n,)``
            vector, or a sequence of scalars/arrays; flattened to 1-D in order.
        filename: Destination path of the CSV file (written without the index).

    Raises:
        ValueError: If the number of flattened predictions does not match the
            number of ids.
    """
    # The original np.concatenate(preds) only worked when every element of
    # `preds` was itself an array (e.g. the (n, 1) output of model.predict)
    # and raised on a plain 1-D vector. Flatten in a way that accepts both.
    if isinstance(preds, np.ndarray):
        flat_preds = preds.reshape(-1)
    else:
        flat_preds = np.concatenate([np.ravel(chunk) for chunk in preds])

    ids = test_all["Id"]
    if len(flat_preds) != len(ids):
        raise ValueError(
            f"Got {len(flat_preds)} predictions for {len(ids)} ids; "
            "they must have the same length."
        )

    output = pd.DataFrame({'Id': ids, 'SalePrice': flat_preds})
    output.to_csv(filename, index=False)
    print(f"File {filename} created!")

In [9]:
def get_ann():
    """Build the (uncompiled) fully connected regression network.

    The model is a Keras ``Sequential`` stack of five ReLU hidden layers with
    135, 135, 300, 135 and 135 units, followed by a single-unit output layer
    with no activation specified. No input shape is declared here.

    Returns:
        The freshly constructed ``tf.keras.models.Sequential`` model.
    """
    hidden_layer_sizes = (135, 135, 300, 135, 135)

    ann = tf.keras.models.Sequential()
    for n_units in hidden_layer_sizes:
        ann.add(tf.keras.layers.Dense(units=n_units, activation='relu'))
    ann.add(tf.keras.layers.Dense(units=1))

    return ann

In [10]:
def perform_discriminant_analysis(n_components, X_train, X_test, test_data, targets=None):
    """Project three feature matrices with a Linear Discriminant Analysis fit.

    The LDA model is fitted on ``X_train`` together with the training targets
    and the fitted projection is then applied to ``X_test`` and ``test_data``.

    Args:
        n_components: Number of components passed to
            ``LinearDiscriminantAnalysis``.
        X_train: Training feature matrix; the projection is fitted on it.
        X_test: Validation feature matrix, transformed with the fitted model.
        test_data: Feature matrix of the rows to predict on.
        targets: Targets aligned with ``X_train``. When omitted, the
            notebook-level ``y_train`` variable is used, which is what the
            function always did before this parameter existed.

    Returns:
        Tuple ``(X_train, X_test, test_data)`` of the projected matrices.

    NOTE(review): LinearDiscriminantAnalysis is fitted against class labels;
    the target used in this notebook looks like a continuous price, so confirm
    this step is meaningful before enabling it.
    """
    if targets is None:
        # Backward-compatible fallback to the notebook global; prefer passing
        # `targets` explicitly so the function does not depend on hidden state.
        targets = y_train

    lda = LinearDiscriminantAnalysis(n_components=n_components)
    X_train = lda.fit_transform(X_train, targets)
    X_test = lda.transform(X_test)
    test_data = lda.transform(test_data)

    return X_train, X_test, test_data

In [11]:
 # Positions (within X, i.e. after 'Id' was dropped) of the columns to one-hot encode.
 # NOTE: this cell rebinds X and test_data; re-run the cell that builds them
 # from `train` / `test` before running this one a second time.
 categorical_cols = [1, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 32, 36, 37, 38, 39, 50, 52, 54, 56, 59, 60, 61, 71, 72]
 X, test_data = transform_columns(X, test_data, categorical_cols)
 # ColumnTransformer returns a sparse matrix only when the stacked output is
 # sparse enough (its sparse_threshold); otherwise it is already a dense array
 # and has no .toarray(). Densify only when a sparse result actually came back.
 X = X.toarray() if hasattr(X, 'toarray') else np.asarray(X)
 test_data = test_data.toarray() if hasattr(test_data, 'toarray') else np.asarray(test_data)

In [12]:
# Hold out 30% of the rows for validation (fixed random_state), then scale all
# three feature matrices with a scaler fitted on the training split.
split_parts = train_test_split(X, y, test_size=0.3, random_state=1)
X_train, X_test, y_train, y_test = split_parts
X_train, X_test, test_data = standardize(X_train, X_test, test_data)

In [13]:
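# Optional step, currently disabled: project the features with LDA (200 components) before training.
# X_train, X_test, test_data = perform_discriminant_analysis(200, X_train, X_test, test_data)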

In [14]:
print(X_train.shape)
# Seed TensorFlow before the model is built and trained so that weight
# initialisation (and TensorFlow-driven shuffling) is repeatable across
# Restart & Run All; the value mirrors the random_state used for the split.
# NOTE(review): this does not by itself guarantee bit-identical results on
# every backend - confirm if exact reproducibility is required.
tf.random.set_seed(1)
ann = get_ann()
# Train on mean squared error and additionally report RMSE during fitting.
ann.compile(
    optimizer='adam',
    loss='mean_squared_error',
    metrics=[tf.keras.metrics.RootMeanSquaredError()],
)

(1022, 269)


In [15]:
# Train for 200 epochs in mini-batches of 32, evaluating on the hold-out
# split after every epoch.
ann.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=200,
    validation_data=(X_test, y_test),
)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

<tensorflow.python.keras.callbacks.History at 0x118ed9fd0>

In [16]:
# Predict on the hold-out split and report the R^2 score of those predictions.
y_pred = ann.predict(X_test)
holdout_r2 = r2_score(y_test, y_pred)
print(holdout_r2)

0.8908751373785729


In [17]:
# RMSE of the hold-out predictions, in the same units as the target column.
holdout_mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(holdout_mse)
print("RMSE: %f" % rmse)

RMSE: 27913.877635


In [18]:
# Predict on the prepared prediction set and write the Id/SalePrice CSV.
submission_file = "ann_reg_01.csv"
preds = ann.predict(test_data)
preds_to_file(test_all, preds, submission_file)

File ann_reg_01.csv created!
