# Introduction

We are going to use a neural network for predict the value of a house. Because I want the model to work fast, I will use SVD (singular value decomposition) at the beginning, this is not necessary, but because we use less variables the epochs will go very fast.
+ we saw this formula in: Useful_python_functions_ML >>(6) Features relations>>(2) Singular value decomposition
+ this is the link: https://github.com/robertofuentesr/Useful_python_functions_ML/tree/main/(6)%20Features%20relations 

In [1]:
#pip install tensorflow

In [2]:
#pip install --upgrade keras

In [3]:
# import libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import cross_val_score
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler
from sklearn.impute import KNNImputer
# new library, we haven't used this one before in this repo
from sklearn.preprocessing import TargetEncoder

# Import Keras and other new libraries
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.decomposition import TruncatedSVD
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [4]:
# Read the data
# This data you can find here: https://www.kaggle.com/c/home-data-for-ml-course/data

X_full = pd.read_csv('train.csv', index_col='Id')

# SalePrice is the target, if there is no target eliminate row associated with it
X_full.dropna(axis=0, subset=['SalePrice'], inplace=True)
y = X_full.SalePrice
X = X_full.copy()
X.drop(['SalePrice'], axis=1, inplace=True)



In [5]:
# we saw this formula in: Useful_python_functions_ML >>(6) Features relations>>(2) Singular value decomposition
# this is the link: https://github.com/robertofuentesr/Useful_python_functions_ML/tree/main/(6)%20Features%20relations 
def transforming_svd(X,y):
    X, X_test, y, y_test = train_test_split(X, y, train_size=0.8, test_size=0.2,
                                                random_state=0)  
    # for SVD it is best to scale all numerical values.
    
    numerical_col = [col for col in X.columns if str(X[col].dtypes)!='object' ]
    categorical_col = [col for col in X.columns if str(X[col].dtypes)=='object' ]
    

    numerical_transformer = Pipeline(
    steps=[("scaler", StandardScaler()), ("imputer", KNNImputer(n_neighbors=3))
      ]
        )
    categorical_transformer =  Pipeline(steps=[
        ('imputer', SimpleImputer(missing_values=pd.NA, strategy='most_frequent')),
        ('onehot', OneHotEncoder(handle_unknown='ignore'))
    ])

    preprocessor = ColumnTransformer(transformers=
        [("numerical_transformer", numerical_transformer, numerical_col),
        ("categorical_transformer", categorical_transformer, categorical_col)],remainder='passthrough')


    # Create principal components
    svd = TruncatedSVD(n_components=len(X.columns), n_iter=7, random_state=42)
    
    # Bundle preprocessing and modeling code in a pipeline
    pipe = Pipeline(steps=[('preprocessor', preprocessor),
                          ('model', svd)
                         ])

    
    X_svd = pipe.fit_transform(X)
    
    X_test_svd = pipe.transform(X_test)
    
    return  X_svd,X_test_svd,y, y_test

In [6]:
X_svd,X_test_svd,y, y_test = transforming_svd(X,y)

In [7]:
# Convert to dataframe
def convert_svd_df(X_svd):
    component_names = [f"svd{i+1}" for i in range(X_svd.shape[1])]
    X_svd = pd.DataFrame(X_svd, columns=component_names)
    return X_svd

X_svd = convert_svd_df(X_svd)
X_test_svd = convert_svd_df(X_test_svd)

In [8]:
# we saw this in a previous notebook this number get the 80% variance of the data and work fairly well
number_components = 31
X_svd = X_svd[X_svd.columns[0:number_components]]
X_test_svd = X_test_svd[X_test_svd.columns[0:number_components]]

In [9]:
X_svd.head()

Unnamed: 0,svd1,svd2,svd3,svd4,svd5,svd6,svd7,svd8,svd9,svd10,...,svd22,svd23,svd24,svd25,svd26,svd27,svd28,svd29,svd30,svd31
0,5.178012,4.994241,-0.477955,1.007098,3.695736,-0.394786,-0.457406,-3.141178,-0.100906,0.95418,...,0.903267,1.687794,-0.357427,0.087701,-0.013855,0.82191,0.108754,-1.27787,0.408873,0.07715
1,5.44095,-3.762135,-0.679413,-0.660386,1.928928,-0.073712,-0.85714,-0.27654,-0.444606,0.104564,...,0.13399,0.185997,0.887046,-0.002589,-0.076377,-0.068738,0.409405,-0.056461,-0.265974,0.294951
2,5.454435,-2.945801,-1.658974,0.741066,-0.372962,-0.759533,1.816474,-0.083871,-0.946973,0.359699,...,0.739886,0.313619,0.227438,0.390536,0.260251,-0.258716,0.227238,1.311141,0.231653,0.557195
3,5.47487,4.305689,-2.535842,0.919317,-0.082051,0.526221,0.585428,-0.366155,-0.351103,-0.144899,...,-0.464379,-0.759385,0.17699,-0.212306,-0.796845,-0.633266,0.286464,0.424999,0.203175,0.447677
4,5.654782,4.019248,-0.950836,-0.123134,2.930008,-1.082044,-0.661603,1.635362,0.072457,-0.45623,...,-1.981239,-0.192027,-2.23786,0.636678,-0.268383,0.225547,-0.913481,0.632468,0.470152,0.389773


In [10]:
#X_svd = np.asarray(X_svd).astype('float32')
#X_test = np.asarray(X_test).astype('float32')
model = Sequential()
callback  = keras.callbacks.EarlyStopping(monitor='loss',
                                              patience=60)
# input_shape=(number_components,) this is the first input layer
# the 500 nodes is the first hidden layer
model.add(Dense(200, input_shape=(len(X_svd.columns),), kernel_initializer='normal', activation='relu'))
model.add(Dense(300, kernel_initializer='normal'))
model.add(Dense(1, kernel_initializer='normal'))
# Compile model
model.compile(loss='mean_absolute_error', optimizer='adam')

history = model.fit(X_svd, y, epochs=500, batch_size=256,callbacks=[callback])

# Evaluate the model on the test set
print(model.evaluate(X_test_svd, y_test))
print(len(history.history['loss']) ) 

Epoch 1/500


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 179470.1562  
Epoch 2/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 180778.6094 
Epoch 3/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 180592.7500 
Epoch 4/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 180457.3594 
Epoch 5/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 178995.6250 
Epoch 6/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 180670.6719 
Epoch 7/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 180955.4688 
Epoch 8/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 179563.8906 
Epoch 9/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 181055.4062 
Epoch 10/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 42763.4414 
Epoch 80/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 43988.3438 
Epoch 81/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 40412.0234 
Epoch 82/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 37988.9297 
Epoch 83/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 37430.8984 
Epoch 84/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 34982.5156 
Epoch 85/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 34625.1719 
Epoch 86/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 34688.6523 
Epoch 87/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 30833.7676 
Epoch 88/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4m

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 16050.2129 
Epoch 158/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 17353.6328 
Epoch 159/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 16708.1680 
Epoch 160/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 16628.7422 
Epoch 161/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 16102.3652 
Epoch 162/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 16758.9902 
Epoch 163/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 16584.4844 
Epoch 164/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 16197.4268 
Epoch 165/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 16597.3340 
Epoch 166/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 15452.3223 
Epoch 236/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 15686.5312 
Epoch 237/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 15572.5566 
Epoch 238/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 16323.2510 
Epoch 239/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 16014.1299 
Epoch 240/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 15648.2373 
Epoch 241/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 15959.9863 
Epoch 242/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 15877.3896 
Epoch 243/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 16243.7383 
Epoch 244/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 15082.1729 
Epoch 314/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 15383.5684 
Epoch 315/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 14651.1250 
Epoch 316/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 15479.0713 
Epoch 317/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 14813.5352 
Epoch 318/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 15849.0518 
Epoch 319/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 15625.8936 
Epoch 320/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 15410.3848 
Epoch 321/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 15761.7480 
Epoch 322/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 15211.2148 
Epoch 392/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 14393.8330 
Epoch 393/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 14346.2207 
Epoch 394/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 15007.0195 
Epoch 395/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 15035.3350 
Epoch 396/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 14396.1035 
Epoch 397/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 14783.4785 
Epoch 398/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 14529.0410 
Epoch 399/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 14266.3809 
Epoch 400/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 14583.7285 
Epoch 470/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 14554.6494 
Epoch 471/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 14157.2646 
Epoch 472/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 14406.2354 
Epoch 473/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 13719.8145 
Epoch 474/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 14424.8877 
Epoch 475/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 14376.8867 
Epoch 476/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 14355.6924 
Epoch 477/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 13805.8359 
Epoch 478/500
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m

In [11]:
model.summary()

In [12]:
model.evaluate(X_test_svd, y_test)

[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 980us/step - loss: 18982.1016


20745.23828125

We didn't improve our predictions. Maybe this is not the best model to fit in a small dataset.