In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
import gpflow
import gpflux
import sklearn
from sklearn import model_selection
from sklearn import preprocessing
from sklearn import metrics
from sklearn import svm

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
df = pd.read_csv("Real estate.csv")
tf.keras.backend.set_floatx("float64")

In [4]:
X = np.array(df.drop(['No','Y house price of unit area'], axis=1))
Y = np.array(df['Y house price of unit area'])

In [5]:
X = np.reshape(X,(-1,6))
Y = np.reshape(Y, (-1,1))

In [6]:
X_train = X[0:290]
X_val = X[290:352]
X_test = X[352:414]

In [7]:
Y_train = Y[0:290]
Y_val = Y[290:352]
Y_test = Y[352:414]

In [8]:
scaler = preprocessing.StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

In [9]:
n = len(Y_train)

In [10]:
kernels = [gpflow.kernels.Cosine(), gpflow.kernels.Matern12(), gpflow.kernels.Matern32(), gpflow.kernels.Matern52(),
           gpflow.kernels.RBF(), gpflow.kernels.RationalQuadratic(), 
           gpflow.kernels.Periodic(base_kernel=gpflow.kernels.RBF())]

In [215]:
best_mse1 = 100000
best_model1 = None
mae1 = None
best_kernel = None

In [216]:
for k in kernels:
    for i in range(0,2):
        
        kernel1 = k
        inducing_points1 = gpflow.inducing_variables.InducingPoints(np.random.random((64, 6)))
        layer1 = gpflux.layers.GPLayer(kernel1,inducing_points1,num_data=n,num_latent_gps=1,
                                  mean_function=gpflow.mean_functions.Zero())
        likelihood = gpflux.layers.LikelihoodLayer(gpflow.likelihoods.Gaussian())
        gp = gpflux.models.DeepGP([layer1],likelihood)
        model = gp.as_training_model()
        model.compile(tf.optimizers.Adam(0.01))
        model.fit({"inputs":X_train,"targets":Y_train},epochs=50,verbose=0)
        model = gp.as_prediction_model()
        out_val = model(X_val)
        pred_val = out_val.f_mean.numpy().squeeze()
        mse_val = metrics.mean_squared_error(Y_val,pred_val)
        mae_val = metrics.mean_absolute_error(Y_val,pred_val)
        if(mse_val < best_mse1):
            best_mse1 = mse_val
            best_model1 =model
            mae1 = mae_val
            best_kernel = k

In [217]:
best_kernel

name,class,transform,prior,trainable,shape,dtype,value
RationalQuadratic.variance,Parameter,Softplus,,True,(),float64,22.0731
RationalQuadratic.lengthscales,Parameter,Softplus,,True,(),float64,2.65859
RationalQuadratic.alpha,Parameter,Softplus,,True,(),float64,0.758721


In [218]:
best_mse1

58.941263732295965

In [219]:
mae1

5.36097410758654

In [220]:
out_test = best_model1(X_test)
pred_test = out_test.f_mean.numpy().squeeze()
mse_test1 = metrics.mean_squared_error(Y_test,pred_test)
mae_test1 = metrics.mean_absolute_error(Y_test,pred_test)

In [221]:
mse_test1

54.070551536998394

In [222]:
mae_test1

5.635127890934849

In [22]:
best_mse2 = 100000
best_model2 = None
mae2 = None
best_k1 = None
best_k2 = None

In [23]:
for k1 in kernels:
    for k2 in kernels:
        for i in range(0,2):
            kernel1 = k1+k2
            inducing_points1 = gpflow.inducing_variables.InducingPoints(np.random.random((64, 6)))
            layer1 = gpflux.layers.GPLayer(kernel1,inducing_points1,num_data=n,num_latent_gps=6)
            kernel2 = k1+k2
            inducing_points2 = gpflow.inducing_variables.InducingPoints(np.random.random((32, 6)))
            layer2 = gpflux.layers.GPLayer(kernel2,inducing_points2,num_data =n,num_latent_gps=1,mean_function=gpflow.mean_functions.Zero())
            likelihood = gpflux.layers.LikelihoodLayer(gpflow.likelihoods.Gaussian())
            deep_gp = gpflux.models.DeepGP([layer1, layer2],likelihood)
            model = deep_gp.as_training_model()
            model.compile(tf.optimizers.Adam(0.01))
            model.fit({"inputs":X_train,"targets":Y_train},epochs=50,verbose=0)
            model = deep_gp.as_prediction_model()
            out_val = model(X_val)
            pred_val = out_val.f_mean.numpy().squeeze()
            mse_val = metrics.mean_squared_error(Y_val,pred_val)
            mae_val = metrics.mean_absolute_error(Y_val,pred_val)
            if(mse_val < best_mse2):
                best_mse2 = mse_val
                best_model2 = model
                mae2 = mae_val
                best_k1 = k1
                best_k2 = k2

In [24]:
best_mse2

52.258784724612

In [25]:
mae2

5.228190621800514

In [26]:
best_k1

name,class,transform,prior,trainable,shape,dtype,value
Matern32.variance,Parameter,Softplus,,True,(),float64,61.1827
Matern32.lengthscales,Parameter,Softplus,,True,(),float64,43.0283


In [27]:
best_k2

name,class,transform,prior,trainable,shape,dtype,value
Matern52.variance,Parameter,Softplus,,True,(),float64,59.1284
Matern52.lengthscales,Parameter,Softplus,,True,(),float64,35.6714


In [28]:
out_test = best_model2(X_test)
pred_test = out_test.f_mean.numpy().squeeze()
mse_test2 = metrics.mean_squared_error(Y_test,pred_test)
mae_test2 = metrics.mean_absolute_error(Y_test,pred_test)

In [29]:
mse_test2

71.53710357212199

In [30]:
mae_test2

6.843591931790883

In [13]:
best_mse3 = 100000
best_model3 = None
mae3 = None
best_k11 = None
best_k22 = None

In [14]:
for k1 in kernels:
    for k2 in kernels:
        for i in range(0,2):
            kernel1 = k1*k2
            inducing_points1 = gpflow.inducing_variables.InducingPoints(np.random.random((64, 6)))
            layer1 = gpflux.layers.GPLayer(kernel1,inducing_points1,num_data=n,num_latent_gps=6)
            kernel2 = k1*k2
            inducing_points2 = gpflow.inducing_variables.InducingPoints(np.random.random((32, 6)))
            layer2 = gpflux.layers.GPLayer(kernel2,inducing_points2,num_data =n,num_latent_gps=1,mean_function=gpflow.mean_functions.Zero())
            likelihood = gpflux.layers.LikelihoodLayer(gpflow.likelihoods.Gaussian())
            deep_gp = gpflux.models.DeepGP([layer1, layer2],likelihood)
            model = deep_gp.as_training_model()
            model.compile(tf.optimizers.Adam(0.01))
            model.fit({"inputs":X_train,"targets":Y_train},epochs=50,verbose=0)
            model = deep_gp.as_prediction_model()
            out_val = model(X_val)
            pred_val = out_val.f_mean.numpy().squeeze()
            mse_val = metrics.mean_squared_error(Y_val,pred_val)
            mae_val = metrics.mean_absolute_error(Y_val,pred_val)
            if(mse_val < best_mse3):
                best_mse3 = mse_val
                best_model3 = model
                mae3 = mae_val
                best_k11 = k1
                best_k22 = k2

In [15]:
best_mse3

50.27310254007834

In [16]:
mae3

5.0739382633180155

In [17]:
best_k11

name,class,transform,prior,trainable,shape,dtype,value
SquaredExponential.variance,Parameter,Softplus,,True,(),float64,13.0498
SquaredExponential.lengthscales,Parameter,Softplus,,True,(),float64,27.345


In [18]:
best_k22

name,class,transform,prior,trainable,shape,dtype,value
Matern52.variance,Parameter,Softplus,,True,(),float64,17.2788
Matern52.lengthscales,Parameter,Softplus,,True,(),float64,45.6921


In [19]:
out_test = best_model3(X_test)
pred_test = out_test.f_mean.numpy().squeeze()
mse_test3 = metrics.mean_squared_error(Y_test,pred_test)
mae_test3 = metrics.mean_absolute_error(Y_test,pred_test)

In [20]:
mse_test3

91.99616203789655

In [21]:
mae_test3

6.684890144366629

In [145]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import InputLayer, Dense
from tensorflow.keras.losses import MeanSquaredError, MeanAbsoluteError
from tensorflow.keras.optimizers import Adam

In [146]:
best_model4 = None
best_mse4 = 100000
mae4 = None 

In [147]:
for i in range(0,3):
    model = Sequential([InputLayer(6,),Dense(32, activation='relu'),Dense(16, activation='relu'), Dense(1)])
    model.compile(optimizer=Adam(learning_rate=0.001), loss=MeanSquaredError(),
              metrics = [MeanAbsoluteError()])
    model.fit(X_train,Y_train,epochs=50, batch_size=16, validation_data=(X_val,Y_val), verbose=0)
    mse_val, mae_val = model.evaluate(X_val,Y_val)
    if(mse_val < best_mse4):
        best_mse4 = mse_val
        mae4 = mae_val
        best_model4 = model



In [148]:
best_mse4

53.6710490883962

In [149]:
mae4

5.151918173684164

In [175]:
mse_test4 = metrics.mean_squared_error(Y_test, best_model4.predict(X_test))
mae_test4 = metrics.mean_absolute_error(Y_test, best_model4.predict(X_test))



In [176]:
mse_test4

46.84040950339601

In [177]:
mae_test4

5.3229673328844385

In [150]:
model5 = sklearn.linear_model.LinearRegression()

In [151]:
model5.fit(X_train, Y_train)

LinearRegression()

In [152]:
pred_val = model5.predict(X_val)

In [153]:
mse5 = metrics.mean_squared_error(Y_val,pred_val)

In [154]:
mae5 = metrics.mean_absolute_error(Y_val,pred_val)

In [155]:
mse5

64.02101537186611

In [156]:
mae5

6.183107053753475

In [178]:
pred_test = model5.predict(X_test)
mse_test5 = metrics.mean_squared_error(Y_test,pred_test)
mae_test5 = metrics.mean_absolute_error(Y_test,pred_test)

In [181]:
mse_test5

67.29538020632779

In [182]:
mae_test5

6.209483361273056

In [157]:
Cs = [0.001, 0.01, 0.1, 1, 10, 100]
gammas = [0.001, 0.01, 0.1, 1, 10, 100]

In [158]:
best_mse6 = 100000
mae6 = None
best_C = None
best_gamma = None
best_model6 = None

In [159]:
for C in Cs:
    for gamma in gammas:
        model = svm.SVR(kernel='rbf', gamma=gamma, C = C)
        model.fit(X_train, Y_train)
        mse_val = metrics.mean_squared_error(Y_val, model.predict(X_val))
        mae_val = metrics.mean_absolute_error(Y_val, model.predict(X_val))
        if (mse_val < best_mse6):
            best_mse6 = mse_val
            best_C = C
            best_gamma = gamma
            best_model6 = model
            mae6 = mae_val

In [160]:
best_C

100

In [161]:
best_gamma

0.1

In [162]:
best_mse6

51.47507746015518

In [163]:
mae6

5.0456167950347135

In [168]:
model = svm.SVR(kernel='rbf', gamma=best_gamma, C = best_C)
model.fit(X_train, Y_train)

SVR(C=100, gamma=0.1)

In [169]:
mse_test6 = metrics.mean_squared_error(Y_test, model.predict(X_test))
mae_test6 = metrics.mean_absolute_error(Y_test, model.predict(X_test))

In [170]:
mse_test6

46.21066994157352

In [171]:
mae_test6

4.804703258497158