In [46]:
#!pip install tensorflow

In [47]:
# Tensorflow and Scikit-Learn packages
import tensorflow as tf
from sklearn.metrics import r2_score

#Computational and Visualisation packages
import numpy as np
import pandas as pd

In [48]:
# Load the California housing CSV (expected next to the notebook), print a
# schema summary, then show the first rows as the cell's rich output.
housing_csv_path = "cal_housing_data with headers.csv"
housing_dataset = pd.read_csv(housing_csv_path)

housing_dataset.info()
print("\n Glimpse of the housing dataset")
housing_dataset.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20640 entries, 0 to 20639
Data columns (total 9 columns):
Longitude            20640 non-null float64
Latitude             20640 non-null float64
HousingMedianAge     20640 non-null int64
TotalRooms           20640 non-null int64
TotalBedrooms        20640 non-null int64
Population           20640 non-null int64
Households           20640 non-null int64
MedianIncomeValue    20640 non-null float64
MedianHouseValue     20640 non-null int64
dtypes: float64(3), int64(6)
memory usage: 1.4 MB

 Glimpse of the housing dataset


Unnamed: 0,Longitude,Latitude,HousingMedianAge,TotalRooms,TotalBedrooms,Population,Households,MedianIncomeValue,MedianHouseValue
0,-122.23,37.88,41,880,129,322,126,8.3252,452600
1,-122.22,37.86,21,7099,1106,2401,1138,8.3014,358500
2,-122.24,37.85,52,1467,190,496,177,7.2574,352100
3,-122.25,37.85,52,1274,235,558,219,5.6431,341300
4,-122.25,37.85,52,1627,280,565,259,3.8462,342200


In [49]:
# Preprocessing
# `.values` already yields one 2-D ndarray for the whole frame, so no
# row-by-row Python copy is needed.
housing_array = housing_dataset.values

# The last column (MedianHouseValue) is the regression target; every column
# before it is an input feature.
# NOTE: despite its name, `housing_dependent` holds the *independent*
# variables (the features). The name is kept because later cells use it.
housing_dependent = housing_array[:, :-1]
# Slice with `-1:` (not `-1`) so the target stays a 2-D column of shape (m, 1).
housing_target = housing_array[:, -1:]

# m = number of rows (samples), n = number of features.
m, n = housing_dependent.shape
# Prepend a column of ones so the first fitted parameter acts as the intercept.
housing_dependent_plus_bias = np.c_[np.ones((m, 1)), housing_dependent]

In [50]:
# Build the compute graph for the closed-form least-squares solution
# (normal equation):  theta = (X^T X)^-1 X^T y
# Nothing is evaluated here; the ops only run inside a session.
X = tf.constant(housing_dependent_plus_bias, dtype=tf.float64, name="X")
XT = tf.transpose(X)
y = tf.constant(housing_target.reshape(-1, 1), dtype=tf.float64, name="y")

# (X^T X)^-1
gram_inverse = tf.matrix_inverse(tf.matmul(XT, X))
# (X^T X)^-1 X^T
projection = tf.matmul(gram_inverse, XT)
# First row of theta multiplies the ones column of X, i.e. it is the intercept.
theta = tf.matmul(projection, y)

In [51]:
# Evaluate the normal-equation graph; the session is closed on leaving the
# `with` block, and the fitted parameters come back as a NumPy array.
with tf.Session() as sess:
    theta_value = sess.run(theta)

print (theta_value)

[[ -3.59402294e+06]
 [ -4.28237438e+04]
 [ -4.25767219e+04]
 [  1.15630387e+03]
 [ -8.18164928e+00]
 [  1.13410689e+02]
 [ -3.85350953e+01]
 [  4.83082868e+01]
 [  4.02485142e+04]]


In [52]:
# Manual prediction for the first row of the dataset:
#   y_hat = intercept + sum_j coefficient_j * feature_j
intercept = theta_value[0][0]
#Feature coefficients
coefficients = theta_value[1:]
first_row = housing_dependent[0]

predicted_value = intercept
for weight, feature in zip(coefficients, first_row):
    # each `weight` is a length-1 row of the coefficient column; take its scalar
    predicted_value += weight[0] * feature
print ("Predicted value from the model = {}".format(predicted_value))

Predicted value from the model = 411111.096064


## Computation of the quality of the trained model

In [53]:
# Predict the target for every row in one vectorised step:
#   y_hat = intercept + X . coefficients
# A per-element Python loop would have to add each length-1 coefficient row
# into a scalar slot of the output (implicit array-to-scalar conversion, which
# recent NumPy deprecates) and would run rows * features interpreter iterations.
intercept = theta_value[0][0]
coefficients = theta_value[1:] #Extracting the feature coefficients

# (rows, features) dot (features, 1) -> (rows, 1); ravel() gives the 1-D
# array of per-row predictions that the later cells expect.
predicted_values = intercept + housing_dependent.dot(coefficients).ravel()

print ("Array of computed predictions = {}".format(predicted_values))

Array of computed predictions = [ 411111.09606391  416144.49078587  380432.65417419 ...,   25026.16974242
   37991.19625315   55550.98309296]


In [54]:
# Collapse the target column into a 1-D copy so it lines up element-wise with
# the 1-D array of predictions.
y_flattened = housing_target.flatten()
print (y_flattened)

[ 452600.  358500.  352100. ...,   92300.   84700.   89400.]


### Computation of R squared ($R^2$)

In [55]:
# Coefficient of determination of the fitted model: actual targets first,
# model predictions second.
R_2 = r2_score(y_true=y_flattened, y_pred=predicted_values)
print ("Computed R^2 = %.10f" % (R_2,))

Computed R^2 = 0.6371056229


### Computation of the residual sum of squares ($SS_{res}$)

In [56]:
# Residual sum of squares: sum over all rows of (prediction - actual)^2,
# accumulated in row order as a plain Python float.
SSres = 0.0
for predicted, actual in zip(predicted_values, y_flattened):
    error = float(predicted - actual)
    SSres += error * error
print ("Computed Residual = %.10f" % SSres)

Computed Residual = 99734980886003.9375000000
