### Data Set Characteristics:

##### Number of Instances:

20640

##### Number of Attributes:

8 numeric, predictive attributes and the target

##### Attribute Information:

* MedInc median income in block
* HouseAge median house age in block
* AveRooms average number of rooms
* AveBedrms average number of bedrooms
* Population block population
* AveOccup average house occupancy
* Latitude house block latitude
* Longitude house block longitude

#### Target

The target variable is the median house value for California districts, expressed in units of 100,000 US dollars.

#### Documentation
https://scikit-learn.org/stable/modules/generated/sklearn.datasets.fetch_california_housing.html#examples-using-sklearn-datasets-fetch-california-housing

In [None]:
# To install the TensorFlow package, run: conda install -c conda-forge tensorflow

In [None]:
# Import Required Libraries:
import numpy as np
import pandas as pd
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt

In [None]:
# Library to build Neural Networks:
import tensorflow as tf
from tensorflow import keras

In [None]:
# Fetch the California housing dataset through scikit-learn's dataset loader.
# Later cells read the feature matrix from `housing.data` and the column
# names from `housing.feature_names`.
from sklearn.datasets import fetch_california_housing
housing = fetch_california_housing()

In [None]:
# Keep a handle on the raw feature array of the fetched dataset.
housei=housing.data

In [None]:
# Wrap the raw feature array in a DataFrame; columns stay positional (0..n-1)
# until they are given names in a later cell.
house = pd.DataFrame(data=housei)

In [None]:
# Display the first five records (columns are still unnamed at this point):
house.head(5)

Unnamed: 0,0,1,2,3,4,5,6,7
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25


In [None]:
# Name the columns using the feature names shipped with the dataset instead of
# re-typing them by hand, so the labels cannot drift from the data:
# 'MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'AveOccup', 'Latitude', 'Longitude'
house.columns = list(housing.feature_names)

In [None]:
# Check the first rows again now that the columns carry descriptive names.
house.head()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25


In [None]:
# Show the feature names provided by the dataset object itself.
print(housing.feature_names)

['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'AveOccup', 'Latitude', 'Longitude']


In [None]:
# Split the data into test (30%), validation and training subsets.
from sklearn.model_selection import train_test_split

# NOTE(review): the notebook's markdown describes the target as the median
# house value (exposed by the loader as `housing.target`), yet this cell
# predicts the 'MedInc' feature from the remaining seven columns. Confirm
# which target is intended; switching it also changes the model input width.
y = house['MedInc']
X = house.drop(columns='MedInc')

X_train_full, X_test, y_train_full, y_test = train_test_split(
    X, y, test_size=0.30, random_state=1
)

# Note the (valid, train) unpacking order: with test_size=0.90 the first pair
# of outputs holds ~10% of the rows and is used as the validation set, while
# the 90% "test" outputs become the actual training set.
X_valid, X_train, y_valid, y_train = train_test_split(
    X_train_full, y_train_full, test_size=0.90, random_state=10
)


In [None]:
# Standardise the features: the scaler learns its statistics from the
# training rows only and the same transformation is then applied to the
# training, validation and test rows.
# Caution: the three variables are overwritten with their scaled versions, so
# re-running this cell on its own would scale already-scaled data.
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler().fit(X_train)
X_train, X_valid, X_test = map(scaler.transform, (X_train, X_valid, X_test))

In [None]:
# Sanity-check the (rows, features) size of the scaled training matrix.
X_train.shape

(13004, 7)

In [None]:
# Preview the first five rows of the standardised training matrix
# (a slice is enough to eyeball the scaling; avoid dumping the whole array).
X_train[:5]

array([[-1.79473785, -0.66845604, -0.03035367, ..., -0.03889539,
        -0.64952291,  0.52223332],
       [-0.1231456 ,  0.14104117, -0.27531077, ..., -0.04201027,
         1.41957301, -0.86899258],
       [ 0.35445219,  0.2130794 , -0.10613668, ...,  0.03845988,
        -0.59334836,  0.31778794],
       ...,
       [-0.99874154,  0.47674571, -0.06345841, ...,  0.03410083,
         0.76888447, -1.13327564],
       [ 1.23004813, -0.21509213,  0.09149555, ...,  0.0047086 ,
        -0.72910352,  0.70673282],
       [-0.99874154,  0.67142175, -0.08509454, ..., -0.00480483,
        -0.80400292,  0.85134053]])

In [None]:
### MODEL BUILDING
# Fit a linear regression model on the scaled training features; its scores
# are logged later alongside the neural-network models for comparison.
from sklearn.linear_model import LinearRegression
model = LinearRegression()
model.fit(X_train,y_train)

In [None]:
## Linear Regression Model:

In [None]:
# To represent the results in a data frame:
from sklearn import metrics
import numpy as np
# Empty results table: one row per evaluated model, with the four error
# measures reported first for the training split and then for the test split.
scores = pd.DataFrame(
    columns=['Model'] + [
        f'{measure}_{split}'
        for split in ('Train', 'Test')
        for measure in ('MAE', 'MSE', 'RMSE', 'MAPE')
    ]
)

def get_metrics(train_act,train_pred,test_act,test_pred,model_description,dataframe):
    """Append one row of train/test regression errors to ``dataframe``.

    Parameters
    ----------
    train_act, train_pred : array-like
        Actual and predicted target values for the training split.
    test_act, test_pred : array-like
        Actual and predicted target values for the test split.
    model_description : str
        Label stored in the 'Model' column of the new row.
    dataframe : pandas.DataFrame
        Results table to extend. It must have the nine columns
        Model, MAE/MSE/RMSE/MAPE_Train and MAE/MSE/RMSE/MAPE_Test, in that
        order. The frame is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``dataframe`` object, with the new row appended.
    """
    def _errors(actual, predicted):
        # MSE is computed once and reused for the RMSE.
        mse = metrics.mean_squared_error(actual, predicted)
        return [
            metrics.mean_absolute_error(actual, predicted),
            mse,
            np.sqrt(mse),
            metrics.mean_absolute_percentage_error(actual, predicted),
        ]

    # Write into the table that was passed in rather than a module-level
    # global, so the function works with any results frame.
    dataframe.loc[len(dataframe)] = [
        model_description,
        *_errors(train_act, train_pred),
        *_errors(test_act, test_pred),
    ]

    return dataframe

In [None]:
# Score the linear model on the training and test splits and log the row.
scores = get_metrics(
    train_act=y_train,
    train_pred=model.predict(X_train),
    test_act=y_test,
    test_pred=model.predict(X_test),
    model_description='Lin_model',
    dataframe=scores,
)
scores

Unnamed: 0,Model,MAE_Train,MSE_Train,RMSE_Train,MAPE_Train,MAE_Test,MSE_Test,RMSE_Test,MAPE_Test
0,Lin_model,0.800197,1.458469,1.207671,0.253707,0.807578,1.458401,1.207643,0.255653


### ANN Model

In [None]:
# Feed-forward regressor: two hidden ReLU layers of 30 units each and a single
# linear output unit.
# The input width is read from the training matrix instead of being
# hard-coded, and it is declared with an explicit Input object: passing
# `input_shape` to the first Dense layer makes recent Keras versions emit a
# UserWarning recommending this form for Sequential models.
model1 = keras.models.Sequential([
    keras.Input(shape=(X_train.shape[1],)),
    keras.layers.Dense(30, activation="relu"),
    keras.layers.Dense(30, activation="relu"),
    keras.layers.Dense(1)
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
# Optimise squared error with SGD at its default settings; mean absolute
# error is tracked as an additional, easier-to-read metric.
model1.compile(
    optimizer=keras.optimizers.SGD(),
    loss="mean_squared_error",
    metrics=['mae'],
)

In [None]:
# Train for 20 epochs, evaluating on the validation split after every epoch;
# the returned history object is kept for later inspection.
model1_history = model1.fit(
    x=X_train,
    y=y_train,
    epochs=20,
    validation_data=(X_valid, y_valid),
)

Epoch 1/20
[1m407/407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 7ms/step - loss: 3.3622 - mae: 1.2388 - val_loss: 1.0952 - val_mae: 0.7604
Epoch 2/20
[1m407/407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 5ms/step - loss: 1.0829 - mae: 0.7520 - val_loss: 1.0271 - val_mae: 0.7392
Epoch 3/20
[1m407/407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 1.0693 - mae: 0.7329 - val_loss: 1.0249 - val_mae: 0.7276
Epoch 4/20
[1m407/407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 10ms/step - loss: 1.0000 - mae: 0.7215 - val_loss: 1.0333 - val_mae: 0.7271
Epoch 5/20
[1m407/407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 0.9301 - mae: 0.6954 - val_loss: 1.0202 - val_mae: 0.7170
Epoch 6/20
[1m407/407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 0.9027 - mae: 0.6910 - val_loss: 0.9646 - val_mae: 0.6894
Epoch 7/20
[1m407/407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step -

In [None]:
# Score the first neural network on the training and test splits and log the row.
scores = get_metrics(
    train_act=y_train,
    train_pred=model1.predict(X_train),
    test_act=y_test,
    test_pred=model1.predict(X_test),
    model_description='ANN_model',
    dataframe=scores,
)
scores

[1m407/407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m194/194[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step


Unnamed: 0,Model,MAE_Train,MSE_Train,RMSE_Train,MAPE_Train,MAE_Test,MSE_Test,RMSE_Test,MAPE_Test
0,Lin_model,0.800197,1.458469,1.207671,0.253707,0.807578,1.458401,1.207643,0.255653
1,ANN_model,0.720487,0.933626,0.966243,0.239368,0.73664,1.041894,1.020732,0.242206


In [None]:
## model 2
# Smaller variant of the first network: hidden ReLU layers of 30 and 12 units
# followed by a single linear output unit.
# The input width is read from the training matrix instead of being
# hard-coded, and it is declared with an explicit Input object: passing
# `input_shape` to the first Dense layer makes recent Keras versions emit a
# UserWarning recommending this form for Sequential models.
model2 = keras.models.Sequential([
    keras.Input(shape=(X_train.shape[1],)),
    keras.layers.Dense(30, activation="relu"),
    keras.layers.Dense(12, activation="relu"),
    keras.layers.Dense(1)
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
# Optimise squared error with RMSprop at its default settings.
# NOTE(review): the tracked metric 'mse' duplicates the loss, so the training
# log reports the same number twice; a different metric such as 'mae' would
# be more informative -- confirm before changing.
model2.compile(
    optimizer='rmsprop',
    loss="mean_squared_error",
    metrics=['mse'],
)

In [None]:
# Train for 20 epochs, evaluating on the validation split after every epoch;
# the returned history object is kept for later inspection.
model2_history = model2.fit(
    x=X_train,
    y=y_train,
    epochs=20,
    validation_data=(X_valid, y_valid),
)

Epoch 1/20
[1m407/407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 9.1644 - mse: 9.1644 - val_loss: 1.6148 - val_mse: 1.6148
Epoch 2/20
[1m407/407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 1.2628 - mse: 1.2628 - val_loss: 1.1853 - val_mse: 1.1853
Epoch 3/20
[1m407/407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 1.1557 - mse: 1.1557 - val_loss: 1.1051 - val_mse: 1.1051
Epoch 4/20
[1m407/407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1.0743 - mse: 1.0743 - val_loss: 1.0409 - val_mse: 1.0409
Epoch 5/20
[1m407/407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1.0086 - mse: 1.0086 - val_loss: 1.0175 - val_mse: 1.0175
Epoch 6/20
[1m407/407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.9557 - mse: 0.9557 - val_loss: 0.9833 - val_mse: 0.9833
Epoch 7/20
[1m407/407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - 

In [None]:
# Score the second neural network on the training and test splits.
# Both prediction sets must come from model2: using model1 for the test
# predictions would simply repeat the first network's test metrics in this row.
# The row gets its own label so the two networks can be told apart in the table.
scores=get_metrics(y_train,model2.predict(X_train),y_test,model2.predict(X_test),'ANN_model2',scores)
scores

[1m407/407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m194/194[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step


Unnamed: 0,Model,MAE_Train,MSE_Train,RMSE_Train,MAPE_Train,MAE_Test,MSE_Test,RMSE_Test,MAPE_Test
0,Lin_model,0.800197,1.458469,1.207671,0.253707,0.807578,1.458401,1.207643,0.255653
1,ANN_model,0.720487,0.933626,0.966243,0.239368,0.73664,1.041894,1.020732,0.242206
2,ANN_model,0.643012,0.819799,0.905428,0.207256,0.73664,1.041894,1.020732,0.242206
