In [1]:
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, InputLayer
from keras.optimizers import Adam

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import copy as cp

# Print the TensorFlow version in use so the environment behind the outputs is recorded.
print(f'tf_version: {tf.__version__}')

2023-08-20 16:25:03.881589: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-08-20 16:25:04.220461: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-08-20 16:25:04.221453: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


tf_version: 2.13.0


## Data Analysis

In [2]:
# Load the housing dataset. The raw frame is kept untouched in `original_df`
# and all further work happens on an independent copy.
original_df = pd.read_csv('./data/Housing.csv', sep=',')
df = original_df.copy()  # DataFrame.copy() is a deep copy by default
display(df.head())

target = 'price'
# Compare column names for equality. With `target` being a str, the previous
# `i not in target` was a *substring* test, so a column named e.g. 'ice'
# would have been excluded as well.
features = [col for col in df.columns if col != target]

# Model inputs: the numeric columns only (the yes/no and furnishingstatus
# columns are not used in this cell).
x_train_df = df[['area', 'bedrooms', 'bathrooms', 'stories', 'parking']]
x_train_np = x_train_df.to_numpy()
# Reshape the target into a column vector: one row per sample.
y_train_np = df[target].to_numpy().reshape(-1, 1)
print(f'x_train shape: {x_train_np.shape} \t y_train shape: {y_train_np.shape}')

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,yes,no,no,no,yes,2,yes,furnished
1,12250000,8960,4,4,4,yes,no,no,no,yes,3,no,furnished
2,12250000,9960,3,2,2,yes,no,yes,no,no,2,yes,semi-furnished
3,12215000,7500,4,2,2,yes,no,yes,no,yes,3,yes,furnished
4,11410000,7420,4,1,2,yes,yes,yes,no,yes,2,no,furnished


x_train shape: (545, 5) 	 y_train shape: (545, 1)


The function implemented by a neuron with no activation is the same as linear regression:

![Alt text](images/linearfunction.png)

In [3]:
# One Dense unit with a linear activation, i.e. the layer output is w.x + b.
linear_layer = tf.keras.layers.Dense(units=1, activation='linear')

# Feed the first sample as a batch of one row; the weights read back below
# only exist once the layer has been called on an input.
first_sample = x_train_np[:1]
a1 = linear_layer(first_sample)
print(a1)

# Kernel and bias as initialised (no training has taken place yet).
w, b = linear_layer.get_weights()
print(f'Initial parameters: \n{w}\n{b}')

tf.Tensor([[3479.622]], shape=(1, 1), dtype=float32)
Initial parameters: 
[[ 0.46904993]
 [ 0.0257802 ]
 [-0.11544752]
 [-0.17534876]
 [-0.03720117]]
[0.]


## Data Preprocessing

### Feature Scaling
To speed up training, we normalize the inputs with the Keras `Normalization` layer, which rescales each feature to zero mean and unit variance.

In [4]:
feature_names = x_train_df.columns

# --- Raw feature statistics -------------------------------------------------
print('Before Normalization:')
print(f'Peak-to-peak x_train: {np.ptp(x_train_np, axis=0)}')
raw_stats = pd.DataFrame(x_train_np, columns=feature_names).describe()
display(raw_stats)

# --- Fit and apply the normalization layer ----------------------------------
# adapt() computes the per-feature mean and variance from the data, which is
# equivalent to passing them explicitly when constructing the layer.
norm_1 = tf.keras.layers.Normalization(axis=-1)
norm_1.adapt(x_train_np)

# Forward-propagate the training inputs through the fitted layer.
x_train_norm = norm_1(x_train_np)

# --- Normalized feature statistics ------------------------------------------
print('\nAfter Normalization:')
print(f'Peak-to-peak x_train: {np.ptp(x_train_norm, axis=0)}')
norm_stats = pd.DataFrame(x_train_norm, columns=feature_names).describe()
display(norm_stats)

Before Normalization:
Peak-to-peak x_train: [14550     5     3     3     3]


Unnamed: 0,area,bedrooms,bathrooms,stories,parking
count,545.0,545.0,545.0,545.0,545.0
mean,5150.541284,2.965138,1.286239,1.805505,0.693578
std,2170.141023,0.738064,0.50247,0.867492,0.861586
min,1650.0,1.0,1.0,1.0,0.0
25%,3600.0,2.0,1.0,1.0,0.0
50%,4600.0,3.0,1.0,2.0,0.0
75%,6360.0,3.0,2.0,2.0,1.0
max,16200.0,6.0,4.0,4.0,3.0



After Normalization:
Peak-to-peak x_train: [6.7107935 6.7807055 5.9759965 3.4614208 3.4851508]


Unnamed: 0,area,bedrooms,bathrooms,stories,parking
count,545.0,545.0,545.0,545.0,545.0
mean,-8.399333e-08,-1.2599e-07,1.959844e-07,1.539878e-07,-2.799778e-08
std,1.000919,1.000919,1.000919,1.000919,1.000919
min,-1.61453,-2.665004,-0.5701866,-0.9293966,-0.8057413
25%,-0.7151452,-1.308863,-0.5701866,-0.9293966,-0.8057413
50%,-0.2539223,0.04727817,-0.5701866,0.2244103,-0.8057413
75%,0.5578299,0.04727817,1.421812,0.2244103,0.3559756
max,5.096263,4.115701,5.40581,2.532024,2.67941


### Tiling

Tile/copy our data to increase the training set size and reduce the number of training epochs.

In [5]:
# Number of times the dataset is stacked on top of itself.
N_TILES = 100

# Repeat the rows N_TILES times while keeping the columns unchanged.
# Features and targets share one repetition count so that every feature row
# stays aligned with its target row.
x_train = np.tile(x_train_norm, (N_TILES, 1))
y_train = np.tile(y_train_np, (N_TILES, 1))
assert x_train.shape[0] == y_train.shape[0], 'features and targets are misaligned'

print(f'x_train shape before: {x_train_norm.shape}')
print(f'x_train shape after: {x_train.shape}')

x_train shape before: (545, 5)
x_train shape after: (54500, 5)


## Model Training

### Linear Model Training

In [37]:
tf.random.set_seed(100)  # fixed seed so the weight initialisation is repeatable

# Single-feature linear regression written as a network: one input feeding one
# Dense unit with a linear activation (there are no hidden layers).
L_0 = InputLayer(input_shape=(1,))  # This layer can be omitted
L_1 = Dense(units=1, activation='linear', name='layer_1')

nn = Sequential([L_0, L_1])

# summary() prints the table itself and returns None, so it is called directly;
# wrapping it in display() additionally rendered a stray "None".
# The parameter counts correspond to the number of elements in the weight and bias.
nn.summary()

# Inspect the freshly initialised kernel and bias of the only layer.
W1, b1 = nn.get_layer("layer_1").get_weights()
print('Initialized weights: \n')
print(f"W1{W1.shape}:\n", W1, f"\nb1{b1.shape}:", b1, '\n')

Model: "sequential_14"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 layer_1 (Dense)             (None, 1)                 2         
                                                                 
Total params: 2 (8.00 Byte)
Trainable params: 2 (8.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


None

Initialized weights: 

W1(1, 1):
 [[-0.17844772]] 
b1(1,): [0.] 



In [42]:
# Defines loss function and optimizer
# NOTE(review): learning_rate=10 is unusually large; presumably it compensates
# for the unscaled price target - confirm before reusing this value.
nn.compile(
    loss = 'mse',
    optimizer = Adam(learning_rate=10),
)

# Runs gradient descent and fits the weights to the data.
# Only the first feature column is used. It is sliced as `[:, :1]` so the input
# stays 2-D (n_samples, 1) and matches the model's declared input_shape=(1,),
# instead of passing a 1-D array and relying on implicit rank expansion.
nn.fit(
    x_train[:, :1], y_train,
    epochs=10,
)

Epoch 1/10
Epoch 2/10

KeyboardInterrupt: 

### DNN Training

In [31]:
tf.random.set_seed(100)  # fixed seed so the weight initialisation is repeatable

# Creates a feed-forward network with 3 sigmoid hidden layers (25, 10 and 3
# units) followed by a single linear output unit for regression.
L_0 = InputLayer(input_shape=(x_train.shape[1],))  # This layer can be omitted
L_1 = Dense(units=25, activation='sigmoid', name='layer_1')
L_2 = Dense(units=10, activation='sigmoid', name='layer_2')
L_3 = Dense(units=3, activation='sigmoid', name='layer_3')
L_4 = Dense(units=1, activation='linear', name='layer_4')

nn = Sequential([L_0, L_1, L_2, L_3, L_4])

# summary() prints the table itself and returns None, so it is called directly;
# wrapping it in display() additionally rendered a stray "None".
# The parameter counts correspond to the number of elements in the weight and bias.
nn.summary()

Model: "sequential_11"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 layer_1 (Dense)             (None, 25)                150       
                                                                 
 layer_2 (Dense)             (None, 10)                260       
                                                                 
 layer_3 (Dense)             (None, 3)                 33        
                                                                 
 layer_4 (Dense)             (None, 1)                 4         
                                                                 
Total params: 447 (1.75 KB)
Trainable params: 447 (1.75 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


None

In [25]:
# Configure training: Adam optimizer minimising the mean squared error.
nn.compile(
    optimizer=Adam(learning_rate=0.1),
    loss='mean_squared_error',
)

# Fit the weights on the tiled training set for 10 epochs.
nn.fit(
    x=x_train,
    y=y_train,
    epochs=10,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7f583e78b590>

In [16]:
# Spot check: predictions for the first five training rows, followed by the
# corresponding true prices.
first_rows = x_train[:5]
p = nn.predict(first_rows)
print(p)
print('\n',y_train[:5])

[[ 7619949. ]
 [11286305. ]
 [ 7742543.5]
 [ 7475719. ]
 [ 5958961. ]]

 [[13300000]
 [12250000]
 [12250000]
 [12215000]
 [11410000]]
