In [30]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from tensorflow import keras
from sklearn.model_selection import train_test_split

In [31]:
df = pd.read_csv('house_price_full+(2).csv')
df.head()

Unnamed: 0,bedrooms,sqft_living,price
0,3,1340,313000
1,5,3650,2384000
2,3,1930,342000
3,3,2000,420000
4,4,1940,550000


In [58]:
df.shape

(499, 3)

In [32]:
X = df.copy()
#removing the target
Y = X.pop('price')

In [33]:
## scaler transform on the independent features
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [34]:
## perform log transformation on the target variable
Y = np.log(Y)

In [35]:
df_scaled = pd.DataFrame(X)
print(df_scaled)

            0         1
0   -0.433198 -0.753258
1    1.675735  1.457330
2   -0.433198 -0.188649
3   -0.433198 -0.121661
4    0.621269 -0.179079
..        ...       ...
494  0.621269  0.873582
495  1.675735  2.299459
496 -0.433198 -0.724549
497 -0.433198 -0.179079
498 -0.433198 -1.040347

[499 rows x 2 columns]


In [36]:
X_train,X_val,y_train,y_val = train_test_split(df_scaled,Y,test_size=0.1,random_state=42)

In [37]:
model = keras.Sequential(
    [
        keras.layers.Dense(
            10,activation='relu',input_shape=(X_train.shape[-1],)
        ),
        ## keras.layers.Dropout(0.2)
        keras.layers.Dense(
            10,activation='relu'),
        keras.layers.Dense(
            5,activation='relu'),
        keras.layers.Dense(1,activation='linear')
        
        
    ]
)

In [38]:
model.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_28 (Dense)            (None, 10)                30        
                                                                 
 dense_29 (Dense)            (None, 10)                110       
                                                                 
 dense_30 (Dense)            (None, 5)                 55        
                                                                 
 dense_31 (Dense)            (None, 1)                 6         
                                                                 
Total params: 201 (804.00 Byte)
Trainable params: 201 (804.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [39]:
model.compile(
    optimizer=keras.optimizers.SGD(),loss='mean_squared_error',
    metrics=['mse']
)

In [40]:
model.fit(X_train,y_train,epochs=10,batch_size=32,validation_data=(X_val,y_val),verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x14be9197100>

In [41]:
## loss: 0.1592 - mse: 0.1592 - val_loss: 0.2566 - val_mse: 0.2566 --> when no dropout layer

### Adding a dropout layer

In [42]:
model1 = keras.Sequential(
    [
      keras.layers.Dense(
      10,activation='relu',input_shape=(X_train.shape[-1],)),
      keras.layers.Dropout(0.2),
      keras.layers.Dense(
          10,activation='relu'),
      keras.layers.Dense(
         5,activation='relu'),
      keras.layers.Dense(1,activation='linear')
]
)

In [43]:
## recompiling it
model1.summary()
model1.compile(
    optimizer=keras.optimizers.SGD(),loss='mean_squared_error',metrics=['mse'])

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_32 (Dense)            (None, 10)                30        
                                                                 
 dropout_3 (Dropout)         (None, 10)                0         
                                                                 
 dense_33 (Dense)            (None, 10)                110       
                                                                 
 dense_34 (Dense)            (None, 5)                 55        
                                                                 
 dense_35 (Dense)            (None, 1)                 6         
                                                                 
Total params: 201 (804.00 Byte)
Trainable params: 201 (804.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [44]:
## retraining the model
model1.fit(X_train,y_train,epochs=10,batch_size=32,validation_data=(X_val,y_val),verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x14be6095f90>

In [45]:
## loss: 0.1592 - mse: 0.1592 - val_loss: 0.2566 - val_mse: 0.2566 --> when no dropout layer
## loss: 0.4056 - mse: 0.4056 - val_loss: 0.4126 - val_mse: 0.4126 --> when dropout layer is added

- Inference
1. Here we can clearly see in the first case there was clearly a case of  verfitting , by observing the loss and validation loss.
2. In the second scenario clearly the validation loss is much better in comparison to the first.

In [46]:
## results
res = pd.DataFrame({
    'Training loss':(),
    'Validation loss':()
})

In [47]:
res.loc[0] = [0.1592,0.2566]
res

Unnamed: 0,Training loss,Validation loss
0,0.1592,0.2566


In [48]:
res.loc[1] = [0.4056,0.4126]
res

Unnamed: 0,Training loss,Validation loss
0,0.1592,0.2566
1,0.4056,0.4126


In [49]:
res.index = ['Without Dropout','With Dropout']
res

Unnamed: 0,Training loss,Validation loss
Without Dropout,0.1592,0.2566
With Dropout,0.4056,0.4126


### Thus we can say that adding the dropout layer has regularized the model and helped in reducing overfitting.

In [50]:
model2 = keras.Sequential(
    [
      keras.layers.Dense(
      10,activation='relu',input_shape=(X_train.shape[-1],)),
      keras.layers.Dropout(0.2),
      keras.layers.Dense(
          10,activation='relu'),
      keras.layers.Dense(
         5,activation='relu'),
      keras.layers.BatchNormalization(),
      keras.layers.Dense(1,activation='linear')
]
)

In [51]:
model2.compile(
    optimizer=keras.optimizers.SGD(),loss='mean_squared_error',metrics=['mse'])

In [52]:
## retraining the model
model2.fit(X_train,y_train,epochs=10,batch_size=32,validation_data=(X_val,y_val),verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x14beb9c4eb0>

In [53]:
res.loc[len(res)] = [0.1946,0.1685]
res

Unnamed: 0,Training loss,Validation loss
Without Dropout,0.1592,0.2566
With Dropout,0.4056,0.4126
2,0.1946,0.1685


In [57]:
res.index = ['Without Dropout','With Dropout','After Batch Norm']
res

Unnamed: 0,Training loss,Validation loss
Without Dropout,0.1592,0.2566
With Dropout,0.4056,0.4126
After Batch Norm,0.1946,0.1685


### Clearly after batch normalization our validation loss has improved and is better than that of the training loss.