Importing Dataset & Libraries

In [20]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

In [21]:
df = pd.read_csv("HousingData.csv")

In [22]:
df

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.0900,1,296,15.3,396.90,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2,242,17.8,396.90,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3,222,18.7,396.90,,36.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
501,0.06263,0.0,11.93,0.0,0.573,6.593,69.1,2.4786,1,273,21.0,391.99,,22.4
502,0.04527,0.0,11.93,0.0,0.573,6.120,76.7,2.2875,1,273,21.0,396.90,9.08,20.6
503,0.06076,0.0,11.93,0.0,0.573,6.976,91.0,2.1675,1,273,21.0,396.90,5.64,23.9
504,0.10959,0.0,11.93,0.0,0.573,6.794,89.3,2.3889,1,273,21.0,393.45,6.48,22.0


In [23]:
#The features like RM (average rooms), LSTAT (% lower status population), etc. are the independent variables.

#MEDV is the dependent variable (what we want to predict).

#MEDV stands for:Median value of owner-occupied homes in $1000s

x = df.drop("MEDV", axis=1).values   #independant variable  # all columns except the target
y = df["MEDV"].values       # dependant variable  # the column you want to predict

In [24]:
x.shape

(506, 13)

In [25]:
y.shape

(506,)

In [26]:
#test_size=0.2: 20% of data goes to testing, 80% to training.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

In [27]:
#For X_train and X_test: you'll see something like (400, 13)→ 400 rows, 13 features (columns)
#For y_train and y_test: you'll see something like (400,)→ 400 values (just 1 column, since it's the target/label)

def shape():
   print("x_train Shape :",x_train.shape)
   print("x_test Shape :",x_test.shape)
   print("y_train shape :",y_train.shape)
   print("y_test shape :",y_test.shape)
shape()

x_train Shape : (404, 13)
x_test Shape : (102, 13)
y_train shape : (404,)
y_test shape : (102,)


Data Preprocessing

In [28]:
mean = x_train.mean(axis=0)     # calculate mean of each feature (column)
std = x_train.std(axis=0)       # calculate standard deviation of each feature (column)

x_train = (x_train - mean) / std   # standardize training data
x_test = (x_test - mean) / std     # standardize test data using training stats


In [29]:
x_train[0]    #accessing the first row (or first data sample) in the x_train dataset

array([        nan,         nan,         nan,         nan, -0.40899533,
        0.60619371,         nan, -0.05506416, -0.18059065, -0.59354731,
       -0.45776474,  0.05581127,         nan])

In [30]:
y_train[0]  #accessing the first target value (or label) in the y_train dataset.

29.0

Building our Model

In [33]:
#tensorflow.keras.models contains important classes and functions to define, compile, and train machine learning models, especially neural networks.
#Sequential is a type of model where layers are stacked one after another — like a straight line (input → hidden layers → output).
#Dense means a fully connected layer.Every neuron in the layer is connected to every neuron in the previous layer.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
 

In [34]:
model=Sequential()
model.add(Dense(128,activation='relu',input_shape=(x_train[0].shape))) 
model.add(Dense(64,activation='relu'))
model.add(Dense(1,activation='linear')) 
model.compile(optimizer='adam', loss='mse', metrics=['mae']) 
model.summary()

In [40]:
model.fit(x_train, y_train, epochs=100, batch_size=1, verbose=1, validation_data=(x_test, y_test))

Epoch 1/100
[1m404/404[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - loss: 92.1517 - mae: 6.5830 - val_loss: 74.3738 - val_mae: 6.0974
Epoch 2/100
[1m404/404[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - loss: 72.0582 - mae: 6.2011 - val_loss: 74.3856 - val_mae: 6.1056
Epoch 3/100
[1m404/404[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - loss: 79.8084 - mae: 6.3358 - val_loss: 74.4015 - val_mae: 6.1152
Epoch 4/100
[1m404/404[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step - loss: 85.3111 - mae: 6.5275 - val_loss: 74.4198 - val_mae: 6.1247
Epoch 5/100
[1m404/404[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step - loss: 91.8606 - mae: 6.8451 - val_loss: 74.4371 - val_mae: 6.1326
Epoch 6/100
[1m404/404[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - loss: 81.5463 - mae: 6.4165 - val_loss: 74.4553 - val_mae: 6.1410
Epoch 7/100
[1m404/404[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1

<keras.src.callbacks.history.History at 0x1a95b0ac770>

Training our Model

In [41]:
x_test[8]

array([        nan,         nan,         nan,         nan, -0.43449351,
        0.36479725,         nan, -0.1881006 , -0.18059065, -0.59354731,
       -0.45776474,  0.27334461,         nan])

Testing our Model

In [42]:
import numpy as np

test_input = [[-0.42101827, -0.50156705, -1.13081973, -0.25683275, -0.55572682,
               0.19758953,  0.20684755, -0.34272202, -0.87422469, -0.84336666,
              -0.32505625,  0.41244772, -0.63500406]]

# Convert list to NumPy array
test_input_np = np.array(test_input)

# Now predict
print("Actual Output:", y_test[8])
print("Predicted Output:", model.predict(test_input_np))


Actual Output: 31.5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 92ms/step
Predicted Output: [[22.730541]]


Evaluating our Model

In [43]:
mse_nn,mae_nn=model.evaluate(x_test,y_test)
print('Mean squared error on test data :',mse_nn)
print('Mean absolute error on test data :',mae_nn)

from sklearn.metrics import r2_score
y_dl=model.predict(x_test)
r2=r2_score(y_test,y_dl)
print('R2 Score :',r2)

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - loss: 80.9349 - mae: 6.6601
Mean squared error on test data : 75.41452026367188
Mean absolute error on test data : 6.359581470489502
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
R2 Score : -0.014311094202272168
