In [None]:
# ----- Libraries ----- #

# This is the main Library that allows us to work with Neural Networks
import tensorflow as tf


# For graph plotting
import matplotlib.pyplot as plt
from tensorflow.math import confusion_matrix

# For dataset manipulation
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np

# For visualizing more complex graphs
import seaborn as sns

# Miscellaneous Libraries
import os

In [None]:
# Load the housing data from the CSV that sits next to the notebook.
raw_train_dataset = pd.read_csv("Boston_House_Price.csv")

# All feature engineering happens on an independent (deep) copy, so the
# frame exactly as read from disk stays available for reference.
processed_dataset = raw_train_dataset.copy(deep=True)

# Show the frame as loaded.
raw_train_dataset

In [None]:
# Inverted min-max scaling of NOX: the highest raw value maps to 0 and the
# lowest raw value maps to 1 (the notebook's premise is that the price goes
# down as NOX goes up, so "better" values should be the larger ones).
#
# The statistics are read from the untouched raw_train_dataset rather than
# from processed_dataset, so re-running this cell always produces the same
# column. Rescaling the already-scaled column in place would compute 1 - x
# and silently flip the direction back on every second run.
nox_raw = raw_train_dataset['NOX']
nox_max, nox_min = nox_raw.max(), nox_raw.min()
processed_dataset['NOX'] = (nox_max - nox_raw) / (nox_max - nox_min)

In [9]:
# Inverted min-max scaling of CRIM: the highest raw value maps to 0 and the
# lowest raw value maps to 1.
# Computed from raw_train_dataset so the cell is idempotent; applying the
# formula to the already-scaled column would flip it back (1 - x) on re-run.
crim_raw = raw_train_dataset['CRIM']
crim_max, crim_min = crim_raw.max(), crim_raw.min()
processed_dataset['CRIM'] = (crim_max - crim_raw) / (crim_max - crim_min)

In [18]:
# Inverted min-max scaling of TAX: the highest raw value maps to 0 and the
# lowest raw value maps to 1.
# Computed from raw_train_dataset so the cell is idempotent; applying the
# formula to the already-scaled column would flip it back (1 - x) on re-run.
tax_raw = raw_train_dataset['TAX']
tax_max, tax_min = tax_raw.max(), tax_raw.min()
processed_dataset['TAX'] = (tax_max - tax_raw) / (tax_max - tax_min)

In [21]:
### --- MIN-MAX NORMALIZATION ---
# Min-max scaling needs the extremes of the column, so grab them first.
zn_column = processed_dataset['ZN']
max_zn = zn_column.max()
min_zn = zn_column.min()
print('Max ZN level: {}\t Min ZN level: {}'.format(max_zn, min_zn))


def normalize_zn(x):
  """Min-max scale ``x`` using the module-level ``min_zn`` / ``max_zn``.

  Works on a single value as well as on a whole Series, because it only
  uses element-wise arithmetic. The column minimum maps to 0 and the
  column maximum maps to 1.
  """
  span = max_zn - min_zn
  return (x - min_zn) / span


# Scale the whole column in one go (same arithmetic as applying the function
# to each value individually).
processed_dataset['ZN'] = normalize_zn(zn_column)

# Sanity check: every distinct value should now lie between 0 and 1.
processed_dataset['ZN'].unique()

Max ZN level: 100.0	 Min ZN level: 0.0


array([0.18 , 0.   , 0.125, 0.75 , 0.21 , 0.9  , 0.85 , 1.   , 0.25 ,
       0.175, 0.8  , 0.28 , 0.45 , 0.6  , 0.95 , 0.825, 0.3  , 0.22 ,
       0.2  , 0.4  , 0.55 , 0.525, 0.7  , 0.34 , 0.33 , 0.35 ])

In [22]:
# Display the working frame after the transformations applied so far.
# NOTE(review): the output saved with this cell already shows AGE as 0/1 and
# no CHAS/B/LSTAT/RAD columns, although those steps appear further down in the
# notebook, so the cells were executed out of order. Re-run top to bottom
# before trusting this preview.
processed_dataset

Unnamed: 0,CRIM,ZN,INDUS,NOX,RM,AGE,DIS,TAX,PTRATIO,PRICE
0,1.000000,0.18,2.31,0.685185,6.575,0,4.0900,0.791985,15.3,24.0
1,0.999764,0.00,7.07,0.827160,6.421,0,4.9671,0.895038,17.8,21.6
2,0.999764,0.00,7.07,0.827160,7.185,0,4.9671,0.895038,17.8,34.7
3,0.999707,0.00,2.18,0.849794,6.998,1,6.0622,0.933206,18.7,33.4
4,0.999295,0.00,2.18,0.849794,7.147,0,6.0622,0.933206,18.7,36.2
...,...,...,...,...,...,...,...,...,...,...
501,0.999367,0.00,11.93,0.613169,6.593,0,2.4786,0.835878,21.0,22.4
502,0.999562,0.00,11.93,0.613169,6.120,0,2.2875,0.835878,21.0,20.6
503,0.999388,0.00,11.93,0.613169,6.976,0,2.1675,0.835878,21.0,23.9
504,0.998839,0.00,11.93,0.613169,6.794,0,2.3889,0.835878,21.0,22.0


In [13]:
# Binarise AGE: 1 when the original value is below 50, 0 otherwise
# (so a value of exactly 50, or a missing value, becomes 0).
#
# The flag is derived from raw_train_dataset instead of the column being
# rewritten in place: once AGE holds only 0/1, every value is "< 50", so
# re-running an in-place version would turn the whole column into 1s.
processed_dataset['AGE'] = (raw_train_dataset['AGE'] < 50).astype(int)

In [16]:
# Remove the columns that are not fed to the model: CHAS, B, LSTAT and RAD.
# A single drop call with errors='ignore' keeps the cell re-runnable: on a
# second execution the columns are already gone, and a plain drop would
# raise KeyError.
columns_to_drop = ['CHAS', 'B', 'LSTAT', 'RAD']
processed_dataset = processed_dataset.drop(columns=columns_to_drop, errors='ignore')

In [75]:
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

# ----- Configuration ----- #
# Everything a reader might want to tune lives here instead of being
# scattered through the cell as magic numbers.
SEED = 42                   # fixes the train/test split and the weight initialisation
TEST_FRACTION = 0.2         # share of rows held out for the final evaluation
VALIDATION_FRACTION = 0.2   # share of the training rows Keras holds out for validation
HIDDEN_LAYERS = 8           # number of identical hidden Dense layers
HIDDEN_UNITS = 32           # width of each hidden layer
EPOCHS = 150
BATCH_SIZE = 256

# Seed Python's `random`, NumPy and TensorFlow in one call so that the
# initial weights (and therefore the reported metrics) are repeatable.
tf.keras.utils.set_random_seed(SEED)

# Split dataset into training and testing. random_state makes the split
# itself reproducible; without it every run evaluates on different rows.
# NOTE(review): the min-max statistics used earlier in the notebook are
# computed on the full dataset, i.e. before this split, so the test rows
# influence the scaling of the training rows — confirm this is acceptable.
train, test = train_test_split(
    processed_dataset,
    test_size=TEST_FRACTION,
    random_state=SEED,
)

# Separate into features and target: every column except PRICE is a feature.
dependent_variables = ['PRICE']
independent_variables = list(processed_dataset.columns)
independent_variables.remove('PRICE')

train_set = train[independent_variables]
train_target = train[dependent_variables]
test_set = test[independent_variables]
test_target = test[dependent_variables]

# Build regression model: an input spec, HIDDEN_LAYERS identical ReLU layers
# and a single linear output unit. tf.keras.Input(shape=...) is used instead
# of InputLayer(input_shape=...), whose `input_shape` argument is deprecated
# in Keras 3.
normal_model = tf.keras.models.Sequential(
    [tf.keras.Input(shape=(len(independent_variables),))]
    + [
        tf.keras.layers.Dense(units=HIDDEN_UNITS, activation='relu')
        for _ in range(HIDDEN_LAYERS)
    ]
    + [tf.keras.layers.Dense(units=1)]  # Linear activation for regression
)

# Compile with appropriate regression loss (MSE), tracking MAE alongside it.
normal_model.compile(
    loss='mse',
    optimizer='adam',
    metrics=[tf.keras.metrics.MeanAbsoluteError()]
)

# Train model; Keras sets aside VALIDATION_FRACTION of the training rows
# to report val_loss / val_mean_absolute_error after each epoch.
normal_model.fit(
    train_set,
    train_target,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    validation_split=VALIDATION_FRACTION,
)

# Predict and evaluate on the held-out test rows. Both arrays are flattened
# to 1-D so the sklearn metrics compare them element by element.
predictions = normal_model.predict(test_set).flatten()
true_values = test_target.values.flatten()

# Metrics
mae = mean_absolute_error(true_values, predictions)
rmse = np.sqrt(mean_squared_error(true_values, predictions))
r2 = r2_score(true_values, predictions)
n = test_set.shape[0]  # number of test observations
k = test_set.shape[1]  # number of features
# Adjusted R² penalises R² for the number of features relative to the sample size.
adjusted_r2 = 1 - (1 - r2) * (n - 1) / (n - k - 1)

# Print evaluation results
print(f"MAE: {mae:.4f}")
print(f"RMSE: {rmse:.4f}")
print(f"R²: {r2:.4f}")
print(f"Adjusted R²: {adjusted_r2:.4f}")

Epoch 1/150




[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 477ms/step - loss: 601.9394 - mean_absolute_error: 22.6770 - val_loss: 587.1639 - val_mean_absolute_error: 22.5519
Epoch 2/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step - loss: 586.1513 - mean_absolute_error: 22.3071 - val_loss: 571.7228 - val_mean_absolute_error: 22.1867
Epoch 3/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 82ms/step - loss: 574.9118 - mean_absolute_error: 22.0270 - val_loss: 555.1074 - val_mean_absolute_error: 21.7863
Epoch 4/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step - loss: 560.8936 - mean_absolute_error: 21.6737 - val_loss: 536.4370 - val_mean_absolute_error: 21.3386
Epoch 5/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step - loss: 538.5806 - mean_absolute_error: 21.1309 - val_loss: 511.5604 - val_mean_absolute_error: 20.7270
Epoch 6/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[