In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# for splitting the dataset into train and test sets
from sklearn.model_selection import train_test_split

# for standardizing the feature values in the dataset
from sklearn.preprocessing import StandardScaler

# to calculate the MAE and MSE
from sklearn.metrics import mean_absolute_error, mean_squared_error

# various models to predict the prices
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor

# TensorFlow/Keras pieces for building a custom CNN to compare against the scikit-learn models
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D, Flatten, InputLayer


In [None]:
# Load dataset
# Names for the 14 columns of the file; MEDV (last) is the prediction target
# used by the following cells.
column_names = [
    'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
    'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV',
]

# The file carries no header row and its fields are split on runs of whitespace.
df = pd.read_csv('./housing.csv', sep='\\s+', header=None)
df.columns = column_names


In [None]:
# Separate the predictors from the target column.
X = df.drop(columns=['MEDV'])  # every column except MEDV
Y = df['MEDV']                 # the MEDV column on its own

# Keep 80% of the rows for training and hold out the rest for testing;
# the fixed random_state makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    X, Y, train_size=0.8, random_state=42
)

# Standardize the features. The scaler learns its mean and standard deviation
# from the training rows only and reuses them for the test rows, so no
# information from the test set enters preprocessing.
scaler = StandardScaler()
x_train_scaled = scaler.fit(x_train).transform(x_train)
x_test_scaled = scaler.transform(x_test)


In [None]:
# Baseline scikit-learn regressors, keyed by the display name used when
# reporting results. The tree-based estimators share one seed so that
# repeated runs produce the same fits.
tree_seed = 42

models = dict([
    ("Linear Regression", LinearRegression()),
    ("Support Vector Regressor", SVR()),
    ("Decision Tree", DecisionTreeRegressor(random_state=tree_seed)),
    ("Random Forest", RandomForestRegressor(random_state=tree_seed)),
    ("Gradient Boosting", GradientBoostingRegressor(random_state=tree_seed)),
])

In [None]:
# Fit every baseline model and record its test-set errors.
# `results` maps model name -> (RMSE, MAE).
results = {}

for name, model in models.items():
    # fit() returns the estimator itself, so training and prediction chain.
    y_pred = model.fit(x_train_scaled, y_train).predict(x_test_scaled)

    # Root mean squared error: the square root of the mean squared error.
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    # Mean absolute error.
    mae = mean_absolute_error(y_test, y_pred)

    results[name] = (rmse, mae)


In [None]:
# Simple CNN model
# Seed Python, NumPy and TensorFlow before building the network so that weight
# initialisation and batch shuffling are repeatable; without this the CNN's
# metrics (and possibly the "best model" picked later) change on every run.
tf.keras.utils.set_random_seed(42)

# Reshape for Conv1D, which expects 3-D input (samples, steps, channels):
# each feature column becomes one step with a single channel.
n_features = x_train_scaled.shape[1]
x_train_cnn = x_train_scaled.reshape(-1, n_features, 1)
x_test_cnn = x_test_scaled.reshape(-1, x_test_scaled.shape[1], 1)

cnn_model = Sequential([
    # tf.keras.Input(shape=...) is used instead of InputLayer(input_shape=...),
    # whose `input_shape` argument is deprecated in Keras 3.
    tf.keras.Input(shape=(n_features, 1)),
    Conv1D(64, kernel_size=2, activation='relu'),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(1)  # single linear unit: the predicted price as a float
])

# Compile the model: optimise MSE, and also track MAE during training.
cnn_model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Train the CNN model (silently).
cnn_model.fit(x_train_cnn, y_train, epochs=100, batch_size=16, verbose=0)

# Evaluate the CNN with the same metrics as the scikit-learn models.
# predict() returns shape (n_samples, 1); flatten to 1-D to match y_test.
y_pred_cnn = cnn_model.predict(x_test_cnn).flatten()
rmse_cnn = np.sqrt(mean_squared_error(y_test, y_pred_cnn))
mae_cnn = mean_absolute_error(y_test, y_pred_cnn)

# Store the CNN results alongside the other models.
results["CNN"] = (rmse_cnn, mae_cnn)


In [None]:
# Print one line per model; the name is left-aligned and padded to 25
# characters so the metric columns line up.
print("Model Evaluation (RMSE & MAE):")
for name, scores in results.items():
    rmse, mae = scores  # each entry is stored as (RMSE, MAE)
    print(f"{name:<25} -> RMSE: {rmse:.2f}, MAE: {mae:.2f}")


In [None]:
# Plot Actual vs Predicted for the best model (lowest RMSE)
# Each entry of `results` is (rmse, mae); rank on the first element.
best_model_name = min(results.items(), key=lambda entry: entry[1][0])[0]

# The CNN's predictions are still in memory. The scikit-learn models'
# predictions were not kept, so the winning model predicts the test set again.
y_pred_best = (
    y_pred_cnn
    if best_model_name == "CNN"
    else models[best_model_name].predict(x_test_scaled)
)

fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(y_test, y_pred_best, alpha=0.7)

# Dashed red diagonal marks perfect predictions (predicted == actual).
lo, hi = y_test.min(), y_test.max()
ax.plot([lo, hi], [lo, hi], 'r--')

ax.set_xlabel("Actual MEDV")
ax.set_ylabel("Predicted MEDV")
ax.set_title(f"{best_model_name}: Actual vs Predicted MEDV")
ax.grid(True)
fig.tight_layout()
plt.show()
