In [None]:
#!pip install tensorflow

In [1]:
import tensorflow as tf
from tensorflow.keras import layers, models

In [2]:
import pandas as pd
import numpy as np
from utils.transformations import ExtendedTransformation
from utils.filters import SimpleFilter

In [3]:
df_train = pd.read_csv("data/preprocessed/train_data.csv")
X_train, y_train = df_train.drop(columns=['Price']), df_train[['Price']]

In [4]:
preprocessor = ExtendedTransformation()
filter = SimpleFilter()

In [6]:
preprocessor.fit(X_train, y_train)

X shape:  (20974, 40)
bin_vars_columns shape:  (36,)
low_card_columns shape:  37


In [7]:
X_t, y_t = preprocessor.transform(X_train, y_train)

X shape:  (20974, 40)
X_low_card   shape:  (20974, 113)
X_high_card shape:  (20974, 50)
X_crossed_features shape:  (20974, 6670)
X_EXPANDED shape:  (20974, 6835)


In [8]:
filter.fit(X_t, y_t)

(20974, 6835)
(20974, 4173)
(20974, 3193)
(20974, 1635)


In [9]:
X_f, y_f = filter.transform(X_t, y_t)

(20974, 4173)
(20974, 3193)
(20974, 1635)


In [10]:
model = models.Sequential([
    layers.Dense(800, activation='relu', input_shape=(X_f.shape[1],)),
    layers.Dropout(0.2),
    layers.Dense(400, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(200, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(64, activation='relu'),
    layers.Dense(1)  # Single output for regression
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [14]:
model.compile(optimizer='adam',
              loss='mse')

In [None]:
model.fit(X_f, y_f,epochs=150, batch_size=32, )

Epoch 1/150
[1m656/656[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 7ms/step - loss: 0.3868
Epoch 2/150
[1m656/656[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 7ms/step - loss: 0.3925
Epoch 3/150
[1m656/656[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 7ms/step - loss: 0.3915
Epoch 4/150
[1m656/656[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 7ms/step - loss: 0.3883
Epoch 5/150
[1m656/656[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 7ms/step - loss: 0.3811
Epoch 6/150
[1m656/656[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 7ms/step - loss: 0.3858
Epoch 7/150
[1m656/656[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 7ms/step - loss: 0.3823
Epoch 8/150
[1m656/656[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 7ms/step - loss: 0.3933
Epoch 9/150
[1m656/656[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 7ms/step - loss: 0.3962
Epoch 10/150
[1m656/656[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 7ms

<keras.src.callbacks.history.History at 0x31efd59a0>

In [58]:
df_test = pd.read_csv("data/preprocessed/test_data.csv")
X_test, y_test = df_test.drop(columns=['Price']), df_test[['Price']]

In [59]:
X_test_proccesed, y_test_proccessed = preprocessor.transform(X_test, y_test)
X_test_filtered, y_test_filtered = filter.transform(X_test_proccesed, y_test_proccessed)

X shape:  (8989, 40)
X_low_card   shape:  (8989, 113)
X_high_card shape:  (8989, 50)
X_crossed_features shape:  (8989, 6670)
X_EXPANDED shape:  (8989, 6835)
(8989, 4173)
(8989, 3193)
(8989, 1635)


In [60]:
y_hat = model(X_test_filtered)

In [61]:
y_hat_unscaled = preprocessor.inverse_transform(y_hat)



In [62]:
from sklearn.metrics import ( root_mean_squared_error, 
                             mean_absolute_error, 
                             mean_absolute_percentage_error )

In [63]:
y_true = y_test.values
y_pred = y_hat_unscaled
rmse = root_mean_squared_error(y_true, y_pred)
mae = mean_absolute_error(y_true, y_pred)
mape = mean_absolute_percentage_error(y_true, y_pred)

# Format metrics with units and percentages
metrics = {
    "RMSE (₹)": f"{rmse:,.2f}",
    "MAE (₹)": f"{mae:,.2f}",
    "MAPE (%)": f"{mape:.2%}"
}

# Display metrics in a formatted table
print("\nModel Performance Metrics:\n")
print(f"{'Metric':<15} {'Value':>15}")
print("-" * 30)
for metric, value in metrics.items():
    print(f"{metric:<15} {value:>15}")

# Add a summary interpretation
print("\nInterpretation:")
print(f"- RMSE: The model's predictions are typically off by ₹{rmse:,.2f} on average")
print(f"- MAE: The average absolute error is ₹{mae:,.2f}")
print(f"- MAPE: The predictions are off by {mape:.1%} on average")


Model Performance Metrics:

Metric                    Value
------------------------------
RMSE (₹)          27,770,639.70
MAE (₹)            6,659,093.49
MAPE (%)                 44.86%

Interpretation:
- RMSE: The model's predictions are typically off by ₹27,770,639.70 on average
- MAE: The average absolute error is ₹6,659,093.49
- MAPE: The predictions are off by 44.9% on average
