In [21]:
import numpy as np
import pandas as pd

from sklearn.metrics import mean_squared_error, r2_score
from catboost import CatBoostRegressor
from xgboost import XGBRegressor

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.metrics import RootMeanSquaredError

import keras_tuner as kt

In [3]:
train = pd.read_csv("../artifacts/data_transformation/train.csv")
test = pd.read_csv("../artifacts/data_transformation/test.csv")

X_train = train.drop(columns = "Listening_Time_minutes")
y_train = train["Listening_Time_minutes"]

X_test = test.drop(columns = "Listening_Time_minutes")
y_test = test["Listening_Time_minutes"]

## Catboost Model

In [4]:
cat_model = CatBoostRegressor(
    iterations=1000,
    learning_rate=0.1,
    depth=6,
    random_seed=42,
    eval_metric="RMSE",
    use_best_model=True,
    verbose=10
)

cat_model.fit(
    X_train, y_train,
    eval_set=(X_test, y_test),
    early_stopping_rounds=50
)

0:	learn: 25.1823205	test: 25.4269461	best: 25.4269461 (0)	total: 172ms	remaining: 2m 52s
10:	learn: 15.8471974	test: 16.1457308	best: 16.1457308 (10)	total: 315ms	remaining: 28.3s
20:	learn: 13.9632069	test: 14.2776942	best: 14.2776942 (20)	total: 454ms	remaining: 21.2s
30:	learn: 13.6226223	test: 13.9376083	best: 13.9376083 (30)	total: 591ms	remaining: 18.5s
40:	learn: 13.5392980	test: 13.8642984	best: 13.8642984 (40)	total: 714ms	remaining: 16.7s
50:	learn: 13.4939599	test: 13.8393509	best: 13.8393509 (50)	total: 840ms	remaining: 15.6s
60:	learn: 13.4667935	test: 13.8275968	best: 13.8275968 (60)	total: 971ms	remaining: 14.9s
70:	learn: 13.4392635	test: 13.8234380	best: 13.8234380 (70)	total: 1.13s	remaining: 14.8s
80:	learn: 13.4149752	test: 13.8190967	best: 13.8190967 (80)	total: 1.25s	remaining: 14.1s
90:	learn: 13.3904706	test: 13.8148485	best: 13.8148485 (90)	total: 1.36s	remaining: 13.6s
100:	learn: 13.3682651	test: 13.8120494	best: 13.8109321 (98)	total: 1.48s	remaining: 13.2s

<catboost.core.CatBoostRegressor at 0x28ff5ed89b0>

In [5]:
# Validation
predictions = cat_model.predict(X_test)
rmse = np.sqrt(mean_squared_error(y_test, predictions))
r2 = r2_score(y_test, predictions)
print(f"CatBoost RMSE: {rmse}")
print(f"CatBoost R2: {r2}")

CatBoost RMSE: 13.804987040030186
CatBoost R2: 0.7449047199293921


## XG Boost

In [6]:
xgb_model = XGBRegressor(
    n_estimators=1000,
    learning_rate=0.1,
    max_depth=6,
    random_state=42,
    early_stopping_rounds=50,
    eval_metric="rmse"
)

xgb_model.fit(
    X_train, y_train,
    eval_set=[(X_test, y_test)],
    verbose=10
)

[0]	validation_0-rmse:25.34840
[10]	validation_0-rmse:15.74967
[20]	validation_0-rmse:14.10290
[30]	validation_0-rmse:13.88458
[40]	validation_0-rmse:13.86473
[50]	validation_0-rmse:13.88280
[60]	validation_0-rmse:13.89142
[70]	validation_0-rmse:13.90377
[80]	validation_0-rmse:13.91425
[88]	validation_0-rmse:13.91613


In [7]:
# Validation
predictions = xgb_model.predict(X_test)
rmse = np.sqrt(mean_squared_error(y_test, predictions))
r2 = r2_score(y_test, predictions)
print(f"CatBoost RMSE: {rmse}")
print(f"CatBoost R2: {r2}")

CatBoost RMSE: 13.85779393417628
CatBoost R2: 0.7429494042637145


## Artificial Neural Networks

In [50]:
model = Sequential([
    Dense(256, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(10, activation='relu'),
    Dropout(0.2),
    Dense(1)
])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [51]:
model.summary()

In [52]:
# Compile the model
optimizer = Adam(learning_rate = 0.01)

model.compile(
    optimizer=optimizer,
    loss='mean_squared_error',
    metrics=['RootMeanSquaredError']
)

# Early stopping
early_stop = EarlyStopping(monitor='val_loss', patience=30, restore_best_weights=True)

# Train the model
history = model.fit(
    X_train, y_train,
    batch_size = 256,
    validation_data=(X_test, y_test),
    epochs=100,
    callbacks=[early_stop],
    verbose=1
)

Epoch 1/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 13ms/step - RootMeanSquaredError: 26.2354 - loss: 710.5206 - val_RootMeanSquaredError: 15.1378 - val_loss: 229.1530
Epoch 2/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - RootMeanSquaredError: 19.0436 - loss: 362.6822 - val_RootMeanSquaredError: 14.1425 - val_loss: 200.0101
Epoch 3/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - RootMeanSquaredError: 18.4157 - loss: 339.1650 - val_RootMeanSquaredError: 14.1669 - val_loss: 200.6998
Epoch 4/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - RootMeanSquaredError: 18.3810 - loss: 337.8829 - val_RootMeanSquaredError: 15.9049 - val_loss: 252.9657
Epoch 5/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - RootMeanSquaredError: 18.3515 - loss: 336.7934 - val_RootMeanSquaredError: 13.9925 - val_loss: 195.7887
Epoch 6/100
[1m141/141[0m [32m

In [53]:
# Validation
predictions = model.predict(X_test)
rmse = np.sqrt(mean_squared_error(y_test, predictions))
r2 = r2_score(y_test, predictions)
print(f"ANN RMSE: {rmse}")
print(f"ANN R2: {r2}")

[1m281/281[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
ANN RMSE: 13.93934351322528
ANN R2: 0.7399151482965942
