In [13]:
import os

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error
from xgboost import XGBRegressor
from sklearn.model_selection import RandomizedSearchCV
from sklearn.preprocessing import StandardScaler


In [14]:
# Load rfm_segments.csv into `df`.
# The path defaults to the original hard-coded local copy; on any other machine
# set the RFM_SEGMENTS_CSV environment variable to the file's location instead
# of editing this cell. An unset or empty variable falls back to the default.
DEFAULT_RFM_CSV = "D:\\Python basics\\CLTV-360\\CSV files\\rfm_segments.csv"
rfm_csv_path = os.environ.get("RFM_SEGMENTS_CSV") or DEFAULT_RFM_CSV
df = pd.read_csv(rfm_csv_path)


In [15]:
# Independent copy of the customer identifier and segment-label columns.
id_and_segment_cols = ['CustomerID', 'Segment']
customer_data = df.loc[:, id_and_segment_cols].copy()


In [16]:
# Regression target: the CLTV column.
y = df['CLTV']

# Feature matrix: every column of `df` except the identifier, the segment
# label and the target itself.
# NOTE(review): all remaining columns are used as features as-is — confirm
# none of them is derived from CLTV (target leakage) and that all are numeric.
non_feature_cols = ['CustomerID', 'CLTV', 'Segment']
X = df.drop(columns=non_feature_cols)

In [17]:

# Split features, target and the customer id/segment frame with one call so
# the three stay row-aligned; 20% of the rows are held out for testing and the
# shuffle is seeded for repeatability.
split_parts = train_test_split(X, y, customer_data, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test, cust_train, cust_test = split_parts

In [18]:
# Learn the scaling statistics from the training features only, then apply
# that same fitted scaler to both the training and the test features.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [19]:
# Baseline: ordinary linear regression on the scaled training features.
lr = LinearRegression().fit(X_train_scaled, y_train)
y_pred_lr = lr.predict(X_test_scaled)

# Test-set error of the baseline: RMSE (square root of the MSE) and MAE.
mse_lr = mean_squared_error(y_test, y_pred_lr)
rmse_lr = np.sqrt(mse_lr)
mae_lr = mean_absolute_error(y_test, y_pred_lr)

In [20]:
# Candidate values the randomized search samples from, one list per
# XGBRegressor hyperparameter.
param_dist = {
    'n_estimators': [100, 200, 300],      # number of boosting rounds
    'max_depth': [3, 5, 7],               # maximum tree depth
    'learning_rate': [0.01, 0.1, 0.3],    # step-size shrinkage
    'subsample': [0.6, 0.8, 1.0]          # row-sampling ratio per tree
}

# Unfitted base estimator handed to the search; squared-error objective with a
# fixed seed.
xgb = XGBRegressor(objective='reg:squarederror', random_state=42)

In [21]:
# Search configuration: 10 sampled parameter combinations, each scored by
# negative MSE over 3 cross-validation folds, run on all available cores with
# a fixed seed for the sampling.
search_settings = {
    'n_iter': 10,
    'scoring': 'neg_mean_squared_error',
    'cv': 3,
    'verbose': 1,
    'n_jobs': -1,
    'random_state': 42,
}
random_search = RandomizedSearchCV(
    estimator=xgb,
    param_distributions=param_dist,
    **search_settings,
)

In [22]:
# Run the search on the scaled training data (fit returns the search object
# itself) and keep the estimator refit with the best-scoring parameters.
best_xgb = random_search.fit(X_train_scaled, y_train).best_estimator_


Fitting 3 folds for each of 10 candidates, totalling 30 fits


In [23]:
# Score the tuned XGBoost model on the scaled test features.
y_pred_xgb = best_xgb.predict(X_test_scaled)

# Test-set error of the tuned model: RMSE (square root of the MSE) and MAE.
mse_xgb = mean_squared_error(y_test, y_pred_xgb)
rmse_xgb = np.sqrt(mse_xgb)
mae_xgb = mean_absolute_error(y_test, y_pred_xgb)

In [24]:
# Side-by-side report of test RMSE / MAE for both models, one line per model.
print(
    f" Linear Regression - RMSE: {rmse_lr:.2f}, MAE: {mae_lr:.2f}",
    f" Tuned XGBoost - RMSE: {rmse_xgb:.2f}, MAE: {mae_xgb:.2f}",
    sep="\n",
)


 Linear Regression - RMSE: 1168.13, MAE: 196.92
 Tuned XGBoost - RMSE: 903.60, MAE: 60.02
