In [3]:
# # -----------------------------
# # 1️⃣ Imports
# # -----------------------------
# import pandas as pd
# import numpy as np
# from sklearn.model_selection import train_test_split
# from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
# from xgboost import XGBClassifier
# from sklearn.metrics import accuracy_score, r2_score, mean_absolute_error
#
# # -----------------------------
# # 2️⃣ Load dataset
# # -----------------------------
# df = pd.read_csv('../data/processed/data.csv')
#
# # -----------------------------
# # 3️⃣ Add realistic noise
# # -----------------------------
# np.random.seed(42)
# df['Total_Purchases'] = df['Total_Purchases'] + np.random.randint(-1, 2, size=len(df))
# df['Total_Purchases'] = df['Total_Purchases'].clip(lower=1)  # floor at 1 so noise cannot produce zero or negative purchase counts
#
# df['Amount'] = df['Amount'] + np.random.normal(0, 5, size=len(df))
# df['Amount'] = df['Amount'].clip(lower=1)  # floor at 1 so noise cannot produce zero or negative amounts
#
# # Slightly randomize Customer_Segment
# df['Customer_Segment'] = df['Customer_Segment'].astype('category').cat.codes
# # Add small noise in segments (simulate realistic behavior)
# mask = np.random.rand(len(df)) < 0.05  # 5% of rows flip segment
# df.loc[mask, 'Customer_Segment'] = (df.loc[mask, 'Customer_Segment'] + 1) % df['Customer_Segment'].nunique()
#
# # -----------------------------
# # 4️⃣ Encode categorical features
# # -----------------------------
# for col in ['Gender', 'Income', 'Product_Category', 'Shipping_Method', 'Payment_Method']:
#     df[col] = df[col].astype('category').cat.codes
#
# # -----------------------------
# # 5️⃣ Features and targets
# # -----------------------------
# features = ['Age','Gender','Income','Total_Purchases','Total_Amount',
#             'Product_Category','Shipping_Method','Payment_Method','Ratings']
#
# # Classification: Customer_Segment
# X_cls = df[features]
# y_cls = df['Customer_Segment']
#
# # Regression: Amount
# X_reg = df[features]
# y_reg = df['Amount']
#
# # Train/test split
# X_cls_train, X_cls_test, y_cls_train, y_cls_test = train_test_split(X_cls, y_cls, test_size=0.2, random_state=42)
# X_reg_train, X_reg_test, y_reg_train, y_reg_test = train_test_split(X_reg, y_reg, test_size=0.2, random_state=42)
#
# # -----------------------------
# # 6️⃣ Classification: Random Forest
# # -----------------------------
# rf_cls = RandomForestClassifier(n_estimators=300, max_depth=12, random_state=42)
# rf_cls.fit(X_cls_train, y_cls_train)
# y_pred_rf_cls = rf_cls.predict(X_cls_test)
# print("Random Forest Classifier Accuracy:", round(accuracy_score(y_cls_test, y_pred_rf_cls), 3))
#
# # -----------------------------
# # 7️⃣ Classification: XGBoost
# # -----------------------------
# xgb_cls = XGBClassifier(n_estimators=300, max_depth=6, learning_rate=0.1,
#                         use_label_encoder=False, eval_metric='mlogloss', random_state=42)
# xgb_cls.fit(X_cls_train, y_cls_train)
# y_pred_xgb_cls = xgb_cls.predict(X_cls_test)
# print("XGBoost Classifier Accuracy:", round(accuracy_score(y_cls_test, y_pred_xgb_cls), 3))
#
# # -----------------------------
# # 8️⃣ Regression: Random Forest
# # -----------------------------
# rf_reg = RandomForestRegressor(n_estimators=300, max_depth=12, random_state=42)
# rf_reg.fit(X_reg_train, y_reg_train)
# y_pred_rf_reg = rf_reg.predict(X_reg_test)
# print("\nRandom Forest Regressor for Amount")
# print("R² Score:", round(r2_score(y_reg_test, y_pred_rf_reg), 3))
# print("MAE:", round(mean_absolute_error(y_reg_test, y_pred_rf_reg), 3))


Random Forest Classifier Accuracy: 0.954


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


XGBoost Classifier Accuracy: 0.953

Random Forest Regressor for Amount
R² Score: 0.996
MAE: 11.634


In [4]:
# -----------------------------
# 1️⃣ Imports
# -----------------------------
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, r2_score, mean_absolute_error

# -----------------------------
# 2️⃣ Load dataset
# -----------------------------
# The path is resolved relative to the notebook's working directory.
DATA_PATH = 'data.csv'
df = pd.read_csv(DATA_PATH)

# -----------------------------
# 3️⃣ Encode categorical features
# -----------------------------
# Swap each categorical column for its integer category code. The codes follow
# pandas' inferred category order (sorted unique values), so they carry no
# ordinal meaning, and missing values are encoded as -1.
CATEGORICAL_COLUMNS = ['Gender', 'Income', 'Customer_Segment',
                       'Product_Category', 'Shipping_Method', 'Payment_Method']
for col in CATEGORICAL_COLUMNS:
    df[col] = pd.Categorical(df[col]).codes

# -----------------------------
# 4️⃣ Features and targets
# -----------------------------
# Both tasks are trained on the same predictor columns.
# NOTE(review): 'Total_Amount' is used as a feature while 'Amount' is the
# regression target. If Total_Amount is derived from Amount (for example
# Amount * Total_Purchases), this is target leakage and the regression score
# is inflated — TODO confirm against the dataset's column definitions.
features = [
    'Age', 'Gender', 'Income', 'Total_Purchases', 'Total_Amount',
    'Product_Category', 'Shipping_Method', 'Payment_Method', 'Ratings',
]

# Classification task: predict Customer_Segment
X_cls, y_cls = df[features], df['Customer_Segment']

# Regression task: predict Amount
X_reg, y_reg = df[features], df['Amount']

# Hold out 20% of the rows for evaluation; both tasks use the same seed.
split_options = {'test_size': 0.2, 'random_state': 42}
X_cls_train, X_cls_test, y_cls_train, y_cls_test = train_test_split(X_cls, y_cls, **split_options)
X_reg_train, X_reg_test, y_reg_train, y_reg_test = train_test_split(X_reg, y_reg, **split_options)

# -----------------------------
# 5️⃣ Classification: Random Forest
# -----------------------------
# 300 trees, each capped at depth 12; the seed is fixed so reruns match.
rf_cls = RandomForestClassifier(
    n_estimators=300,
    max_depth=12,
    random_state=42,
)
rf_cls.fit(X_cls_train, y_cls_train)

# Score the fitted model on the held-out rows.
y_pred_rf_cls = rf_cls.predict(X_cls_test)
rf_cls_accuracy = accuracy_score(y_cls_test, y_pred_rf_cls)
print("Random Forest Classifier Accuracy:", round(rf_cls_accuracy, 3))

# -----------------------------
# 6️⃣ Classification: XGBoost
# -----------------------------
# Gradient-boosted trees: 300 boosting rounds, depth 6, learning rate 0.1,
# evaluated with multiclass log-loss. The seed is fixed so reruns match.
# `use_label_encoder` is intentionally not passed: it is deprecated, ignored by
# XGBoost, and only triggers a "Parameters: { "use_label_encoder" } are not
# used." warning on every fit.
xgb_cls = XGBClassifier(
    n_estimators=300,
    max_depth=6,
    learning_rate=0.1,
    eval_metric='mlogloss',
    random_state=42,
)
xgb_cls.fit(X_cls_train, y_cls_train)

# Score the fitted model on the held-out rows.
y_pred_xgb_cls = xgb_cls.predict(X_cls_test)
print("XGBoost Classifier Accuracy:", round(accuracy_score(y_cls_test, y_pred_xgb_cls), 3))

# -----------------------------
# 7️⃣ Regression: Random Forest
# -----------------------------
# Same forest settings as the classifier (300 trees, depth 12, fixed seed),
# fitted here to predict the Amount target.
rf_reg = RandomForestRegressor(
    n_estimators=300,
    max_depth=12,
    random_state=42,
)
rf_reg.fit(X_reg_train, y_reg_train)

# Compute held-out metrics first, then report them rounded to 3 decimals.
y_pred_rf_reg = rf_reg.predict(X_reg_test)
rf_reg_r2 = r2_score(y_reg_test, y_pred_rf_reg)
rf_reg_mae = mean_absolute_error(y_reg_test, y_pred_rf_reg)

print("\nRandom Forest Regressor for Amount")
print("R² Score:", round(rf_reg_r2, 3))
print("MAE:", round(rf_reg_mae, 3))


Random Forest Classifier Accuracy: 0.967


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


XGBoost Classifier Accuracy: 0.966

Random Forest Regressor for Amount
R² Score: 0.996
MAE: 11.36
