In [None]:
# 📦 Libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from google.colab import files

# 📤 Upload and load dataset
# files.upload() opens Colab's file picker and returns a dict keyed by the
# uploaded file names; Colab also saves each file into the working directory,
# which is why the name can be handed straight to read_csv below.
uploaded = files.upload()
if not uploaded:
    # A cancelled dialog gives an empty dict; fail with a readable message
    # instead of the bare StopIteration that next(iter({})) would raise.
    raise ValueError("No file was uploaded - re-run this cell and choose a CSV file.")
df = pd.read_csv(next(iter(uploaded)))  # only the first uploaded file is used
print("✅ Data loaded — preview:")
# A bare `df.head()` is rendered only when it is the last expression of a
# cell; in the middle of a cell it is silently discarded, so print it.
print(df.head())

# 🔁 Encode categorical variables
# Work on a copy so the raw frame `df` keeps its original string labels.
data = df.copy()
categorical_cols = [
    'Gender', 'City', 'Occupation', 'Product_Category',
    'Weather', 'Time_of_Day', 'Loyalty_Tier', 'Persona',
]

# One independent encoder per column, kept by column name so the fitted
# classes can be looked up again later.
label_encoders = {col: LabelEncoder() for col in categorical_cols}
for col, le in label_encoders.items():
    # Fit on the column and overwrite it in `data` with its integer codes.
    data[col] = le.fit_transform(data[col])

# 🎯 Define features and target
# Column to predict, and the model inputs in the order the model will see them.
target = 'Purchase_Amount'
features = [
    'Age', 'Gender', 'City', 'Occupation', 'Product_Category',
    'Weather', 'Time_of_Day', 'Loyalty_Tier', 'CustomerSegment', 'Persona',
]

# Both selections come from the encoded frame `data`, not the raw `df`.
X, y = data[features], data[target]

# 🔀 Split
# Hold out 20 % of the rows for evaluation; the fixed seed makes the split
# repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
print("✅ Data preprocessed and split.")

# 🧠 Train model
# Fit on the training split only; fit() returns the estimator itself, so the
# construction and the training can be chained. The seed keeps it repeatable.
model = GradientBoostingRegressor(random_state=42).fit(X_train, y_train)

# 📊 Evaluate
# Score the model on the held-out test split.
y_pred = model.predict(X_test)
mae, r2 = mean_absolute_error(y_test, y_pred), r2_score(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))  # RMSE = sqrt(MSE)

print(
    "📈 Model Evaluation:",
    f"MAE: {mae:.2f} | RMSE: {rmse:.2f} | R²: {r2:.3f}",
    sep="\n",
)

# 🏷️ Predict on full data and cap at actual price
# Score every row of X (training and test rows alike).
predicted_prices = model.predict(X)
# Element-wise minimum: the stored price is never above the recorded
# Purchase_Amount, i.e. the model may only lower a price, never raise it.
df['PredictedPrice'] = np.minimum(df['Purchase_Amount'], predicted_prices)

print("🔍 Sample predicted (discounted) prices:\n")
# First ten rows, restricted to the identifying and price columns.
print(df.head(10)[['User_ID', 'Product_ID', 'Persona', 'Purchase_Amount', 'PredictedPrice']])

# 🔎 Extract known categories from label encoders
# Snapshot every fitted encoder's classes_ as a plain list, keyed by column.
# A value's position in its list is used later as its integer code.
known_categories = {}
for col in categorical_cols:
    known_categories[col] = list(label_encoders[col].classes_)

# ✅ Safe encode with warning
def safe_encode_with_log(value, allowed_list, col_name):
    """Return the position of ``value`` in ``allowed_list``, or -1 if absent.

    Args:
        value: The raw category value to encode.
        allowed_list: The known categories, in encoding order. Lists and
            tuples are used as they are; any other iterable (an ndarray, a
            dict view, a generator, ...) is copied into a list first.
        col_name: Column name, used only in the warning message.

    Returns:
        The zero-based index of the first element equal to ``value``, or
        ``-1`` when no element matches (a warning is printed in that case).
    """
    # Only lists and tuples are trusted to provide .index(); anything else
    # would pass an `in` test and then fail on the missing method.
    if isinstance(allowed_list, (list, tuple)):
        categories = allowed_list
    else:
        categories = list(allowed_list)

    # One scan instead of `in` followed by `.index()`.
    try:
        return categories.index(value)
    except ValueError:
        print(f"⚠️ Unseen value in {col_name}: {value}")
        return -1

# 🧾 Example: Dynamic price prediction with unseen values
# Demo request. The categorical values marked "unseen" are the author's
# choice of labels expected to be missing from the uploaded data set, so each
# of them should trigger the warning path and be encoded as -1.
sample_input = {
    'Age': 29,
    'Gender': 'Non-Binary',             # expected unseen
    'City': 'Metro',                    # expected unseen
    'Occupation': 'Artist',             # expected unseen
    'Product_Category': 'Accessories',  # expected unseen
    'Weather': 'Snowy',                 # expected unseen
    'Time_of_Day': 'Late Night',        # expected unseen
    'Loyalty_Tier': 'Diamond',          # expected unseen
    'CustomerSegment': 1,               # numeric, passed through unchanged
    'Persona': 'Premium Buyer',         # known only if present in training data
}

# 🧠 Encode input sample
# Columns that have a category list are mapped to their integer code; every
# other column (Age, CustomerSegment) is copied through untouched. Iterating
# over sample_input keeps the key order of the request.
encoded_sample = {
    name: (
        safe_encode_with_log(raw, known_categories[name], name)
        if name in known_categories
        else raw
    )
    for name, raw in sample_input.items()
}

# 🧮 Predict personalized price and enforce discount rule
# No per-user original price is available for this demo request, so the mean
# Purchase_Amount of the data set stands in as the price ceiling.
fallback_price = df['Purchase_Amount'].mean()

# One-row frame with its columns arranged in the model's feature order.
input_df = pd.DataFrame([encoded_sample]).loc[:, features]
predicted_price = model.predict(input_df)[0]

# Discount-only rule: never quote more than the ceiling.
final_price = min(predicted_price, fallback_price)

print(f"🎯 Predicted Personalized Price (discount-only): ${final_price:.2f}")


Saving Segmented_Customers_With_Personas (1).csv to Segmented_Customers_With_Personas (1).csv
✅ Data loaded — preview:
✅ Data preprocessed and split.
📈 Model Evaluation:
MAE: 6.93 | RMSE: 9.55 | R²: 0.987
🔍 Sample predicted (discounted) prices:

  User_ID Product_ID          Persona  Purchase_Amount  PredictedPrice
0  U10000       P442    Premium Buyer           286.55      270.621069
1  U10001       P576    Premium Buyer           249.10      249.100000
2  U10002       P681   Bargain Hunter           148.20      135.511979
3  U10003       P971     Budget Buyer            75.71       75.710000
4  U10004       P322    Premium Buyer            94.81       94.810000
5  U10005       P157    Premium Buyer           264.07      258.583070
6  U10006       P721     Budget Buyer            43.92       40.232047
7  U10007       P770  Impulsive Buyer           126.37      126.370000
8  U10008       P393  Impulsive Buyer            61.09       61.090000
9  U10009       P644     Budget Buyer       

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive.
# NOTE(review): this normally prompts for authorisation on first run — confirm.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
import os
# Target folder on the mounted Drive; created on first use, reused afterwards.
save_dir = "/content/drive/MyDrive/DynamicPricingModels"
os.makedirs(save_dir, exist_ok=True)

# 💾 Save the model using joblib
import joblib
model_path = f"{save_dir}/dynamic_price_model.pkl"
joblib.dump(model, model_path)
print(f"✅ Model saved to: {model_path}")

# The regressor alone cannot score new data: it expects the integer codes
# produced by the fitted label encoders, in the training column order. Store
# both next to the model (in a separate file, so the model pickle keeps its
# existing format) so a later session can rebuild an input row.
preprocessing_path = f"{save_dir}/dynamic_price_preprocessing.pkl"
joblib.dump({"label_encoders": label_encoders, "features": features}, preprocessing_path)
print(f"✅ Preprocessing saved to: {preprocessing_path}")

✅ Model saved to: /content/drive/MyDrive/DynamicPricingModels/dynamic_price_model.pkl
