In [26]:
import pandas as pd
import numpy as np
import random
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error

In [29]:
# A = pd.read_csv("./retail_price.csv")
df = pd.read_csv("./retail_store_inventory.csv")

In [None]:
# A.columns

Index(['product_id', 'product_category_name', 'month_year', 'qty',
       'total_price', 'freight_price', 'unit_price', 'product_name_lenght',
       'product_description_lenght', 'product_photos_qty', 'product_weight_g',
       'product_score', 'customers', 'weekday', 'weekend', 'holiday', 'month',
       'year', 's', 'volume', 'comp_1', 'ps1', 'fp1', 'comp_2', 'ps2', 'fp2',
       'comp_3', 'ps3', 'fp3', 'lag_price'],
      dtype='object')

In [30]:
df.head()

Unnamed: 0,Date,Store ID,Product ID,Category,Region,Inventory Level,Units Sold,Units Ordered,Demand Forecast,Price,Discount,Weather Condition,Holiday/Promotion,Competitor Pricing,Seasonality
0,2022-01-01,S001,P0001,Groceries,North,231,127,55,135.47,33.5,20,Rainy,0,29.69,Autumn
1,2022-01-01,S001,P0002,Toys,South,204,150,66,144.04,63.01,20,Sunny,0,66.16,Autumn
2,2022-01-01,S001,P0003,Toys,West,102,65,51,74.02,27.99,10,Sunny,1,31.32,Summer
3,2022-01-01,S001,P0004,Toys,North,469,61,164,62.18,32.72,10,Cloudy,1,34.74,Autumn
4,2022-01-01,S001,P0005,Electronics,East,166,14,135,9.26,73.64,0,Sunny,0,68.95,Summer


In [31]:
np.random.seed(42)
df['cost_price'] = df['Price'] * np.random.uniform(0.6, 0.8, size=len(df))

<h2>Model 1</h2>

In [45]:
# 1. GENERATE HYBRID DATA (Cheap & Expensive)
data = []

def generate_item(base_price):
    cost = base_price * random.uniform(0.6, 0.8) # Cost is 60-80% of price
    stock = random.randint(0, 100)
    
    # Competitor Price (The Anchor)
    competitor = base_price * random.uniform(0.95, 1.2)
    
    # Sales Velocity
    sold = random.randint(0, 200)
    ordered = random.randint(0, 50)
    
    # --- TEACHING THE "DISCOUNT STRATEGY" ---
    # We don't predict price. We predict the MULTIPLIER (e.g., 0.98)
    
    target_multiplier = 0.98 # Default: Undercut by 2%
    
    # Rule 1: High Demand? Charge Premium (1.05x)
    if sold > 100:
        target_multiplier = 1.05 
        
    # Rule 2: Low Stock? Charge Premium (1.10x)
    if stock < 10:
        target_multiplier = 1.10
        
    # Rule 3: Clearance? Deep Discount (0.85x)
    if sold < 5 and stock > 50:
        target_multiplier = 0.85
        
    return [cost, stock, competitor, sold, ordered, target_multiplier]

# Generate 2000 normal items ($10 - $100)
for _ in range(2000):
    data.append(generate_item(random.uniform(10, 100)))

# Generate 500 Luxury items ($1,000 - $10,000)
for _ in range(500):
    data.append(generate_item(random.uniform(1000, 10000)))
df = pd.DataFrame(data, columns=[
    'cost_price', 'stock_level', 'competitor_price', 
    'units_sold', 'units_ordered', 'target_multiplier'
])
df['cost_ratio'] = df['cost_price'] / df['competitor_price'] # How tight is our margin?

features = ['cost_ratio', 'stock_level', 'units_sold', 'units_ordered']
target = 'target_multiplier'

# 3. TRAIN
X = df[features]
y = df[target]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = xgb.XGBRegressor(
    objective='reg:squarederror',
    n_estimators=100,
    learning_rate=0.1,
    max_depth=5
)
model.fit(X_train, y_train)

0,1,2
,objective,'reg:squarederror'
,base_score,
,booster,
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,
,device,
,early_stopping_rounds,
,enable_categorical,False


In [39]:
df = df.rename(columns={
    'Inventory Level': 'stock_level',
    'Competitor Pricing': 'competitor_price',
    'Price': 'target_price', # This is what we want the AI to learn to predict,
    'Units Ordered': 'units_ordered',
    'Units Sold': 'units_sold'
})

<h2>Model 2</h2>

In [33]:
df.head()

Unnamed: 0,Date,Store ID,Product ID,Category,Region,stock_level,Units Sold,Units Ordered,Demand Forecast,target_price,Discount,Weather Condition,Holiday/Promotion,competitor_price,Seasonality,cost_price
0,2022-01-01,S001,P0001,Groceries,North,231,127,55,135.47,33.5,20,Rainy,0,29.69,Autumn,22.609419
1,2022-01-01,S001,P0002,Toys,South,204,150,66,144.04,63.01,20,Sunny,0,66.16,Autumn,49.786902
2,2022-01-01,S001,P0003,Toys,West,102,65,51,74.02,27.99,10,Sunny,1,31.32,Summer,20.891702
3,2022-01-01,S001,P0004,Toys,North,469,61,164,62.18,32.72,10,Cloudy,1,34.74,Autumn,23.549621
4,2022-01-01,S001,P0005,Electronics,East,166,14,135,9.26,73.64,0,Sunny,0,68.95,Summer,46.481843


In [40]:
features = ['cost_price', 'stock_level', 'competitor_price','units_ordered', 'units_sold']
target = 'target_price'
df_clean = df[features + [target]].dropna()

In [None]:
# !pip install xgboost
# !pip install scikit-learn
X = df_clean[features]
y = df_clean[target]

In [42]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = xgb.XGBRegressor(
    objective='reg:squarederror',
    n_estimators=100,
    learning_rate=0.1,
    max_depth=5
)
model.fit(X_train, y_train)

0,1,2
,objective,'reg:squarederror'
,base_score,
,booster,
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,
,device,
,early_stopping_rounds,
,enable_categorical,False


<h2>Prediction</h2?>

In [46]:
predictions = model.predict(X_test)
mae = mean_absolute_error(y_test, predictions)
print(f"✅ Model Trained on {len(df_clean)} real transactions.")
print(f"📊 Accuracy Check: On average, the AI is within ${mae:.2f} of the optimal price.")

✅ Model Trained on 73100 real transactions.
📊 Accuracy Check: On average, the AI is within $0.00 of the optimal price.


In [47]:
model.save_model("pricing_model2.json")