In [14]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import classification_report

In [3]:
df = pd.read_csv('Project4.csv')

In [4]:
df.head()

Unnamed: 0,ProductID,ProductType,Region,UnitsSold,OnPromotion,DemandCategory
0,edb820,Clothing,West,306,0,High
1,5b31ca,Gadget,West,231,1,High
2,30d43e,Clothing,West,400,1,High
3,fd9f14,Gadget,East,226,1,High
4,5e999c,Clothing,West,162,1,Medium


In [None]:
df.isnull().sum() #no nulls

ProductID         0
ProductType       0
Region            0
UnitsSold         0
OnPromotion       0
DemandCategory    0
dtype: int64

In [6]:
# Encoding categorical variables
label_cols = ['ProductType', 'Region', 'DemandCategory']
label_encoders = {}

In [7]:
for col in label_cols:
    le = LabelEncoder()
    df[col + '_Encoded'] = le.fit_transform(df[col])
    label_encoders[col] = le  

In [11]:
df.head()

Unnamed: 0,ProductID,ProductType,Region,UnitsSold,OnPromotion,DemandCategory,ProductType_Encoded,Region_Encoded,DemandCategory_Encoded,AvgSalesPerDay,PromoFrequency
0,edb820,Clothing,West,306,0,High,1,3,0,43.714286,0.0
1,5b31ca,Gadget,West,231,1,High,2,3,0,33.0,1.0
2,30d43e,Clothing,West,400,1,High,1,3,0,57.142857,1.0
3,fd9f14,Gadget,East,226,1,High,2,0,0,32.285714,1.0
4,5e999c,Clothing,West,162,1,Medium,1,3,2,23.142857,1.0


In [9]:
#Average sales per day assuming its weekly sales
df['AvgSalesPerDay'] = df['UnitsSold'] / 7

In [10]:
#promo frequemcy:
promo_freq = df.groupby('ProductID')['OnPromotion'].mean().reset_index()
promo_freq.columns = ['ProductID', 'PromoFrequency']
df = df.merge(promo_freq, on='ProductID', how='left')

In [12]:
#Scaling:
scaler = MinMaxScaler()
scaled_cols = ['UnitsSold', 'AvgSalesPerDay', 'PromoFrequency']

df_scaled = df.copy()
df_scaled[scaled_cols] = scaler.fit_transform(df[scaled_cols])

In [13]:
df_scaled.head()

Unnamed: 0,ProductID,ProductType,Region,UnitsSold,OnPromotion,DemandCategory,ProductType_Encoded,Region_Encoded,DemandCategory_Encoded,AvgSalesPerDay,PromoFrequency
0,edb820,Clothing,West,0.610063,0,High,1,3,0,0.610063,0.0
1,5b31ca,Gadget,West,0.45283,1,High,2,3,0,0.45283,1.0
2,30d43e,Clothing,West,0.807128,1,High,1,3,0,0.807128,1.0
3,fd9f14,Gadget,East,0.442348,1,High,2,0,0,0.442348,1.0
4,5e999c,Clothing,West,0.308176,1,Medium,1,3,2,0.308176,1.0


In [15]:
features = ['UnitsSold', 'AvgSalesPerDay', 'PromoFrequency', 
            'ProductType_Encoded', 'Region_Encoded']
target = 'DemandCategory_Encoded'

In [24]:
# Encoding target (High = 0, Medium = 1, Low = 2)
if target not in df.columns:
    le_target = LabelEncoder()
    df[target] = le_target.fit_transform(df['DemandCategory'])

In [25]:
#X-y Splitting
X = df[features]
y = df[target]

In [26]:
#Train-Test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [27]:
#build Random forest
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)
rf_preds = rf_model.predict(X_test)

In [29]:
print("\n🎯 Random Forest Classification Report:")
print(classification_report(y_test, rf_preds))


🎯 Random Forest Classification Report:
              precision    recall  f1-score   support

           0       0.25      0.50      0.33         2
           1       0.50      0.33      0.40         3
           2       0.75      0.60      0.67         5

    accuracy                           0.50        10
   macro avg       0.50      0.48      0.47        10
weighted avg       0.57      0.50      0.52        10



In [30]:
#SVM model
svm_model = SVC(kernel='linear')  # Linear SVM for interpretability
svm_model.fit(X_train, y_train)
svm_preds = svm_model.predict(X_test)

In [31]:
print("\n🎯 SVM Classification Report:")
print(classification_report(y_test, svm_preds))


🎯 SVM Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         2
           1       0.50      0.33      0.40         3
           2       0.50      0.40      0.44         5

    accuracy                           0.30        10
   macro avg       0.33      0.24      0.28        10
weighted avg       0.40      0.30      0.34        10

