In [1]:
import pandas as pd
import joblib
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder




In [2]:
# Load Dataset
# Read the raw campaign CSV (path is relative to the working directory) and
# leave the frame as the cell's last expression so the notebook renders it.
DATA_PATH = "WA_Marketing-Campaign.csv"
df = pd.read_csv(DATA_PATH)
df


Unnamed: 0,MarketID,MarketSize,LocationID,AgeOfStore,Promotion,week,SalesInThousands
0,1,Medium,1,4,3,1,33.73
1,1,Medium,1,4,3,2,35.67
2,1,Medium,1,4,3,3,29.03
3,1,Medium,1,4,3,4,39.25
4,1,Medium,2,5,2,1,27.81
...,...,...,...,...,...,...,...
543,10,Large,919,2,1,4,64.34
544,10,Large,920,14,2,1,50.20
545,10,Large,920,14,2,2,45.75
546,10,Large,920,14,2,3,44.29


In [3]:
# Preprocessing
# Normalise the headers (trim surrounding whitespace, lower-case) so columns
# can be addressed with predictable names, then keep only the five columns
# listed below, in this order.
kept_columns = ['marketsize', 'promotion', 'ageofstore', 'salesinthousands', 'week']
df.columns = [header.strip().lower() for header in df.columns]
df = df[kept_columns]

In [4]:
# One-Hot Encoding
# Expand the three categorical columns into one indicator column per category.
# handle_unknown="ignore" makes categories unseen at fit time encode as all
# zeros instead of raising; sparse_output=False returns a dense array.
# NOTE(review): the encoder is fitted on every row, before the train/test
# split performed in a later cell.
categorical_cols = ['marketsize', 'promotion', 'week']
encoder = OneHotEncoder(handle_unknown="ignore", sparse_output=False)
encoded_cols = encoder.fit_transform(df[categorical_cols])
# Carry over df's index: the encoded frame is later joined to df with
# pd.concat(axis=1), which aligns rows by index label, not by position.
# Without this the join is only correct while df has a default RangeIndex.
encoded_df = pd.DataFrame(
    encoded_cols,
    columns=encoder.get_feature_names_out(),
    index=df.index,
)



In [54]:
# Combine Encoded Data
# Replace the raw categorical columns with their one-hot encoded counterparts.
# The cell is written to be safely re-runnable: besides the raw categorical
# columns it also drops any encoded columns appended by a previous run, and
# errors="ignore" skips labels that are already gone. On a fresh kernel the
# result is the same as a plain drop followed by concat.
categorical_cols = ['marketsize', 'promotion', 'week']
columns_to_replace = categorical_cols + list(encoded_df.columns)
df = pd.concat(
    [df.drop(columns=columns_to_replace, errors="ignore"), encoded_df],
    axis=1,
)

In [55]:
# Render the combined frame (numeric columns plus the one-hot indicator
# columns) as the cell output for a visual check.
df

Unnamed: 0,ageofstore,salesinthousands,marketsize_Large,marketsize_Medium,marketsize_Small,promotion_1,promotion_2,promotion_3,week_1,week_2,week_3,week_4
0,4,33.73,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0
1,4,35.67,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
2,4,29.03,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0
3,4,39.25,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
4,5,27.81,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...
543,2,64.34,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
544,14,50.20,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0
545,14,45.75,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0
546,14,44.29,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0


In [56]:
# Train-Test Split
# Separate the target column from the feature matrix, then hold out 20% of
# the rows for testing. random_state fixes the shuffle so the split is
# reproducible across runs.
target_column = "salesinthousands"
y = df[target_column]
X = df.drop(columns=target_column)
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [57]:
# Train Model
# Fit a 100-tree random forest regressor on the training split;
# random_state pins the result so re-runs give the same model.
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate on the held-out split, which was otherwise never used, so the
# model's quality is visible before it is saved. For regressors, .score()
# returns the coefficient of determination (R^2) on the given data.
test_r2 = model.score(X_test, y_test)
print(f"Test R^2: {test_r2:.3f}")

In [None]:
# Save Model & Encoder
# Persist both fitted objects with joblib, one file each, written to the
# current working directory. Dict insertion order keeps the model first.
artifacts = {
    "sales_predictor.pkl": model,
    "encoder.pkl": encoder,
}
for filename, artifact in artifacts.items():
    joblib.dump(artifact, filename)
print("Model and encoder saved successfully!")

Model and encoder saved successfully!


In [59]:
# Write the current df to disk as CSV; index=False omits the row-index column.
OUTPUT_CSV = 'output.csv'
df.to_csv(OUTPUT_CSV, index=False)