<a href="https://colab.research.google.com/github/meghabk2002/supermart_grocery_sales/blob/main/megha_bk.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import pandas as pd
import numpy as np

# Load the raw data. The path is the Colab location this notebook was written
# against; change DATA_PATH when running somewhere else.
DATA_PATH = '/content/sample_data/Supermart Grocery Sales - Retail Analytics Dataset.csv'
raw_df = pd.read_csv(DATA_PATH)

# Preprocessing
# Parse the order dates. errors='coerce' turns every value that cannot be
# parsed into NaT instead of raising, so those rows are removed below.
# NOTE(review): if the file mixes date layouts (e.g. both '-' and '/'
# separators), recent pandas versions may coerce every value that does not
# match the inferred layout to NaT. Check the count printed below; pandas >= 2.0
# accepts format='mixed' for that case.
order_dates = pd.to_datetime(raw_df['Order Date'], dayfirst=False, errors='coerce')

# Make the data loss visible rather than discarding rows silently.
n_unparsed = int(order_dates.isna().sum())
if n_unparsed:
    print(f"Dropping {n_unparsed} of {len(raw_df)} rows with a missing or unparseable 'Order Date'")

# Build the working frame without mutating raw_df, so this step can be re-run
# and the untouched data stays available for inspection. The original row
# index labels are kept (no reset_index).
df = (
    raw_df
    .assign(**{'Order Date': order_dates})
    .dropna(subset=['Order Date'])
    .assign(**{
        # Calendar features derived from the parsed order date.
        'Month': lambda d: d['Order Date'].dt.month,
        'DayOfWeek': lambda d: d['Order Date'].dt.dayofweek,
        # Sales after applying the discount fraction.
        'Net Sales': lambda d: d['Sales'] * (1 - d['Discount']),
        # A zero Sales value would produce +/-inf; treat the margin as
        # undefined (NaN) for those rows instead.
        'Profit Margin': lambda d: d['Profit'] / d['Sales'].replace(0, np.nan),
    })
)

# Columns that are routed through the one-hot encoder; every other feature
# column is passed to the regressor unchanged.
categorical = ['Category', 'Sub Category', 'Region', 'State']

# Feature matrix (encoded columns first, then the pass-through columns) and
# the regression target.
X = df[categorical + ['Sales', 'Discount', 'Net Sales', 'Month', 'DayOfWeek']]
y = df['Profit']

# Encoding step: one-hot encode the categorical columns. With
# handle_unknown='ignore', a category not seen during fit is encoded as all
# zeros at predict time instead of raising.
preprocessor = ColumnTransformer(
    [('cat', OneHotEncoder(handle_unknown='ignore'), categorical)],
    remainder='passthrough',
)

# Full model: preprocessing followed by a seeded 100-tree random forest.
model = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor', RandomForestRegressor(n_estimators=100, random_state=42)),
])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the pipeline on the training rows, then predict the held-out rows.
y_pred = model.fit(X_train, y_train).predict(X_test)

# Score the held-out predictions: mean absolute error, root mean squared
# error (square root of the MSE), and coefficient of determination.
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
r2 = r2_score(y_test, y_pred)

for label, score in (("MAE:", mae), ("RMSE:", rmse), ("R2 Score:", r2)):
    print(label, score)

# Build the export table: the held-out feature rows plus the true and the
# predicted profit. assign() returns a new frame, so X_test is left untouched.
results_df = X_test.assign(**{
    'Actual Profit': y_test.values,
    'Predicted Profit': y_pred,
})

# Write the table to CSV without the row index and confirm on stdout.
results_df.to_csv('rf_predictions.csv', index=False)
print("Predictions saved to 'rf_predictions.csv'")


# Offer the CSV as a browser download when running on Google Colab.
# The google.colab package is only importable on Colab runtimes, so guard the
# import: on any other kernel the notebook should still finish cleanly, with
# the file left where to_csv wrote it.
try:
    from google.colab import files
except ImportError:
    print("google.colab is not available; 'rf_predictions.csv' was left in the working directory")
else:
    files.download('rf_predictions.csv')

MAE: 157.08176872682324
RMSE: 199.17983347404362
R2 Score: 0.28818891798258817
Predictions saved to 'rf_predictions.csv'


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>