# 🛒 Retail Analysis Project with EDA and Machine Learning
This project includes Sales Trend Analysis, EDA, and Market Basket Analysis. We'll also use ML algorithms to predict sales.

## 📊 Load and Display Data

In [None]:
import pandas as pd

# Read the retail sales CSV into a DataFrame.
# The file name is relative to the working directory; change it if the data lives elsewhere.
df = pd.read_csv(filepath_or_buffer='RetailSales.csv')

# Preview the first five rows (displayed as the cell's last expression).
df.head(n=5)

## 🧹 Exploratory Data Analysis (EDA)

In [None]:
# Print a concise summary of the DataFrame: column names, non-null counts and dtypes.
df.info()
# Descriptive statistics (by default for the numeric columns); being the last
# expression in the cell, the resulting table is what the notebook displays.
df.describe()

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Convert the Date column to datetimes so it can be grouped and drawn on a time axis.
df['Date'] = pd.to_datetime(df['Date'])

# Total Weekly_Sales across all rows that share the same date.
sales_by_date = df.groupby('Date')['Weekly_Sales'].sum().reset_index()

# Line chart of the aggregated sales over time.
plt.figure(figsize=(12, 6))
sns.lineplot(data=sales_by_date, x='Date', y='Weekly_Sales')
plt.title('Total Weekly Sales Over Time')
plt.xlabel('Date')
plt.ylabel('Sales')
plt.xticks(rotation=45)  # slant the date labels so they do not overlap
plt.tight_layout()
plt.show()

## 🛠️ Data Preprocessing

In [None]:
from sklearn.model_selection import train_test_split

# Drop rows containing any missing value, then keep only rows with positive sales.
# The explicit .copy() turns the filtered result into an independent DataFrame,
# so the column assignments below do not trigger SettingWithCopyWarning.
df = df.dropna()
df = df[df['Weekly_Sales'] > 0].copy()

# Ensure Date is a datetime column so the .dt accessor works even when the
# plotting cell (which also converts it) has not been run; converting an
# already-converted column leaves it unchanged.
df['Date'] = pd.to_datetime(df['Date'])

# Feature engineering: split the date into numeric calendar parts.
df['Month'] = df['Date'].dt.month
df['Day'] = df['Date'].dt.day
df['Year'] = df['Date'].dt.year

# Select features and target
X = df[['Store', 'Dept', 'Month', 'Day', 'Year']]
y = df['Weekly_Sales']

# Train-test split: hold out 20% of the rows, with a fixed seed for repeatability.
# NOTE(review): this is a random split over dated rows, so test rows can fall
# between training dates — confirm that is acceptable for this analysis.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## 🤖 Machine Learning Model - Random Forest Regressor

In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Fit a random forest with default hyperparameters (seeded so runs are repeatable),
# then predict on the held-out test features.
model = RandomForestRegressor(random_state=42).fit(X_train, y_train)
y_pred = model.predict(X_test)

# Evaluation metrics on the test set; RMSE is the square root of MSE.
r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = mse ** 0.5

# One line per metric, each rounded to two decimals.
print(
    f"MAE: {mae:.2f}",
    f"MSE: {mse:.2f}",
    f"RMSE: {rmse:.2f}",
    f"R²: {r2:.2f}",
    sep="\n",
)

## 🧪 Hyperparameter Tuning - GridSearchCV

In [None]:
from sklearn.model_selection import GridSearchCV

# Candidate settings: two forest sizes x three depth limits = six combinations.
param_grid = {'n_estimators': [50, 100], 'max_depth': [None, 10, 20]}

# Search the grid with 3-fold cross-validation, ranking candidates by R²
# and using all available CPU cores (n_jobs=-1).
base_forest = RandomForestRegressor(random_state=42)
grid_search = GridSearchCV(
    estimator=base_forest,
    param_grid=param_grid,
    scoring='r2',
    cv=3,
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

# Estimator built from the best-scoring parameter combination.
best_model = grid_search.best_estimator_

# Score the tuned model on the held-out test set.
best_pred = best_model.predict(X_test)
best_r2 = r2_score(y_test, best_pred)
print(f"Best R² after tuning: {best_r2:.2f}")