# 🧠 Walmart Sales Analysis

In [None]:
# 📌 1. Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Global plot appearance: seaborn's "whitegrid" style first, then the default
# figure size (set afterwards so the seaborn call cannot overwrite it).
sns.set(style="whitegrid")
plt.rcParams.update({'figure.figsize': (12, 6)})

In [None]:
# 📌 2. Load dataset
# Read the CSV and, in the same expression, replace the textual 'Date' column
# with datetimes parsed using the day-month-year format.
df = pd.read_csv("Walmart_Sales.csv").assign(
    Date=lambda frame: pd.to_datetime(frame['Date'], format='%d-%m-%Y')
)

In [None]:
# 📌 3. Feature engineering
# Add calendar month and year columns derived from the parsed 'Date' column;
# they are used later to group sales by (Year, Month).
df['Month'], df['Year'] = df['Date'].dt.month, df['Date'].dt.year

In [None]:
# 📊 4. Basic Exploratory Analysis
# Per-store totals and averages of Weekly_Sales, both ordered from highest to lowest.
sales_by_store = df.groupby('Store')['Weekly_Sales']
total_sales_per_store = sales_by_store.sum().sort_values(ascending=False)
average_sales_per_store = sales_by_store.mean().sort_values(ascending=False)

# Store labels with the largest and smallest total sales.
print("Highest performing store:", total_sales_per_store.idxmax())
print("Lowest performing store:", total_sales_per_store.idxmin())

# Split weekly sales by the Holiday_Flag column (1 = holiday week, 0 = regular week).
is_holiday_week = df['Holiday_Flag'] == 1
is_regular_week = df['Holiday_Flag'] == 0
holiday_sales = df.loc[is_holiday_week, 'Weekly_Sales']
non_holiday_sales = df.loc[is_regular_week, 'Weekly_Sales']

print("Average Holiday Sales:", holiday_sales.mean())
print("Average Non-Holiday Sales:", non_holiday_sales.mean())

In [None]:
# 📈 5. Time Series Trends
# Total sales across all rows for each date.
sales_over_time = df.groupby('Date')['Weekly_Sales'].agg('sum')

# Mean weekly sales per (Year, Month), kept as ordinary columns rather than an index.
monthly_avg_sales = df.groupby(['Year', 'Month'], as_index=False)['Weekly_Sales'].mean()

# Represent each (Year, Month) pair as a timestamp on the first day of that month.
monthly_avg_sales['Date'] = pd.to_datetime(
    pd.DataFrame({
        'year': monthly_avg_sales['Year'],
        'month': monthly_avg_sales['Month'],
        'day': 1,
    })
)

In [None]:
# 📉 6. Correlation Analysis
# Pairwise correlation (DataFrame.corr() with its default settings) between
# weekly sales and the other numeric columns selected below.
correlation_data = df[['Weekly_Sales', 'Temperature', 'Fuel_Price', 'CPI', 'Unemployment']]
correlation_matrix = correlation_data.corr()
# The line breaks are written as "\n" escapes: a single-quoted string literal
# cannot span physical lines, so the raw newlines previously inside the quotes
# made this statement a SyntaxError.
print("\nCorrelation Matrix:\n", correlation_matrix)

In [None]:
# 📊 7. Visualizations
# Line chart of the per-date sales totals, drawn on an explicitly created Axes.
fig, ax = plt.subplots()
sales_over_time.plot(ax=ax, title='Total Weekly Sales Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Weekly Sales')
ax.grid(True)
plt.show()

# Bar chart of average weekly sales, one bar per store, in the order of the series.
fig, ax = plt.subplots()
average_sales_per_store.plot(kind='bar', ax=ax, title='Average Weekly Sales per Store')
ax.set_xlabel('Store')
ax.set_ylabel('Average Weekly Sales')
plt.show()

# Box plot comparing the Weekly_Sales distribution for each Holiday_Flag value.
fig, ax = plt.subplots()
sns.boxplot(data=df, x='Holiday_Flag', y='Weekly_Sales', ax=ax)
# Replace the 0/1 tick labels with readable names.
ax.set_xticks([0, 1])
ax.set_xticklabels(['Non-Holiday', 'Holiday'])
ax.set_title('Sales Distribution: Holiday vs Non-Holiday Weeks')
ax.set_xlabel('Week Type')
ax.set_ylabel('Weekly Sales')
plt.show()

# Annotated heatmap of the correlation matrix; cell values shown with two decimals.
fig, ax = plt.subplots()
sns.heatmap(correlation_matrix, annot=True, fmt=".2f", cmap='coolwarm', ax=ax)
ax.set_title('Correlation Between Variables')
plt.show()