# 📊 Sales Data Analysis - Jupyter Notebook

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px

# Apply seaborn's 'whitegrid' theme to every figure drawn after this cell.
# set_theme() is the documented entry point (seaborn >= 0.11); sns.set() is
# only kept as a legacy alias for it and may be removed in a future release.
sns.set_theme(style='whitegrid')

In [None]:
# Read the sales CSV, then convert both date columns to datetimes in one pass.
# errors='coerce' makes unparseable entries become NaT instead of raising.
df = pd.read_csv('../data/superstore_sales.csv')
df[['Order Date', 'Ship Date']] = df[['Order Date', 'Ship Date']].apply(
    pd.to_datetime, errors='coerce'
)

In [None]:
# Derive calendar features from the order date; the monthly trend cell
# groups on these two columns.
df['Year'], df['Month'] = df['Order Date'].dt.year, df['Order Date'].dt.month

### 1️⃣ Sales by Region

In [None]:
# Total sales per region, kept as a flat two-column frame (Region, Sales).
region_sales = df.groupby('Region', as_index=False)['Sales'].sum()

# One bar per region, with the x tick labels rotated by 45 degrees.
sns.barplot(x='Region', y='Sales', data=region_sales)
plt.xticks(rotation=45)
plt.title('Sales by Region')
plt.tight_layout()
plt.show()

### 2️⃣ Monthly Sales Trend

In [None]:
# Sum sales for every (Year, Month) pair found in the data.
monthly_sales = df.groupby(['Year', 'Month'], as_index=False)['Sales'].sum()

# Build a real datetime for the x-axis, pinned to the first day of each month.
monthly_sales['Date'] = pd.to_datetime({
    'year': monthly_sales['Year'],
    'month': monthly_sales['Month'],
    'day': 1,
})

# Line chart of the monthly totals over time.
sns.lineplot(x='Date', y='Sales', data=monthly_sales)
plt.xticks(rotation=45)
plt.title('Monthly Sales Trend')
plt.tight_layout()
plt.show()

### 3️⃣ Profit vs Sales (Interactive - Plotly)

In [None]:
# Interactive scatter of profit against sales: one marker per row of df,
# coloured by category, sized by quantity, with the sub-category shown on hover.
# update_layout() returns the same figure, so the title is set in the same chain.
fig = px.scatter(
    df,
    x='Sales',
    y='Profit',
    color='Category',
    size='Quantity',
    hover_data=['Sub-Category'],
).update_layout(title='Sales vs Profit Scatter Plot')
fig.show()