# Sales Trends Analysis

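This notebook explores sales trends in the Superstore dataset. It covers monthly and seasonal sales patterns, category and sub-category performance, regional and state breakdowns, customer segments, and a closing summary of key metrics, using interactive Plotly visualizations throughout.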

In [1]:
# Import required libraries
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [2]:
# Read the cleaned Superstore export into memory.
raw_df = pd.read_csv('../data/cleaned_superstore_sales.csv')

# Parse whichever of the expected date columns are present.
# format='mixed' lets pandas infer the layout value by value, and
# dayfirst=True resolves ambiguous values as day-before-month (DD/MM/YYYY).
# NOTE(review): if the file is known to hold one fixed layout, an explicit
# format string would be stricter -- confirm against the cleaning step.
date_columns = ['Order Date', 'Ship Date']
parsed_dates = {
    name: pd.to_datetime(raw_df[name], format='mixed', dayfirst=True)
    for name in date_columns
    if name in raw_df.columns
}

# Swap in the parsed columns, then order rows chronologically by order date.
df = raw_df.assign(**parsed_dates).sort_values('Order Date')

print("Dataset loaded successfully!")
print(f"Time period covered: {df['Order Date'].min()} to {df['Order Date'].max()}")

# Show the first few parsed dates as a quick sanity check.
print("\nSample of converted dates:")
print(df[date_columns].head())

Dataset loaded successfully!
Time period covered: 2015-01-03 00:00:00 to 2018-12-30 00:00:00

Sample of converted dates:
  Order Date  Ship Date
0 2015-01-03 2015-01-07
1 2015-01-04 2015-01-08
2 2015-01-04 2015-01-08
3 2015-01-04 2015-01-08
4 2015-01-05 2015-01-12


## 1. Time-based Sales Analysis

In [3]:
# Monthly Sales Trend
# Bucket rows into calendar months of 'Order Date' and compute, per month,
# the summed 'Sales' and the number of non-null 'Sales' rows.
# 'ME' (month-end) replaces the deprecated 'M' alias, which emits a
# FutureWarning; the 'ME' spelling requires pandas >= 2.2.
monthly_sales = (
    df.groupby(pd.Grouper(key='Order Date', freq='ME'))['Sales']
    .agg(['sum', 'count'])
    .reset_index()
)
# NOTE(review): 'count' tallies rows, so "Number of Orders" is a row count.
# If one order can span several rows, count distinct order identifiers
# instead -- confirm against the dataset schema.
monthly_sales.columns = ['Month', 'Total Sales', 'Number of Orders']

# One subplot with two y-axes: dollars on the left, counts on the right.
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Total sales on the primary (left) axis.
fig.add_trace(
    go.Scatter(x=monthly_sales['Month'], y=monthly_sales['Total Sales'],
               name="Total Sales", line=dict(color='blue')),
    secondary_y=False,
)

# Row counts on the secondary (right) axis so the smaller values stay readable.
fig.add_trace(
    go.Scatter(x=monthly_sales['Month'], y=monthly_sales['Number of Orders'],
               name="Number of Orders", line=dict(color='red')),
    secondary_y=True,
)

fig.update_layout(
    title='Monthly Sales Trend and Number of Orders',
    xaxis_title='Month',
    yaxis_title='Total Sales ($)',
    yaxis2_title='Number of Orders'
)

fig.show()

  monthly_sales = df.groupby(pd.Grouper(key='Order Date', freq='M'))['Sales'].agg(['sum', 'count']).reset_index()


In [4]:
# Seasonal Analysis
# Tag every row with the calendar month and year of its order date.
order_dt = df['Order Date'].dt
df['Month'] = order_dt.month
df['Year'] = order_dt.year

# Total sales for each (year, month) pair, returned as a flat frame.
seasonal_sales = df.groupby(['Year', 'Month'], as_index=False)['Sales'].sum()

# One line per year over the month number, so the same months of different
# years can be compared directly.
axis_labels = {'Month': 'Month', 'Sales': 'Total Sales ($)', 'Year': 'Year'}
fig = px.line(
    seasonal_sales,
    x='Month',
    y='Sales',
    color='Year',
    title='Monthly Sales Patterns by Year',
    labels=axis_labels,
)

fig.show()

## 2. Category and Product Analysis

In [5]:
# Sales by Category
# Per category: summed 'Sales' and the number of non-null 'Sales' rows.
category_sales = df.groupby('Category')['Sales'].agg(['sum', 'count']).reset_index()
category_sales.columns = ['Category', 'Total Sales', 'Number of Orders']

# Dollar totals and row counts have different units and very different
# magnitudes, so each measure gets its own y-axis. Distinct offsetgroups keep
# the two bars side by side even though they sit on different axes.
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(
    go.Bar(x=category_sales['Category'], y=category_sales['Total Sales'],
           name='Total Sales', offsetgroup=0),
    secondary_y=False,
)
fig.add_trace(
    go.Bar(x=category_sales['Category'], y=category_sales['Number of Orders'],
           name='Number of Orders', offsetgroup=1),
    secondary_y=True,
)
fig.update_layout(
    title='Sales and Orders by Category',
    barmode='group',
    xaxis_title='Category',
    yaxis_title='Total Sales ($)',
    yaxis2_title='Number of Orders',
)
fig.show()

# Top 10 Sub-Categories
# Series of summed sales indexed by sub-category, largest first, top ten kept.
subcategory_sales = df.groupby('Sub-Category')['Sales'].sum().sort_values(ascending=False).head(10)

# Turn the index into a real column so the x field (and its axis title) is
# named explicitly rather than depending on how an external Index is labelled.
fig = px.bar(subcategory_sales.reset_index(), x='Sub-Category', y='Sales',
             title='Top 10 Sub-Categories by Sales',
             labels={'Sales': 'Total Sales ($)'})
fig.show()

## 3. Regional Analysis

In [6]:
# Sales by Region
# Summed sales per region, as a two-column frame for the pie chart.
region_sales = df.groupby('Region')['Sales'].sum().reset_index()

fig = px.pie(region_sales, values='Sales', names='Region',
             title='Sales Distribution by Region')
fig.show()

# Top 10 States by Sales
# Series of summed sales indexed by state, largest first, top ten kept.
state_sales = df.groupby('State')['Sales'].sum().sort_values(ascending=False).head(10)

# Turn the index into a real column so the x field (and its axis title) is
# named explicitly rather than depending on how an external Index is labelled.
fig = px.bar(state_sales.reset_index(), x='State', y='Sales',
             title='Top 10 States by Sales',
             labels={'Sales': 'Total Sales ($)'})
fig.show()

## 4. Customer Segment Analysis

In [7]:
# Sales by Customer Segment
# Per segment: summed 'Sales' and the number of non-null 'Sales' rows.
# NOTE(review): 'count' tallies rows, so "Number of Orders" is a row count;
# confirm whether distinct orders were intended.
segment_sales = df.groupby('Segment')['Sales'].agg(['sum', 'count']).reset_index()
segment_sales.columns = ['Segment', 'Total Sales', 'Number of Orders']

# Two side-by-side pies. Each pie is labelled through subplot_titles, which
# places the caption above its subplot; centred annotations would be drawn on
# top of the slices because these pies have no hole.
fig = make_subplots(
    rows=1, cols=2,
    specs=[[{"type": "pie"}, {"type": "pie"}]],
    subplot_titles=("Total Sales", "Number of Orders"),
)

# Left pie: share of total sales per segment.
fig.add_trace(
    go.Pie(labels=segment_sales['Segment'], values=segment_sales['Total Sales'],
           name="Total Sales"),
    row=1, col=1
)

# Right pie: share of row count per segment.
fig.add_trace(
    go.Pie(labels=segment_sales['Segment'], values=segment_sales['Number of Orders'],
           name="Number of Orders"),
    row=1, col=2
)

fig.update_layout(
    title='Sales and Orders Distribution by Customer Segment'
)

fig.show()

## 5. Key Insights Summary

In [8]:
# Calculate key metrics
print("Key Sales Metrics:")
print("-" * 50)

# Total sales: sum of the 'Sales' column over every row.
total_sales = df['Sales'].sum()
print(f"Total Sales: ${total_sales:,.2f}")

# Average order value: mean of the 'Sales' column.
# NOTE(review): this averages per row; if one order can span several rows,
# a true per-order average needs sales summed per order first -- confirm.
avg_order = df['Sales'].mean()
print(f"Average Order Value: ${avg_order:.2f}")

# For each dimension the per-group totals are computed once and reused for
# both the winning label (idxmax) and its value (max).

# Best performing category
category_totals = df.groupby('Category')['Sales'].sum()
best_category = category_totals.idxmax()
best_category_sales = category_totals.max()
print(f"Best Performing Category: {best_category} (${best_category_sales:,.2f})")

# Best performing region
region_totals = df.groupby('Region')['Sales'].sum()
best_region = region_totals.idxmax()
best_region_sales = region_totals.max()
print(f"Best Performing Region: {best_region} (${best_region_sales:,.2f})")

# Most valuable customer segment
segment_totals = df.groupby('Segment')['Sales'].sum()
best_segment = segment_totals.idxmax()
best_segment_sales = segment_totals.max()
print(f"Most Valuable Customer Segment: {best_segment} (${best_segment_sales:,.2f})")

Key Sales Metrics:
--------------------------------------------------
Total Sales: $2,261,536.78
Average Order Value: $230.77
Best Performing Category: Technology ($827,455.87)
Best Performing Region: West ($710,219.68)
Most Valuable Customer Segment: Consumer ($1,148,060.53)
