# Sales Performance Dashboard Analysis - Final Version
## Data Science Assignment

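This notebook builds the sales visualizations (monthly trend, top countries, top products, customer cohort retention, and a combined dashboard) and saves each one as PNG and HTML in the `visualizations/` directory.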

In [None]:
# Import required libraries
import os
import warnings
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
from plotly.subplots import make_subplots

# Suppress all warnings for the rest of the session.
# NOTE(review): a blanket 'ignore' hides every warning category, including
# ones that may point at real problems - consider narrowing the filter.
warnings.filterwarnings('ignore')

# Create the output directory that save_plotly_fig writes into; exist_ok
# makes re-running this cell harmless.
os.makedirs('visualizations', exist_ok=True)

# Reset matplotlib to its default style, then apply seaborn's whitegrid theme.
plt.style.use('default')
sns.set_theme(style="whitegrid")

# Shared color palette; the chart functions pick an entry by index.
colors = ['#2E86C1', '#3498DB', '#5DADE2', '#85C1E9', '#AED6F1']

# Function to save plotly figures
def save_plotly_fig(fig, filename):
    """Write *fig* into the ``visualizations`` folder as PNG and as HTML.

    Args:
        fig: Object exposing ``write_image`` and ``write_html`` (the Plotly
            figures built in this notebook).
        filename: Base file name, without directory or extension.

    NOTE(review): static image export normally needs an extra rendering
    backend (kaleido) to be installed - confirm for this environment.
    """
    png_path = f'visualizations/{filename}.png'
    fig.write_image(png_path)

    html_path = f'visualizations/{filename}.html'
    fig.write_html(html_path)

In [None]:
# Load and clean data
def load_and_clean_data(path='Online Retail Data Set.xlsx'):
    """Load the retail workbook and return a cleaned, analysis-ready frame.

    Cleaning steps, in order:

    1. drop every row that has a missing value in any column;
    2. keep only rows whose ``Quantity`` and ``UnitPrice`` are both positive;
    3. add ``TotalAmount`` as ``Quantity * UnitPrice``;
    4. parse ``InvoiceDate`` as datetime and add ``Month`` as a monthly period.

    Args:
        path: Excel file to read. Defaults to the workbook name this notebook
            has always used, so existing zero-argument calls are unchanged.

    Returns:
        A new DataFrame holding the surviving rows plus the ``TotalAmount``
        and ``Month`` columns.
    """
    raw = pd.read_excel(path)
    complete = raw.dropna()

    is_sale = (complete['Quantity'] > 0) & (complete['UnitPrice'] > 0)
    # Take an explicit copy before adding columns: assigning onto a filtered
    # frame is the chained-assignment pattern pandas warns about, and the
    # copy makes it unambiguous that the new columns belong to this frame only.
    df = complete[is_sale].copy()

    df['TotalAmount'] = df['Quantity'] * df['UnitPrice']
    df['InvoiceDate'] = pd.to_datetime(df['InvoiceDate'])
    df['Month'] = df['InvoiceDate'].dt.to_period('M')
    return df

# Load the data once; the chart functions below read this module-level frame.
df_clean = load_and_clean_data()

## 1. Monthly Sales Trends

In [None]:
def create_monthly_sales_chart():
    """Plot total sales per month as a line chart, save it, and return it.

    Reads the module-level ``df_clean`` frame, sums ``TotalAmount`` within
    each ``Month``, and writes the figure through ``save_plotly_fig`` under
    the name ``monthly_sales_trends``.

    Returns:
        The Plotly figure that was saved.
    """
    totals = df_clean.groupby('Month')['TotalAmount'].sum().reset_index()
    # Convert the month keys to strings before using them as x-axis labels.
    totals['Month'] = totals['Month'].astype(str)

    trend = go.Scatter(
        x=totals['Month'],
        y=totals['TotalAmount'],
        mode='lines+markers',
        line={'color': colors[0], 'width': 3},
        marker={'size': 8},
    )
    layout_options = {
        'title': 'Monthly Sales Trends',
        'xaxis_title': 'Month',
        'yaxis_title': 'Total Sales (£)',
        'template': 'plotly_white',
        'showlegend': False,
    }

    fig = go.Figure(data=[trend])
    fig.update_layout(**layout_options)

    save_plotly_fig(fig, 'monthly_sales_trends')
    return fig

# Build the monthly trend figure (this also saves it to disk) and display it.
monthly_sales_chart = create_monthly_sales_chart()
monthly_sales_chart.show()

## 2. Country Sales Analysis

In [None]:
def create_country_sales_chart():
    """Plot the ten countries with the highest total sales as horizontal bars.

    Reads the module-level ``df_clean`` frame and writes the figure through
    ``save_plotly_fig`` under the name ``country_sales``.

    Returns:
        The Plotly figure that was saved.
    """
    revenue_by_country = df_clean.groupby('Country')['TotalAmount'].sum()
    # Ascending sort followed by tail(10) keeps the ten largest totals,
    # ordered from smallest to largest - presumably so the biggest bar ends
    # up at the top of the horizontal chart.
    leaders = revenue_by_country.sort_values(ascending=True).tail(10)

    bars = go.Bar(
        x=leaders.values,
        y=leaders.index,
        orientation='h',
        marker_color=colors[1],
    )

    fig = go.Figure(data=[bars])
    fig.update_layout(
        template='plotly_white',
        title='Top 10 Countries by Sales',
        xaxis_title='Total Sales (£)',
        yaxis_title='Country',
    )

    save_plotly_fig(fig, 'country_sales')
    return fig

# Build the country ranking figure (this also saves it to disk) and display it.
country_sales_chart = create_country_sales_chart()
country_sales_chart.show()

## 3. Product Analysis Dashboard

In [None]:
def create_product_dashboard():
    """Plot the ten products with the highest revenue as horizontal bars.

    Reads the module-level ``df_clean`` frame, sums ``TotalAmount`` per
    ``Description``, and writes the figure through ``save_plotly_fig`` under
    the name ``product_analysis``.

    Returns:
        The Plotly figure that was saved.
    """
    product_sales = (
        df_clean.groupby('Description')['TotalAmount']
        .sum()
        .sort_values(ascending=False)
    )
    # Pick the ten best sellers, then flip them to ascending order. A
    # horizontal bar chart draws the first category at the bottom, so passing
    # the data in descending order put the best seller at the bottom of a
    # "Top 10" chart. Ascending order matches the country chart and puts the
    # largest bar on top.
    top_products = product_sales.head(10).iloc[::-1]

    fig = go.Figure()
    fig.add_trace(go.Bar(
        x=top_products.values,
        y=top_products.index,
        orientation='h',
        marker_color=colors[2]
    ))

    fig.update_layout(
        title='Top 10 Products by Revenue',
        xaxis_title='Revenue (£)',
        yaxis_title='Product',
        template='plotly_white',
        height=600
    )

    save_plotly_fig(fig, 'product_analysis')
    return fig

# Build the product ranking figure (this also saves it to disk) and display it.
product_dashboard = create_product_dashboard()
product_dashboard.show()

## 4. Customer Cohort Analysis

In [None]:
def create_cohort_analysis():
    """Build, save, and return a customer cohort retention heatmap.

    Each customer is assigned to the month of their earliest invoice
    (``CohortMonth``). ``CohortIndex`` is the number of months between an
    invoice and that cohort month. Every heatmap cell is the count of
    distinct customers active at that index divided by the cohort's count in
    the first index column. The figure is written through ``save_plotly_fig``
    under the name ``cohort_analysis``.

    Returns:
        The Plotly figure that was saved.
    """
    customers = df_clean[df_clean['CustomerID'].notna()].copy()

    first_invoice = customers.groupby('CustomerID')['InvoiceDate'].transform('min')
    customers['CohortMonth'] = first_invoice.dt.to_period('M')

    invoice_month = customers['InvoiceDate'].dt.to_period('M')
    month_offsets = invoice_month - customers['CohortMonth']
    # Subtracting periods yields offset objects; .n is the whole-month count.
    customers['CohortIndex'] = month_offsets.apply(lambda offset: offset.n)

    active_customers = (
        customers.groupby(['CohortMonth', 'CohortIndex'])['CustomerID']
        .nunique()
        .reset_index()
    )
    counts = active_customers.pivot(
        index='CohortMonth',
        columns='CohortIndex',
        values='CustomerID',
    )
    # The first pivot column is the smallest CohortIndex, used as cohort size.
    cohort_size = counts.iloc[:, 0]
    retention = counts.divide(cohort_size, axis=0)

    heatmap = go.Heatmap(
        z=retention.values,
        x=retention.columns,
        y=retention.index.astype(str),
        colorscale='RdYlBu',
        text=np.round(retention.values * 100, 1),
        texttemplate='%{text}%',
    )
    fig = go.Figure(data=heatmap)

    fig.update_layout(
        template='plotly_white',
        title='Customer Cohort Retention Analysis',
        xaxis_title='Cohort Index (Months)',
        yaxis_title='Cohort Month',
    )

    save_plotly_fig(fig, 'cohort_analysis')
    return fig

# Build the cohort retention heatmap (this also saves it to disk) and display it.
cohort_analysis = create_cohort_analysis()
cohort_analysis.show()

## 5. Complete Dashboard

In [None]:
def _cohort_retention_table(df):
    """Return retention fractions indexed by cohort month and month offset."""
    customers = df[df['CustomerID'].notna()].copy()
    first_invoice = customers.groupby('CustomerID')['InvoiceDate'].transform('min')
    customers['CohortMonth'] = first_invoice.dt.to_period('M')
    # Subtracting periods yields offset objects; .n is the whole-month count.
    customers['CohortIndex'] = (
        customers['InvoiceDate'].dt.to_period('M') - customers['CohortMonth']
    ).apply(lambda offset: offset.n)

    active_customers = (
        customers.groupby(['CohortMonth', 'CohortIndex'])['CustomerID']
        .nunique()
        .reset_index()
    )
    counts = active_customers.pivot(
        index='CohortMonth', columns='CohortIndex', values='CustomerID'
    )
    # Divide each row by its first column (the cohort's initial size).
    return counts.divide(counts.iloc[:, 0], axis=0)


def create_complete_dashboard():
    """Build, save, and return a 2x2 dashboard combining all four analyses.

    Panels (row, col):
        (1, 1) monthly sales trend line;
        (1, 2) ten countries with the highest total sales;
        (2, 1) ten products with the highest revenue;
        (2, 2) customer cohort retention heatmap.

    All panels read the module-level ``df_clean`` frame. The figure is written
    through ``save_plotly_fig`` under the name ``complete_dashboard``.
    ``make_subplots`` must be imported from ``plotly.subplots`` at the top of
    the file.

    Returns:
        The Plotly figure that was saved.
    """
    fig = make_subplots(
        rows=2, cols=2,
        subplot_titles=('Monthly Sales', 'Top Countries', 'Top Products', 'Customer Retention')
    )

    # Panel 1: monthly sales trend.
    monthly_sales = df_clean.groupby('Month')['TotalAmount'].sum().reset_index()
    fig.add_trace(go.Scatter(
        x=monthly_sales['Month'].astype(str),
        y=monthly_sales['TotalAmount'],
        mode='lines+markers'
    ), row=1, col=1)

    # Panel 2: top countries, ascending so the largest bar is drawn on top.
    top_countries = (
        df_clean.groupby('Country')['TotalAmount']
        .sum()
        .sort_values(ascending=True)
        .tail(10)
    )
    fig.add_trace(go.Bar(
        x=top_countries.values,
        y=top_countries.index,
        orientation='h',
        marker_color=colors[1]
    ), row=1, col=2)

    # Panel 3: top products, flipped to ascending for the same reason.
    top_products = (
        df_clean.groupby('Description')['TotalAmount']
        .sum()
        .sort_values(ascending=False)
        .head(10)
        .iloc[::-1]
    )
    fig.add_trace(go.Bar(
        x=top_products.values,
        y=top_products.index,
        orientation='h',
        marker_color=colors[2]
    ), row=2, col=1)

    # Panel 4: cohort retention heatmap.
    retention = _cohort_retention_table(df_clean)
    fig.add_trace(go.Heatmap(
        z=retention.values,
        x=retention.columns,
        y=retention.index.astype(str),
        colorscale='RdYlBu'
    ), row=2, col=2)

    fig.update_layout(
        height=1000,
        title_text='Sales Performance Dashboard',
        showlegend=False
    )

    save_plotly_fig(fig, 'complete_dashboard')
    return fig

# Build the combined dashboard (this also saves it to disk) and display it.
complete_dashboard = create_complete_dashboard()
complete_dashboard.show()