# OLAP Visualisations

## Modules

In [1]:
import pandas as pd
import plotly.express as px
import os

## Roll-up on TotalSales by Country and Quarter

In [8]:
def get_top_countries_by_sales(rollup_file, top_n=5):
    """Return per-country, per-quarter sales for the best-selling countries.

    Args:
        rollup_file: Path (or buffer) of a CSV containing ``Country``,
            ``Quarter`` and ``TotalSales`` columns.
        top_n: How many countries to keep, ranked by their summed
            ``TotalSales`` over every quarter.

    Returns:
        A DataFrame with ``Country``, ``Quarter`` and ``TotalSales`` columns,
        one row per (country, quarter) pair, limited to the ``top_n``
        countries. Rows keep their index from the aggregated table.
    """
    raw = pd.read_csv(rollup_file)

    # One row per (country, quarter) with the summed sales.
    by_country_quarter = raw.groupby(
        ['Country', 'Quarter'], as_index=False
    )['TotalSales'].sum()

    # Collapse the quarters to rank countries by their overall sales.
    by_country = by_country_quarter.groupby(
        'Country', as_index=False
    )['TotalSales'].sum()
    leaders = by_country.nlargest(top_n, 'TotalSales')['Country'].tolist()

    # Keep only the quarterly rows that belong to a leading country.
    is_leader = by_country_quarter['Country'].isin(leaders)
    return by_country_quarter[is_leader]

In [9]:
def visualize_rollup_sales_by_country_and_quarter(rollup_file, top_n=5):
    """Plot quarterly total sales for the top-selling countries and save a PNG.

    Args:
        rollup_file: Path to the roll-up CSV; it is handed to
            ``get_top_countries_by_sales`` and also determines the name of
            the saved image.
        top_n: Number of countries (ranked by overall ``TotalSales``) to plot.

    The figure is written to
    ``<rollup_file without its extension>_top_<top_n>_countries.png`` and
    then displayed.
    """
    # Get the top N countries' sales data
    top_country_sales = get_top_countries_by_sales(rollup_file, top_n)

    # Create a line plot showing sales by country and quarter
    fig = px.line(top_country_sales, x='Quarter', y='TotalSales', color='Country',
                  title=f'Total Sales by Top {top_n} Countries and Quarter',
                  labels={'TotalSales': 'Total Sales', 'Quarter': 'Quarter', 'Country': 'Country'},
                  markers=True,
                  width=800,  # Set the width of the figure
                  height=600)

    # Strip only the final extension. Splitting on the first '.' would
    # truncate paths such as './data/rollup.csv' or 'sales.v2.csv'.
    output_stem = os.path.splitext(rollup_file)[0]

    # Save the figure as a PNG file with the same name as the CSV file
    fig.write_image(f"{output_stem}_top_{top_n}_countries.png")
    fig.show()

In [11]:
# Render and save the roll-up chart; top_n is left at its default of 5.
visualize_rollup_sales_by_country_and_quarter('roll_up_on_total_sales_by_country_and_quarter.csv')

## Drill-down on a Particular Country

In [12]:
def visualize_drilldown_sales_for_country(drilldown_file):
    """Plot monthly total sales from a drill-down CSV and save the chart as a PNG.

    Args:
        drilldown_file: Path to a CSV with ``Year``, ``Month`` and
            ``TotalSales`` columns. The same path, with its extension
            replaced by ``.png``, is used for the saved image.

    ``TotalSales`` is summed per year-month, drawn as a line chart, written
    to disk and then displayed.
    """
    # Load the data from CSV file
    drilldown_data = pd.read_csv(drilldown_file)

    # Ensure 'Year' and 'Month' are treated as strings for proper concatenation;
    # months are zero-padded so that e.g. '2011-02' sorts before '2011-10'.
    drilldown_data['Year'] = drilldown_data['Year'].astype(str)
    drilldown_data['Month'] = drilldown_data['Month'].astype(str).str.zfill(2)

    # Create a 'Year-Month' column for proper sorting
    drilldown_data['YearMonth'] = drilldown_data['Year'] + '-' + drilldown_data['Month']

    # Aggregate total sales by 'YearMonth'
    monthly_sales = drilldown_data.groupby('YearMonth')['TotalSales'].sum().reset_index()

    # Create a line plot of total sales per month
    fig = px.line(monthly_sales, x='YearMonth', y='TotalSales',
                  title='Monthly Total Sales',
                  labels={'TotalSales': 'Total Sales', 'YearMonth': 'Year-Month'},
                  width=800,  # Set the width of the figure
                  height=600)

    # Customize the x-axis to display year-month format, one tick per month
    fig.update_xaxes(tickformat='%Y-%m', dtick='M1')

    # Strip only the final extension. Splitting on the first '.' would
    # truncate paths such as './data/drilldown.csv' or 'sales.v2.csv'.
    output_stem = os.path.splitext(drilldown_file)[0]

    # Save the figure as a PNG file with the same name as the CSV file
    fig.write_image(f"{output_stem}.png")
    fig.show()

In [13]:
# Render and save the monthly sales chart for the drill-down export.
visualize_drilldown_sales_for_country('drill_down_on_particular_country.csv')

## Slice on a Particular Product

In [6]:
def visualize_slice_sales_by_product_category(slice_file):
    """Draw a bar chart of total sales per product description and save a PNG.

    Args:
        slice_file: Path to a CSV with ``Description`` and ``TotalSales``
            columns. The same path, with its extension replaced by
            ``.png``, is used for the saved image.

    The rows are plotted as read from the CSV (no aggregation is done here),
    with one colour per ``Description`` value. The figure is written to disk
    and then displayed.
    """
    # Load the data from CSV file
    slice_data = pd.read_csv(slice_file)

    # Create a bar plot for total sales by product category
    fig = px.bar(slice_data, x='Description', y='TotalSales',
                 title='Total Sales by Product Category',
                 labels={'TotalSales': 'Total Sales', 'Description': 'Product Category'},
                 color='Description',
                 width=800,  # Set the width of the figure
                 height=600)

    # Strip only the final extension. Splitting on the first '.' would
    # truncate paths such as './data/slice.csv' or 'sales.v2.csv'.
    output_stem = os.path.splitext(slice_file)[0]

    # Save the figure as a PNG file with the same name as the CSV file
    fig.write_image(f"{output_stem}.png")
    fig.show()

In [7]:
# Render and save the per-product bar chart for the slice export.
visualize_slice_sales_by_product_category('slice_on_a_particular_product.csv')