
# Finding answers

In [None]:
import pandas as pd
import seaborn as sns

from common_code import data_folder

In [None]:
# Load the 2020 sales CSV from the folder given by `data_folder` into a DataFrame.
sales_df = pd.read_csv(f'{data_folder}/sales2020.csv')

## Channel profits

In [None]:
# Summarise sales per channel: total quantity, total gross profit and the
# average margin per unit. Values are rounded to 3 decimals and the rows are
# ranked so the channel with the highest gross profit comes first.
channel_profits_df = (
    sales_df
    .groupby('Channel')
    .agg({
        'Quantity': 'sum',
        'Gross Profit': 'sum',
        'Margin per Unit': 'mean',
    })
    .round(3)
    .sort_values('Gross Profit', ascending=False)
)

In [None]:
# Show the per-channel summary table as the cell output.
channel_profits_df

In [None]:
# Draw each aggregated column as its own bar chart, side by side in one row.
# The trailing semicolon keeps the notebook from echoing the returned axes.
channel_profits_df.plot.bar(
    subplots=True,
    layout=(1, 3),
    figsize=(15, 4),
    rot=30,
    legend=False,
);

## Category profits

In [None]:
# Per-category totals (quantity, gross profit) and average margin per unit.
category_profits_df = sales_df.groupby('Category').agg({
    'Quantity': 'sum',
    'Gross Profit': 'sum',
    'Margin per Unit': 'mean',
})

# Round to 3 decimals first, then rank by gross profit, highest first.
category_profits_df = category_profits_df.round(3).sort_values(
    by='Gross Profit', ascending=False
)

In [None]:
# Show the per-category summary table as the cell output.
category_profits_df

In [None]:
# Draw each aggregated column as its own horizontal bar chart in one row.
# The charts share the category axis (sharey=True) but keep independent value
# axes (sharex=False) because the three columns are on different scales.
# The trailing semicolon suppresses the axes-array repr that would otherwise
# be echoed below the figure (and end up in the exported HTML report).
category_profits_df.plot(
    kind='barh', figsize=(15, 6), subplots=True,
    layout=(1, 3), legend=False, sharex=False, sharey=True
);

## Channel and category profits

In [None]:
# Cross-tabulate the mean margin per unit: one row per sales channel,
# one column per product category, rounded to 3 decimals.
average_margin_per_category = sales_df.pivot_table(
    values='Margin per Unit',
    index='Channel',
    columns='Category',
    aggfunc='mean',
).round(3)

In [None]:
# Show the channel-by-category margin table as the cell output.
average_margin_per_category

In [None]:
# Heatmap of the average margin per unit. The table is transposed so that
# categories run down the rows and channels across the columns. Each cell is
# annotated with its value to 2 decimals, and the colour bar is hidden.
# The trailing semicolon suppresses the Axes repr that would otherwise be
# echoed below the figure (and end up in the exported HTML report).
sns.heatmap(
    data=average_margin_per_category.transpose(),
    cmap='Reds',
    annot=True,
    fmt=".2f",
    linewidths=.5,
    cbar=False
);

## Product profits

In [None]:
def get_product_profits(df, category):
    """Return the rows of `df` whose 'Category' column equals `category`."""
    in_category = df['Category'] == category
    return df.loc[in_category]

In [None]:
# Try the helper on one category and show the filtered rows.
get_product_profits(sales_df, 'Toys & Games')

In [None]:
def get_product_profits(df, category):
    """Build a per-product profit table for a single category.

    Rows of `df` are restricted to `category`, grouped by 'ProductID' and
    aggregated: the first product name, unit price and category are kept,
    quantity and gross profit are summed, and margin per unit is averaged.

    Returns:
        A DataFrame with one row per product, ordered by 'Gross Profit'
        (highest first), with 'ProductID' as a regular column and values
        rounded to 3 decimals.
    """
    how_to_aggregate = {
        'Product Name': 'first',
        'Unit Price': 'first',
        'Category': 'first',
        'Quantity': 'sum',
        'Gross Profit': 'sum',
        'Margin per Unit': 'mean',
    }

    category_rows = df.loc[df['Category'] == category]
    per_product = category_rows.groupby('ProductID').agg(how_to_aggregate)
    ranked = per_product.sort_values(by='Gross Profit', ascending=False)

    return ranked.reset_index().round(3)

In [None]:
# Show the aggregated per-product table for one category.
get_product_profits(sales_df, 'Toys & Games')

In [None]:
def get_product_profits(df,
                        category='All', channel='All',
                        sort_column='Gross Profit', ascending=False):
    """Build a per-product profit table, optionally filtered and re-sorted.

    Args:
        df: Sales records with at least the columns used below.
        category: Keep only this 'Category' value; 'All' disables the filter.
        channel: Keep only this 'Channel' value; 'All' disables the filter.
        sort_column: Column of the aggregated table to sort by.
        ascending: Sort direction passed to `sort_values`.

    Returns:
        A DataFrame with one row per 'ProductID': first product name, unit
        price and category, summed quantity and gross profit, mean margin
        per unit. Rows are sorted as requested, 'ProductID' is a regular
        column, and values are rounded to 3 decimals.
    """
    # 'All' is the sentinel meaning "do not filter on this column".
    for column, wanted in (('Category', category), ('Channel', channel)):
        if wanted != 'All':
            df = df[df[column] == wanted]

    how_to_aggregate = {
        'Product Name': 'first',
        'Unit Price': 'first',
        'Category': 'first',
        'Quantity': 'sum',
        'Gross Profit': 'sum',
        'Margin per Unit': 'mean',
    }

    per_product = df.groupby('ProductID').agg(how_to_aggregate)
    ordered = per_product.sort_values(by=sort_column, ascending=ascending)

    return ordered.reset_index().round(3)

In [None]:
# Example: products in one category sold through one channel,
# listed from the lowest average margin per unit upwards.
get_product_profits(sales_df, 
                    category='Toys & Games', 
                    channel='iBay.com', 
                    sort_column='Margin per Unit', 
                    ascending=True)

In [None]:
# Write a single Excel workbook with one worksheet per product category.
# Each sheet holds that category's per-product profit table, without the
# DataFrame index.
with pd.ExcelWriter(f'{data_folder}/Product profits 2020.xlsx') as writer:
    for category in sales_df['Category'].unique():
        products_df = get_product_profits(sales_df, category)
        # NOTE(review): the category name is used verbatim as the sheet name;
        # Excel restricts sheet-name length and characters — confirm that
        # every category in the data is a valid sheet name.
        products_df.to_excel(writer, sheet_name=category, index=False)

## Sharing results

### Exporting notebooks

In [None]:
# Shell escape: convert the notebook "03 - Finding answers.ipynb" into
# "Sales analysis.html". --no-input leaves the code cells out of the export.
# NOTE(review): no --to option is given, so this relies on nbconvert's
# default output format — presumably HTML; confirm.
!jupyter-nbconvert "03 - Finding answers.ipynb" --no-input --output "Sales analysis.html";

### Overthinking: Creating an interactive dashboard

In [None]:
import pandas as pd

import hvplot.pandas
import panel as pn
pn.extension();

from common_code import data_folder, get_product_profits

In [None]:
# Load the 2020 sales CSV from the folder given by `data_folder` for the dashboard cells below.
sales_df = pd.read_csv(f'{data_folder}/sales2020.csv')

In [None]:
def products_scatterplot(category='All', channel='All'):
    """Scatter plot of product profits for the given category and channel.

    Fetches the per-product table from `get_product_profits` (using the
    notebook-level `sales_df`) and plots gross profit against margin per
    unit. Point size is driven by 'Quantity' and colour by 'Category'.
    """
    product_profits = get_product_profits(sales_df, category, channel)

    plot_options = dict(
        kind='scatter',
        x='Gross Profit',
        y='Margin per Unit',
        size='Quantity',
        color='Category',
        scale=0.2,
        grid=True,
        line_color='black',
        width=900,
        height=600,
        # Extra columns shown in the hover tooltip.
        hover_cols=['ProductID', 'Product Name', 'Unit Price'],
    )
    return product_profits.hvplot(**plot_options)

In [None]:
# Render the scatter plot with the defaults ('All' categories, 'All' channels).
products_scatterplot()

In [None]:
# Throwaway dropdown for trying out the Select widget.
test_dropdown = pn.widgets.Select(
    name='Test dropdown',
    options=list('abc'),
    value='a',
)

In [None]:
# Show the test widget as the cell output.
test_dropdown

In [None]:
# Dropdown options: the 'All' sentinel first, then every channel in sorted order.
channels = ['All', *sorted(sales_df['Channel'].unique())]
channel_dropdown = pn.widgets.Select(
    name='Channel',
    options=channels,
    value='All',
)

In [None]:
# Show the channel selector as the cell output.
channel_dropdown

In [None]:
# Dropdown options: the 'All' sentinel first, then every category in sorted order.
categories = ['All', *sorted(sales_df['Category'].unique())]
category_dropdown = pn.widgets.Select(
    name='Category',
    options=categories,
    value='All',
)

In [None]:
# Show the category selector as the cell output.
category_dropdown

In [None]:
@pn.depends(category_dropdown, channel_dropdown)
def products_scatterplot(category='All', channel='All'):
    """Scatter plot of product profits, tied to the two dropdown widgets.

    The `pn.depends` decorator declares the category and channel dropdowns
    as dependencies of this function. The body builds the per-product table
    with `get_product_profits` (using the notebook-level `sales_df`) and
    plots gross profit against margin per unit.
    """
    product_profits = get_product_profits(sales_df, category, channel)

    # Which columns drive position, marker size and colour.
    encodings = {
        'x': 'Gross Profit',
        'y': 'Margin per Unit',
        'size': 'Quantity',
        'color': 'Category',
    }
    # Purely visual settings, plus the extra columns shown on hover.
    appearance = {
        'scale': 0.2,
        'grid': True,
        'line_color': 'black',
        'width': 900,
        'height': 600,
        'hover_cols': ['ProductID', 'Product Name', 'Unit Price'],
    }

    return product_profits.hvplot(kind='scatter', **encodings, **appearance)

In [None]:
# Preview: stack the two dropdowns above the scatter plot in one column.
pn.Column(
    category_dropdown,
    channel_dropdown,
    products_scatterplot
)

In [None]:
# Final dashboard layout: a title and a short instruction line, followed by
# the two dropdowns and the scatter plot, all stacked in one column.
dashboard = pn.Column(
    "# Products dashboard",
    "Select sales channel or product category using the menus below:",
    category_dropdown,
    channel_dropdown,
    products_scatterplot
)

In [None]:
# NOTE(review): presumably serves the dashboard and opens it in a separate
# browser tab, outside the notebook — confirm against the Panel docs.
dashboard.show()

In [None]:
# Save the dashboard to 'Products dashboard.html'.
# NOTE(review): embed=True presumably stores the widget states in the file so
# the dropdowns keep working without a running Python process — confirm.
dashboard.save('Products dashboard.html', embed=True)