In [1]:
# Dependencies
import pandas as pd
import plotly as py
import plotly.graph_objs as go
import ipywidgets as widgets
import numpy as np
from scipy import special
import plotly.express as px


# Enable Plotly's offline notebook rendering for this session.
# connected=True is intended to load plotly.js from a CDN rather than embedding
# it in the notebook, so viewing the figures needs internet access.
py.offline.init_notebook_mode(connected=True)

In [2]:
# Load the monthly channel / product-group sales extract and preview the first rows
df = pd.read_csv(filepath_or_buffer="../Data/channel_product_group_by_month.csv")
df.head(n=10)

Unnamed: 0,channel,month,product_grouping,product_quantity,sales
0,E-commerce 1,2021-12-01,11133.0,16887,168225.835
1,E-commerce 1,2021-12-01,11127.0,10955,109175.385
2,Website,2021-12-01,11113.0,2403,68847.4898
3,Website,2021-12-01,11137.0,1892,50905.555
4,Retailer 1,2021-12-01,11135.0,8316,45738.0
5,Website,2021-12-01,11127.0,3583,44648.34455
6,Website,2021-12-01,11135.0,3076,35965.21985
7,Retailer 1,2021-12-01,11127.0,5718,32208.0
8,Website,2021-12-01,11118.0,922,27943.17
9,Website,2021-12-01,11119.0,2453,24019.1101


In [3]:
# Inspect the column dtypes of the loaded frame (note 'month' is loaded as
# object, i.e. strings, not as a datetime column).
df.dtypes

channel              object
month                object
product_grouping    float64
product_quantity      int64
sales               float64
dtype: object

In [4]:
# Keep 'sales' at cent precision (two decimal places), then preview the result
df['sales'] = df['sales'].round(decimals=2)
df.head(n=5)

Unnamed: 0,channel,month,product_grouping,product_quantity,sales
0,E-commerce 1,2021-12-01,11133.0,16887,168225.83
1,E-commerce 1,2021-12-01,11127.0,10955,109175.38
2,Website,2021-12-01,11113.0,2403,68847.49
3,Website,2021-12-01,11137.0,1892,50905.56
4,Retailer 1,2021-12-01,11135.0,8316,45738.0


In [5]:
# Create 2020 only dataframe
# 'month' holds ISO-formatted date strings (dtype object), so lexicographic
# comparison follows chronological order. The range is bounded on BOTH sides
# (inclusive) so rows from any other year can never end up in the "2020" frame.
df_2020 = df[df['month'].between('2020-01-01', '2020-12-31')]

# Display only: most recent month first (the sorted copy is not stored)
df_2020.sort_values('month', ascending=False)

Unnamed: 0,channel,month,product_grouping,product_quantity,sales
750,E-commerce 1,2020-12-01,11133.0,9565,95481.75
789,E-commerce 2,2020-12-01,11119.0,5,74.97
778,Retailer 2,2020-12-01,11127.0,219,1095.00
779,Website,2020-12-01,11123.0,51,616.00
780,E-commerce 1,2020-12-01,11124.0,37,462.32
...,...,...,...,...,...
1134,Website,2020-01-01,11124.0,517,5300.91
1132,Website,2020-01-01,11135.0,860,9175.80
1131,Website,2020-01-01,11127.0,928,9884.69
1130,Website,2020-01-01,11113.0,568,13021.77


In [6]:
# Create 2021 only dataframe
# 'month' holds ISO-formatted date strings (dtype object), so lexicographic
# comparison follows chronological order. The range is bounded on BOTH sides
# (inclusive): a bare "> '2020-12-01'" would also admit later December 2020
# dates and anything after 2021.
df_2021 = df[df['month'].between('2021-01-01', '2021-12-31')]

# Display only: earliest month first (the sorted copy is not stored)
df_2021.sort_values('month', ascending=True)

Unnamed: 0,channel,month,product_grouping,product_quantity,sales
749,Wholesale,2021-01-01,11136.0,6,0.00
719,Retailer 1,2021-01-01,11124.0,312,2026.44
718,Retailer 2,2021-01-01,11123.0,224,2798.88
717,E-commerce 1,2021-01-01,11128.0,266,2891.68
716,Website,2021-01-01,11147.0,479,3866.09
...,...,...,...,...,...
51,Website,2021-12-01,11111.0,11,58.46
52,Website,2021-12-01,11126.0,6,57.47
54,E-commerce 2,2021-12-01,11130.0,2,32.49
56,E-commerce 2,2021-12-01,11135.0,2,32.49


In [7]:
# Calculate total sales by "channel" column value
# Select "sales" BEFORE aggregating so only that column is summed. A frame-wide
# .sum() would also aggregate unrelated columns (including the string 'month'
# column, whose handling differs between pandas versions) only to discard them.
per_channel_sales = (
    df.groupby("channel")["sales"]
    .sum()
    .reset_index(name="sales")
)

# reset_index(name=...) on a Series already returns a DataFrame, so no
# conversion is needed; both names are kept for any downstream cells.
per_channel_sales_df = per_channel_sales
per_channel_sales_df.head(10)

Unnamed: 0,channel,sales
0,E-commerce 1,3187840.57
1,E-commerce 2,19211.07
2,Retailer 1,1470805.97
3,Retailer 2,28210.31
4,Website,8080216.11
5,Wholesale,287085.91


In [29]:
# Pie chart: each channel's share of total sales across the whole dataset.
# update_traces returns the same figure, so the calls are chained.
Sales_by_channel_fig = px.pie(
    data_frame=per_channel_sales_df,
    names="channel",
    values="sales",
    title="Overall Sales by Channel",
    labels=dict(channel="Channel", sales="Sales in $"),
).update_traces(textposition="inside", textinfo="percent+label")

Sales_by_channel_fig.show()

In [30]:
# Export the overall sales-by-channel pie chart to an HTML file
Sales_by_channel_fig.write_html(file="../Graphs/Total_Sales_by_Channel_pie.html")

In [10]:
# Calculate total sales by "channel" column value - 2020 Only
# Select "sales" BEFORE aggregating so only that column is summed. A frame-wide
# .sum() would also aggregate unrelated columns (including the string 'month'
# column, whose handling differs between pandas versions) only to discard them.
per_channel_sales_2020 = (
    df_2020.groupby("channel")["sales"]
    .sum()
    .reset_index(name="sales")
)

# reset_index(name=...) on a Series already returns a DataFrame, so no
# conversion is needed; both names are kept for any downstream cells.
per_channel_sales_df_2020 = per_channel_sales_2020
per_channel_sales_df_2020.head(10)

Unnamed: 0,channel,sales
0,E-commerce 1,827028.62
1,E-commerce 2,5421.15
2,Retailer 1,134691.78
3,Retailer 2,1510.68
4,Website,3326097.04
5,Wholesale,134959.47


In [31]:
# Pie chart: each channel's share of sales, restricted to the 2020 subset.
# update_traces returns the same figure, so the calls are chained.
Sales_by_channel_2020_fig = px.pie(
    data_frame=per_channel_sales_df_2020,
    names="channel",
    values="sales",
    title="2020",
    labels=dict(channel="Channel", sales="Sales in $"),
).update_traces(textposition="inside", textinfo="percent+label")

Sales_by_channel_2020_fig.show()

In [32]:
# Export the 2020 sales-by-channel pie chart to an HTML file
Sales_by_channel_2020_fig.write_html(file="../Graphs/Total_Sales_by_Channel_2020_pie.html")

In [13]:
# Calculate total sales by "channel" column value - 2021 Only
# Select "sales" BEFORE aggregating so only that column is summed. A frame-wide
# .sum() would also aggregate unrelated columns (including the string 'month'
# column, whose handling differs between pandas versions) only to discard them.
per_channel_sales_2021 = (
    df_2021.groupby("channel")["sales"]
    .sum()
    .reset_index(name="sales")
)

# reset_index(name=...) on a Series already returns a DataFrame, so no
# conversion is needed; both names are kept for any downstream cells.
per_channel_sales_df_2021 = per_channel_sales_2021
per_channel_sales_df_2021.head(10)

Unnamed: 0,channel,sales
0,E-commerce 1,2360811.95
1,E-commerce 2,13789.92
2,Retailer 1,1336114.19
3,Retailer 2,26699.63
4,Website,4754119.07
5,Wholesale,152126.44


In [33]:
# Pie chart: each channel's share of sales, restricted to the 2021 subset.
# update_traces returns the same figure, so the calls are chained.
Sales_by_channel_2021_fig = px.pie(
    data_frame=per_channel_sales_df_2021,
    names="channel",
    values="sales",
    title=" 2021",
    labels=dict(channel="Channel", sales="Sales in $"),
).update_traces(textposition="inside", textinfo="percent+label")

Sales_by_channel_2021_fig.show()

In [34]:
# Export the 2021 sales-by-channel pie chart to an HTML file
Sales_by_channel_2021_fig.write_html(file="../Graphs/Total_Sales_by_Channel_2021_pie.html")

In [16]:
# Calculate units sold by "channel" column value
# Select "product_quantity" BEFORE aggregating so only that column is summed.
# A frame-wide .sum() would also aggregate unrelated columns (including the
# string 'month' column, whose handling differs between pandas versions) only
# to discard them.
units_per_channel = (
    df.groupby("channel")["product_quantity"]
    .sum()
    .reset_index(name="product_quantity")
)

# reset_index(name=...) on a Series already returns a DataFrame, so no
# conversion is needed; both names are kept for any downstream cells.
units_per_channel_df = units_per_channel
units_per_channel_df.head(10)

Unnamed: 0,channel,product_quantity
0,E-commerce 1,318714
1,E-commerce 2,1010
2,Retailer 1,240015
3,Retailer 2,3447
4,Website,667272
5,Wholesale,77872


In [17]:
# Inspect the column dtypes of the aggregated units-per-channel frame
units_per_channel_df.dtypes

channel             object
product_quantity     int64
dtype: object

In [18]:
# Pie chart: each channel's share of total units sold across the whole dataset.
# update_traces returns the same figure, so the calls are chained.
units_fig = px.pie(
    data_frame=units_per_channel_df,
    names="channel",
    values="product_quantity",
    title="Units Sold by Channel",
    labels=dict(channel="Channel", product_quantity="Units Sold"),
).update_traces(textposition="inside", textinfo="percent+label")

units_fig.show()

In [19]:
# Export the overall units-by-channel pie chart to an HTML file
units_fig.write_html(file="../Graphs/Total_Units_by_Channel_pie.html")

In [20]:
# Calculate units sold by "channel" column value - 2020 Only
# Select "product_quantity" BEFORE aggregating so only that column is summed.
# A frame-wide .sum() would also aggregate unrelated columns (including the
# string 'month' column, whose handling differs between pandas versions) only
# to discard them.
units_per_channel_2020 = (
    df_2020.groupby("channel")["product_quantity"]
    .sum()
    .reset_index(name="product_quantity")
)

# reset_index(name=...) on a Series already returns a DataFrame, so no
# conversion is needed; both names are kept for any downstream cells.
units_per_channel_df_2020 = units_per_channel_2020
units_per_channel_df_2020.head(10)

Unnamed: 0,channel,product_quantity
0,E-commerce 1,82162
1,E-commerce 2,297
2,Retailer 1,21294
3,Retailer 2,283
4,Website,271003
5,Wholesale,18270


In [21]:
# Pie chart: each channel's share of units sold, restricted to the 2020 subset.
# update_traces returns the same figure, so the calls are chained.
units_fig_2020 = px.pie(
    data_frame=units_per_channel_df_2020,
    names="channel",
    values="product_quantity",
    title="Units Sold by Channel - 2020",
    labels=dict(channel="Channel", product_quantity="Units Sold"),
).update_traces(textposition="inside", textinfo="percent+label")

units_fig_2020.show()

In [22]:
# Export the 2020 units-by-channel pie chart to an HTML file
units_fig_2020.write_html(file="../Graphs/Total_Units_by_Channel_2020_pie.html")

In [23]:
# Calculate units sold by "channel" column value - 2021 Only
# Select "product_quantity" BEFORE aggregating so only that column is summed.
# A frame-wide .sum() would also aggregate unrelated columns (including the
# string 'month' column, whose handling differs between pandas versions) only
# to discard them.
units_per_channel_2021 = (
    df_2021.groupby("channel")["product_quantity"]
    .sum()
    .reset_index(name="product_quantity")
)

# reset_index(name=...) on a Series already returns a DataFrame, so no
# conversion is needed; both names are kept for any downstream cells.
units_per_channel_df_2021 = units_per_channel_2021
units_per_channel_df_2021.head(10)

Unnamed: 0,channel,product_quantity
0,E-commerce 1,236552
1,E-commerce 2,713
2,Retailer 1,218721
3,Retailer 2,3164
4,Website,396269
5,Wholesale,59602


In [24]:
# Pie chart: each channel's share of units sold, restricted to the 2021 subset.
# update_traces returns the same figure, so the calls are chained.
units_fig_2021 = px.pie(
    data_frame=units_per_channel_df_2021,
    names="channel",
    values="product_quantity",
    title="Units Sold by Channel - 2021",
    labels=dict(channel="Channel", product_quantity="Units Sold"),
).update_traces(textposition="inside", textinfo="percent+label")

units_fig_2021.show()

In [25]:
# Export the 2021 units-by-channel pie chart to an HTML file
units_fig_2021.write_html(file="../Graphs/Total_Units_by_Channel_2021_pie.html")

In [26]:
# Calculate units sold per channel per month
# Select "product_quantity" BEFORE aggregating so only that column is summed,
# instead of also summing the unused 'product_grouping' and 'sales' columns
# and then discarding them.
units_per_channel_month = (
    df.groupby(["channel", "month"])["product_quantity"]
    .sum()
    .reset_index(name="product_quantity")
)

# reset_index(name=...) on a Series already returns a DataFrame, so no
# conversion is needed; both names are kept for any downstream cells.
units_per_channel_month_df = units_per_channel_month
units_per_channel_month_df.head(10)

Unnamed: 0,channel,month,product_quantity
0,E-commerce 1,2020-02-01,638
1,E-commerce 1,2020-03-01,4811
2,E-commerce 1,2020-04-01,5537
3,E-commerce 1,2020-05-01,7408
4,E-commerce 1,2020-06-01,6274
5,E-commerce 1,2020-07-01,7126
6,E-commerce 1,2020-08-01,7854
7,E-commerce 1,2020-09-01,9507
8,E-commerce 1,2020-10-01,10354
9,E-commerce 1,2020-11-01,11391


In [35]:
# Bar chart of units sold per month, with bars coloured by channel.
# No chart title is set here.
channel_fig = px.bar(
    data_frame=units_per_channel_month_df,
    x="month",
    y="product_quantity",
    color="channel",
    hover_data={"product_quantity": ":.0f"},
    labels={"product_quantity": "Units Sold", "channel": "Channel", "month": "Month"},
)

# X axis: vertical tick labels plus an explicit axis title
channel_fig.update_xaxes(
    tickangle=90,
    title_text="Month",
    title_font=dict(size=15),
    title_standoff=25,
)

# Y axis: the axis title is set to "Units" here, after the figure was built
# with the "Units Sold" label mapping above
channel_fig.update_yaxes(
    title_text="Units",
    title_font=dict(size=15),
    title_standoff=20,
)

channel_fig.show()

In [36]:
# Export the units-by-channel-per-month bar chart to an HTML file
channel_fig.write_html(file="../Graphs/Units_by_Channel_by_Month_bar.html")