### import libraries

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.ticker import FixedLocator, MaxNLocator

### settings

In [2]:
%matplotlib inline

pd.set_option('display.max_rows', None)

### import data

In [3]:
%store -r sc_dataframes_cleaned

In [4]:
sc_dataframes_cleaned.keys()

dict_keys(['sc_dim_customer', 'sc_dim_market', 'sc_dim_product', 'sc_fact_forecast_monthly', 'sc_fact_sales_monthly', 'sc_freight_cost', 'sc_gross_price', 'sc_manufacturing_cost', 'sc_post_invoice_deductions_18_20', 'sc_post_invoice_deductions_21_22', 'sc_pre_invoice_deductions'])

In [5]:
# Create copies of each dataframe

sc_dim_customer = sc_dataframes_cleaned['sc_dim_customer'].copy()
sc_dim_market = sc_dataframes_cleaned['sc_dim_market'].copy()
sc_dim_product = sc_dataframes_cleaned['sc_dim_product'].copy()
sc_fact_forecast_monthly = sc_dataframes_cleaned['sc_fact_forecast_monthly'].copy()
sc_fact_sales_monthly = sc_dataframes_cleaned['sc_fact_sales_monthly'].copy()
sc_freight_cost = sc_dataframes_cleaned['sc_freight_cost'].copy()
sc_gross_price = sc_dataframes_cleaned['sc_gross_price'].copy()
sc_manufacturing_cost = sc_dataframes_cleaned['sc_manufacturing_cost'].copy()
sc_post_invoice_deductions_18_20 = sc_dataframes_cleaned['sc_post_invoice_deductions_18_20'].copy()
sc_post_invoice_deductions_21_22 = sc_dataframes_cleaned['sc_post_invoice_deductions_21_22'].copy()
sc_pre_invoice_deductions = sc_dataframes_cleaned['sc_pre_invoice_deductions'].copy()

### Analysis

In [6]:
# Get all column names to input into large language model

# Initialize an empty set to store unique column names
all_column_names = set()

# Iterate over each key-value pair in the dictionary
for df_name, df in sc_dataframes_cleaned.items():
    # Get the column names of the current DataFrame and add them to the set
    all_column_names.update(df.columns)
    
# List all column names
list(all_column_names)

['region',
 'division',
 'product_code',
 'date',
 'channel',
 'cost_year',
 'manufacturing_cost',
 'discounts_pct',
 'sold_quantity',
 'variant',
 'sub_zone',
 'customer',
 'segment',
 'other_deductions_pct',
 'pre_invoice_discount_pct',
 'forecast_quantity',
 'fiscal_year',
 'market',
 'product',
 'category',
 'gross_price',
 'customer_code',
 'other_cost_pct',
 'platform',
 'freight_pct']

In [33]:
sc_dim_customer.head(10)

Unnamed: 0,customer,market,platform,channel,customer_code
0,Brick & Mortar,Brick & Mortar,Brick & Mortar,Retailer,90002012
1,Brick & Mortar,Brick & Mortar,Brick & Mortar,Retailer,90002013
2,E-Commerce,E-Commerce,E-Commerce,Retailer,90002010
3,Brick & Mortar,Brick & Mortar,Brick & Mortar,Retailer,90002011
4,Brick & Mortar,Brick & Mortar,Brick & Mortar,Retailer,90002014
5,Brick & Mortar,Brick & Mortar,Brick & Mortar,Direct,70002017
6,E-Commerce,E-Commerce,E-Commerce,Direct,70002018
7,Brick & Mortar,Brick & Mortar,Brick & Mortar,Retailer,90002015
8,E-Commerce,E-Commerce,E-Commerce,Retailer,90002016
9,Brick & Mortar,Brick & Mortar,Brick & Mortar,Retailer,90002003


In [34]:
sc_dim_market.head(10)

Unnamed: 0,market,sub_zone,region
0,China,ROA,APAC
1,India,India,APAC
2,Indonesia,ROA,APAC
3,Japan,ROA,APAC
4,Pakistan,ROA,APAC
5,Philiphines,ROA,APAC
6,South Korea,ROA,APAC
7,Australia,ANZ,APAC
8,Newzealand,ANZ,APAC
9,Bangladesh,ROA,APAC


In [35]:
sc_dim_product.head(10)

Unnamed: 0,product_code,division,segment,category,product,variant
0,A0118150101,P & A,Peripherals,Internal HDD,AQ Dracula HDD – 3.5 Inch SATA 6 Gb/s 5400 RPM...,Standard
1,A0118150102,P & A,Peripherals,Internal HDD,AQ Dracula HDD – 3.5 Inch SATA 6 Gb/s 5400 RPM...,Plus
2,A0118150103,P & A,Peripherals,Internal HDD,AQ Dracula HDD – 3.5 Inch SATA 6 Gb/s 5400 RPM...,Premium
3,A0118150104,P & A,Peripherals,Internal HDD,AQ Dracula HDD – 3.5 Inch SATA 6 Gb/s 5400 RPM...,Premium Plus
4,A0219150201,P & A,Peripherals,Internal HDD,AQ WereWolf NAS Internal Hard Drive HDD – 8.89 cm,Standard
5,A0219150202,P & A,Peripherals,Internal HDD,AQ WereWolf NAS Internal Hard Drive HDD – 8.89 cm,Plus
6,A0220150203,P & A,Peripherals,Internal HDD,AQ WereWolf NAS Internal Hard Drive HDD – 8.89 cm,Premium
7,A0320150301,P & A,Peripherals,Internal HDD,AQ Zion Saga,Standard
8,A0321150302,P & A,Peripherals,Internal HDD,AQ Zion Saga,Plus
9,A0321150303,P & A,Peripherals,Internal HDD,AQ Zion Saga,Premium


In [36]:
sc_fact_forecast_monthly.head(10)

Unnamed: 0,date,division,category,product_code,product,market,platform,channel,customer_code,customer_name,forecast_quantity
0,2017-09-01,N & S,External Solid State Drives,A6218160101,AQ Digit SSD,Australia,Brick & Mortar,Direct,70008169,AltiQ Exclusive,146
1,2017-09-01,N & S,External Solid State Drives,A6218160101,AQ Digit SSD,Australia,Brick & Mortar,Retailer,90008165,Forward Stores,120
2,2017-09-01,N & S,External Solid State Drives,A6218160101,AQ Digit SSD,Australia,Brick & Mortar,Retailer,90008166,Sound,216
3,2017-09-01,N & S,External Solid State Drives,A6218160101,AQ Digit SSD,Australia,Brick & Mortar,Retailer,90008167,Electricalsocity,141
4,2017-09-01,N & S,External Solid State Drives,A6218160101,AQ Digit SSD,Australia,E-Commerce,Direct,70008170,Atliq e Store,85
5,2017-09-01,N & S,External Solid State Drives,A6218160101,AQ Digit SSD,Bangladesh,Brick & Mortar,Direct,70010047,AltiQ Exclusive,0
6,2017-09-01,N & S,External Solid State Drives,A6218160101,AQ Digit SSD,Brazil,E-Commerce,Retailer,90027207,Amazon,14
7,2017-09-01,N & S,External Solid State Drives,A6218160101,AQ Digit SSD,Canada,Brick & Mortar,Direct,70023031,AltiQ Exclusive,30
8,2017-09-01,N & S,External Solid State Drives,A6218160101,AQ Digit SSD,Canada,Brick & Mortar,Retailer,90023022,Nomad Stores,8
9,2017-09-01,N & S,External Solid State Drives,A6218160101,AQ Digit SSD,Canada,Brick & Mortar,Retailer,90023025,Premium Stores,25


In [22]:
sc_fact_sales_monthly.head(10)

Unnamed: 0,date,division,category,product_code,product,market,platform,channel,customer_code,customer_name,sold_quantity
0,2017-09-01,N & S,External Solid State Drives,A6218160101,AQ Digit SSD,Australia,Brick & Mortar,Direct,70008169,AltiQ Exclusive,81
1,2017-09-01,N & S,External Solid State Drives,A6218160101,AQ Digit SSD,Australia,Brick & Mortar,Retailer,90008165,Forward Stores,157
2,2017-09-01,N & S,External Solid State Drives,A6218160101,AQ Digit SSD,Australia,Brick & Mortar,Retailer,90008166,Sound,126
3,2017-09-01,N & S,External Solid State Drives,A6218160101,AQ Digit SSD,Australia,Brick & Mortar,Retailer,90008167,Electricalsocity,160
4,2017-09-01,N & S,External Solid State Drives,A6218160101,AQ Digit SSD,Australia,E-Commerce,Direct,70008170,Atliq e Store,120
5,2017-09-01,N & S,External Solid State Drives,A6218160101,AQ Digit SSD,Brazil,E-Commerce,Retailer,90027207,Amazon,9
6,2017-09-01,N & S,External Solid State Drives,A6218160101,AQ Digit SSD,Canada,Brick & Mortar,Direct,70023031,AltiQ Exclusive,9
7,2017-09-01,N & S,External Solid State Drives,A6218160101,AQ Digit SSD,Canada,Brick & Mortar,Retailer,90023022,Nomad Stores,24
8,2017-09-01,N & S,External Solid State Drives,A6218160101,AQ Digit SSD,Canada,Brick & Mortar,Retailer,90023025,Premium Stores,22
9,2017-09-01,N & S,External Solid State Drives,A6218160101,AQ Digit SSD,Canada,Brick & Mortar,Retailer,90023026,Relief,37


In [27]:
sc_freight_cost.head(10)

Unnamed: 0,market,fiscal_year,freight_pct,other_cost_pct
0,Australia,2018,0.0188,0.005
1,Austria,2018,0.0272,0.0053
2,Bangladesh,2018,0.0219,0.0058
3,Brazil,2018,0.0239,0.0033
4,Canada,2018,0.0264,0.0054
5,Chile,2018,0.0267,0.0022
6,China,2018,0.0204,0.0043
7,Columbia,2018,0.0216,0.0028
8,France,2018,0.019,0.0038
9,Germany,2018,0.0301,0.0061


In [28]:
sc_gross_price.head(10)

Unnamed: 0,product_code,fiscal_year,gross_price
0,A0118150101,2018,15.3952
1,A0118150101,2019,14.4392
2,A0118150101,2020,16.2323
3,A0118150101,2021,19.0573
4,A0118150102,2018,19.5875
5,A0118150102,2019,18.5595
6,A0118150102,2020,19.8577
7,A0118150102,2021,21.4565
8,A0118150103,2018,19.363
9,A0118150103,2019,19.3442


In [29]:
sc_manufacturing_cost.head(10)

Unnamed: 0,product_code,cost_year,manufacturing_cost
0,A0118150101,2018,4.619
1,A0118150101,2019,4.2033
2,A0118150101,2020,5.0207
3,A0118150101,2021,5.5172
4,A0118150102,2018,5.6036
5,A0118150102,2019,5.3235
6,A0118150102,2020,5.718
7,A0118150102,2021,6.2835
8,A0118150103,2018,5.9469
9,A0118150103,2019,5.5306


In [30]:
sc_post_invoice_deductions_18_20.head(10)

Unnamed: 0,customer_code,product_code,date,discounts_pct,other_deductions_pct
0,70002017,A0118150101,2017-09-01,0.265957,0.071871
1,70002017,A0118150101,2017-10-01,0.308992,0.097627
2,70002017,A0118150101,2017-11-01,0.331268,0.075211
3,70002017,A0118150101,2018-01-01,0.295792,0.072036
4,70002017,A0118150101,2018-02-01,0.320787,0.079335
5,70002017,A0118150101,2018-03-01,0.263483,0.100745
6,70002017,A0118150101,2018-05-01,0.223149,0.08196
7,70002017,A0118150101,2018-06-01,0.30195,0.079133
8,70002017,A0118150101,2018-07-01,0.312341,0.092945
9,70002017,A0118150102,2017-09-01,0.230225,0.086882


In [31]:
sc_post_invoice_deductions_21_22.head(10)

Unnamed: 0,customer_code,product_code,date,discounts_pct,other_deductions_pct
0,70002017,A0118150101,2021-01-01,0.265561,0.135387
1,70002017,A0118150101,2021-02-01,0.231823,0.125004
2,70002017,A0118150101,2021-03-01,0.265881,0.140936
3,70002017,A0118150101,2021-05-01,0.245106,0.138077
4,70002017,A0118150101,2021-06-01,0.253668,0.130621
5,70002017,A0118150101,2021-07-01,0.239794,0.152087
6,70002017,A0118150102,2021-01-01,0.288799,0.119384
7,70002017,A0118150102,2021-02-01,0.284986,0.140314
8,70002017,A0118150102,2021-03-01,0.261185,0.16799
9,70002017,A0118150102,2021-05-01,0.212951,0.176572


In [32]:
sc_pre_invoice_deductions.head(10)

Unnamed: 0,customer_code,fiscal_year,pre_invoice_discount_pct
0,70002017,2018,0.082442
1,70002017,2019,0.077659
2,70002017,2020,0.073458
3,70002017,2021,0.070269
4,70002017,2022,0.105678
5,70002018,2018,0.295568
6,70002018,2019,0.257655
7,70002018,2020,0.225481
8,70002018,2021,0.206107
9,70002018,2022,0.293093


# Request 1
## Can you identify the top three product categories by gross revenue for each fiscal year?

In [None]:
#  Calculate gross revenue

# Request 2
## Analyze the distribution of manufacturing costs across different product divisions and determine which division has the highest average manufacturing cost?

# Request 3
## Analyze the relationship between the 'forecast_quantity' and 'sold_quantity' of products. Considering different product variants, identify whether there's a consistent pattern of over-forecasting or under-forecasting across various product variants.

## Investigate if certain customer segments or regions tend to have higher discrepancies between forecasted and actual sales quantities

## Propose potential strategies to optimize forecasting accuracy based on these findings."