# Order-to-Cash Product KPI Build (O2C_Template Customized Notebook Template)

In [1]:
import pandas as pd
import numpy as np

In [2]:
from fosforml.model_manager.snowflakesession import get_session
# Session object used below to query the source table and to write the KPI table back.
my_session = get_session()

In [27]:
# Source table for this run.
# Alternative source (currently disabled): 'ORDER_TO_CASH_ENRICHED'
table_name = 'ORDER_TO_CASH_MASTER'

# Select every column of the table and materialise the result locally as a pandas frame.
sf_df = my_session.sql(f"select * from {table_name}")
df = sf_df.to_pandas()

In [28]:
# Inspect which invoice statuses occur in the loaded table.
df['INVOICESTATUS'].unique()

array(['LATE_PAYMENT', 'ONTIME_PAYMENT', 'PendingApproval', 'Approved',
       'Sent'], dtype=object)

In [29]:
# Rows whose invoice status is a payment outcome go to the training split;
# rows with any other status go to the test split.
payment_outcomes = ['LATE_PAYMENT', 'ONTIME_PAYMENT']
has_payment_outcome = df['INVOICESTATUS'].isin(payment_outcomes)
df_train = df[has_payment_outcome]
df_test = df[~has_payment_outcome]

In [30]:
# (rows, columns) of the training and test splits.
df_train.shape, df_test.shape

((170445, 40), (2992, 40))

In [31]:
# Continue with an independent copy of the training rows. `df` is rebound here,
# so the full table loaded earlier is no longer reachable under that name.
df = df_train.copy()

In [32]:
# Parse the order, delivery, invoice and payment date columns into pandas datetimes.
date_columns = [
    'ORDERDATE',
    'DELIVERYDATE',
    'INVOICEDATE',
    'PAYMENTDATE',
    'INVOICEDUEDATE',
    'DELIVEREDON',
]
for date_col in date_columns:
    df[date_col] = pd.to_datetime(df[date_col])

In [33]:
# Total Sales Volume: sum of ORDERQUANTITY per product.
total_sales_volume = (
    df.groupby('PRODUCTID')
      .agg(PR_TOTAL_SALES_VOLUME=('ORDERQUANTITY', 'sum'))
      .reset_index()
)

In [34]:
# Total Sales Value: sum of ORDERVALUE per product.
total_sales_value = (
    df.groupby('PRODUCTID')
      .agg(PR_TOTAL_SALES_VALUE=('ORDERVALUE', 'sum'))
      .reset_index()
)

In [35]:
# Average Order Quantity: mean of ORDERQUANTITY per product.
average_order_quantity = (
    df.groupby('PRODUCTID')
      .agg(PR_AVG_ORDER_QUANTITY=('ORDERQUANTITY', 'mean'))
      .reset_index()
)

In [36]:
# Average Unit Price: mean of UNITPRICE per product.
average_unit_price = (
    df.groupby('PRODUCTID')
      .agg(PR_AVG_UNIT_PRICE=('UNITPRICE', 'mean'))
      .reset_index()
)

In [37]:
# Number of Orders: count of distinct ORDERID values per product.
number_of_orders = (
    df.groupby('PRODUCTID')
      .agg(PR_NUMBER_OF_ORDERS=('ORDERID', 'nunique'))
      .reset_index()
)

In [38]:
# Category-level counterparts of the product KPIs, all computed from one grouping.
by_category = df.groupby('PRODUCTCATEGORY')

# Totals of quantity and value per category.
category_sales_volume = by_category['ORDERQUANTITY'].sum().rename('CAT_TOTAL_SALES_VOLUME').reset_index()
category_sales_value = by_category['ORDERVALUE'].sum().rename('CAT_TOTAL_SALES_VALUE').reset_index()

# Means of quantity and unit price per category.
category_average_order_quantity = by_category['ORDERQUANTITY'].mean().rename('CAT_AVG_ORDER_QUANTITY').reset_index()
category_average_unit_price = by_category['UNITPRICE'].mean().rename('CAT_AVG_UNIT_PRICE').reset_index()

# Distinct ORDERID count per category.
category_number_of_orders = by_category['ORDERID'].nunique().rename('CAT_NUMBER_OF_ORDERS').reset_index()

In [41]:
# Delivery Performance
# DELIVEREDON is parsed again here; re-parsing an already-parsed datetime column leaves it unchanged.
df['DELIVEREDON'] = pd.to_datetime(df['DELIVEREDON'])

# Whole days from ORDERDATE to DELIVEREDON, computed per row (despite the PR_ prefix).
order_to_delivered = df['DELIVEREDON'] - df['ORDERDATE']
df['PR_DELIVERY_TIME'] = order_to_delivered.dt.days

# Mean of the row-level delivery time per product and per category.
delivery_performance = (
    df.groupby('PRODUCTID')
      .agg(PR_AVG_DELIVERY_TIME=('PR_DELIVERY_TIME', 'mean'))
      .reset_index()
)
category_delivery_performance = (
    df.groupby('PRODUCTCATEGORY')
      .agg(CAT_AVG_DELIVERY_TIME=('PR_DELIVERY_TIME', 'mean'))
      .reset_index()
)

In [42]:
# Invoice Performance
# PAYMENTDATE is parsed again here; re-parsing an already-parsed datetime column leaves it unchanged.
df['PAYMENTDATE'] = pd.to_datetime(df['PAYMENTDATE'])

# Whole days from INVOICEDATE to PAYMENTDATE, computed per row (despite the PR_ prefix).
invoice_to_payment = df['PAYMENTDATE'] - df['INVOICEDATE']
df['PR_INVOICE_TIME'] = invoice_to_payment.dt.days

# Mean of the row-level invoice-to-payment time per product and per category.
invoice_performance = (
    df.groupby('PRODUCTID')
      .agg(PR_AVG_INVOICE_TIME=('PR_INVOICE_TIME', 'mean'))
      .reset_index()
)
category_invoice_performance = (
    df.groupby('PRODUCTCATEGORY')
      .agg(CAT_AVG_INVOICE_TIME=('PR_INVOICE_TIME', 'mean'))
      .reset_index()
)

In [43]:
# Combine every per-product KPI frame into one table keyed by PRODUCTID.
# Frames are joined one after another, in this order, with the default inner join.
product_kpis = total_sales_volume
for product_kpi_frame in (
    total_sales_value,
    average_order_quantity,
    average_unit_price,
    number_of_orders,
    delivery_performance,
    invoice_performance,
):
    product_kpis = product_kpis.merge(product_kpi_frame, on='PRODUCTID')

In [44]:
# Combine every per-category KPI frame into one table keyed by PRODUCTCATEGORY.
# Frames are joined one after another, in this order, with the default inner join.
category_kpis = category_sales_volume
for category_kpi_frame in (
    category_sales_value,
    category_average_order_quantity,
    category_average_unit_price,
    category_number_of_orders,
    category_delivery_performance,
    category_invoice_performance,
):
    category_kpis = category_kpis.merge(category_kpi_frame, on='PRODUCTCATEGORY')

In [45]:
# (rows, columns) of the per-product KPI table.
product_kpis.shape

(62, 8)

In [46]:
# (rows, columns) of the per-category KPI table.
category_kpis.shape

(7, 8)

In [47]:
# Display the per-category KPI table (one row per PRODUCTCATEGORY).
category_kpis

Unnamed: 0,PRODUCTCATEGORY,CAT_TOTAL_SALES_VOLUME,CAT_TOTAL_SALES_VALUE,CAT_AVG_ORDER_QUANTITY,CAT_AVG_UNIT_PRICE,CAT_NUMBER_OF_ORDERS,CAT_AVG_DELIVERY_TIME,CAT_AVG_INVOICE_TIME
0,Automotive,12960780,442837278,821.030027,38.718421,7881,15.431648,23.397504
1,Construction Materials,39262752,1311431532,2162.164877,141.970428,9053,18.018558,27.28713
2,Consumer Goods,18861485,1227847878,521.409991,169.083568,18126,5.996766,26.511113
3,Food Beverages,64131918,513353051,1618.879667,6.880853,19683,5.993008,18.009592
4,Healthcare,30705201,414761779,1817.413495,20.433856,8478,9.009293,23.327848
5,Industrial Equipment,1348785,13029094260,97.272826,17754.954565,6888,18.022285,37.953411
6,Office Supplies,11852973,1790952920,395.758698,175.070684,14891,12.986144,29.402972


In [48]:
# Attach the product-level KPIs to every row of the working frame.
# The left join keeps all rows of df; a PRODUCTID missing from product_kpis would get NaN KPIs.
# KPI columns left over from an earlier run of this cell are dropped first: without that,
# re-running would create suffixed duplicates (_x/_y) and the later column selection
# by exact KPI name would fail. On a first run the drop is a no-op.
product_kpi_cols = [col for col in product_kpis.columns if col != 'PRODUCTID']
df = (
    df.drop(columns=product_kpi_cols, errors='ignore')
      .merge(product_kpis, on='PRODUCTID', how='left')
)

In [49]:
# Attach the category-level KPIs to every row of the working frame.
# The left join keeps all rows of df; a PRODUCTCATEGORY missing from category_kpis would get NaN KPIs.
# KPI columns left over from an earlier run of this cell are dropped first: without that,
# re-running would create suffixed duplicates (_x/_y) and the later column selection
# by exact KPI name would fail. On a first run the drop is a no-op.
category_kpi_cols = [col for col in category_kpis.columns if col != 'PRODUCTCATEGORY']
df = (
    df.drop(columns=category_kpi_cols, errors='ignore')
      .merge(category_kpis, on='PRODUCTCATEGORY', how='left')
)

In [50]:
# List the columns of df after both KPI merges.
df.columns

Index(['ORDERID', 'ORDERITEMID', 'PRODUCTID', 'PRODUCTNAME', 'ORDERQUANTITY',
       'UNITPRICE', 'ORDERVALUE', 'CUSTOMERID', 'CUSTOMERNAME',
       'PRODUCTCATEGORY', 'ORDERDATE', 'ORDERSTATUS', 'ORDERAMOUNT',
       'ORDERCOUNT', 'SHIPMENTID', 'SHIPMENTDATE', 'DELIVERYDATE',
       'DELIVEREDON', 'CARRIER', 'SHIPMENTSTATUS', 'INVOICEID', 'INVOICEDATE',
       'INVOICEDUEDATE', 'PAYMENTDATE', 'INVOICESTATUS', 'PAYMENTMETHOD',
       'PRODUCTTYPE', 'QUANTITY', 'COMPANYTYPE', 'CONTACTDETAILS',
       'EMAILDETAILS', 'ADDRESSDETAILS', 'ADMINDETAILS', 'CREDITLIMIT',
       'CUSTOMERSINCE', 'PAYMENTTERMS', 'CREDITLIMITTYPE', 'CUSTOMERTYPE',
       'SUPPLIERNAME', 'SUPPLIERID', 'PR_DELIVERY_TIME', 'PR_INVOICE_TIME',
       'PR_TOTAL_SALES_VOLUME', 'PR_TOTAL_SALES_VALUE',
       'PR_AVG_ORDER_QUANTITY', 'PR_AVG_UNIT_PRICE', 'PR_NUMBER_OF_ORDERS',
       'PR_AVG_DELIVERY_TIME', 'PR_AVG_INVOICE_TIME', 'CAT_TOTAL_SALES_VOLUME',
       'CAT_TOTAL_SALES_VALUE', 'CAT_AVG_ORDER_QUANTITY', 'CAT_AVG_

In [51]:
# Keep only the product attributes plus the product- and category-level KPI columns.
product_attribute_cols = [
    'PRODUCTID', 'PRODUCTNAME', 'PRODUCTCATEGORY', 'PRODUCTTYPE', 'UNITPRICE', 'QUANTITY',
]
product_kpi_feature_cols = [
    'PR_TOTAL_SALES_VOLUME', 'PR_TOTAL_SALES_VALUE', 'PR_AVG_ORDER_QUANTITY',
    'PR_AVG_UNIT_PRICE', 'PR_NUMBER_OF_ORDERS', 'PR_AVG_DELIVERY_TIME', 'PR_AVG_INVOICE_TIME',
]
category_kpi_feature_cols = [
    'CAT_TOTAL_SALES_VOLUME', 'CAT_TOTAL_SALES_VALUE', 'CAT_AVG_ORDER_QUANTITY',
    'CAT_AVG_UNIT_PRICE', 'CAT_NUMBER_OF_ORDERS', 'CAT_AVG_DELIVERY_TIME', 'CAT_AVG_INVOICE_TIME',
]
df = df[product_attribute_cols + product_kpi_feature_cols + category_kpi_feature_cols]

In [52]:
# Preview the first rows of the selected columns.
df.head()

Unnamed: 0,PRODUCTID,PRODUCTNAME,PRODUCTCATEGORY,PRODUCTTYPE,UNITPRICE,QUANTITY,PR_TOTAL_SALES_VOLUME,PR_TOTAL_SALES_VALUE,PR_AVG_ORDER_QUANTITY,PR_AVG_UNIT_PRICE,PR_NUMBER_OF_ORDERS,PR_AVG_DELIVERY_TIME,PR_AVG_INVOICE_TIME,CAT_TOTAL_SALES_VOLUME,CAT_TOTAL_SALES_VALUE,CAT_AVG_ORDER_QUANTITY,CAT_AVG_UNIT_PRICE,CAT_NUMBER_OF_ORDERS,CAT_AVG_DELIVERY_TIME,CAT_AVG_INVOICE_TIME
0,PID-f9cee340-35af-4ca9-a425-bf9ae02b9d9c,Energy Drink,Food Beverages,Energy Drink,7,5000,10017231,70120617,2517.524755,7.0,3979,5.99246,18.118874,64131918,513353051,1618.879667,6.880853,19683,5.993008,18.009592
1,PID-efe86bed-b844-4638-a39c-d0c5225d5243,Cold-Pressed Juice,Food Beverages,Cold-Pressed Juice,11,300,608786,6696646,152.846096,11.0,3983,5.989957,15.904846,64131918,513353051,1618.879667,6.880853,19683,5.993008,18.009592
2,PID-df71fb0e-604f-4ded-8a16-850f47932a2a,Bottled Water,Food Beverages,Bottled Water,5,10000,19956593,99782965,4976.706484,5.0,4010,6.023192,20.009476,64131918,513353051,1618.879667,6.880853,19683,5.993008,18.009592
3,PID-cd8a4e2e-7ba8-48a4-8f9b-1b3f287072aa,Spark Plugs,Automotive,Spark Plugs,4,2000,1666459,6665836,1022.367485,4.0,1630,15.368098,19.594479,12960780,442837278,821.030027,38.718421,7881,15.431648,23.397504
4,PID-6248f015-f5d2-40df-ba1d-48ed494badf9,Radiator Coolant,Automotive,Radiator Coolant,14,2000,1398538,19579532,991.167966,14.0,1411,15.593905,22.015592,12960780,442837278,821.030027,38.718421,7881,15.431648,23.397504


In [53]:
# (rows, columns) before removing duplicate rows.
df.shape

(170445, 20)

In [54]:
# Only product attributes and product/category KPI columns remain, so rows repeat;
# keep each distinct row once.
df = df.drop_duplicates()

In [55]:
# (rows, columns) after removing duplicate rows.
df.shape

(62, 20)

In [None]:
# Build a session DataFrame from the de-duplicated frame: rows as plain lists,
# column names taken from the pandas columns.
kpi_rows = df.values.tolist()
kpi_column_names = df.columns.tolist()
df_train_sf = my_session.createDataFrame(kpi_rows, schema=kpi_column_names)

# Save the result as a table, using "overwrite" mode.
df_train_sf.write.mode("overwrite").save_as_table("FDC_HORIZONTAL.O2C_GOLD.ORDER_TO_CASH_PRODUCT_KPI")