# Use O2C_Template Customized Notebook Template

In [1]:
import pandas as pd
import numpy as np

In [2]:
from fosforml.model_manager.snowflakesession import get_session
# Session object from fosforml's snowflakesession module; later cells use it to run SQL
# and to write the resulting table.
my_session = get_session()

In [3]:
# Source table for this notebook (alternative source: 'ORDER_TO_CASH_ENRICHED').
table_name = 'ORDER_TO_CASH_MASTER'

# Select every column of the table through the session, then materialise it in pandas.
sf_df = my_session.sql(f"select * from {table_name}")
df = sf_df.to_pandas()

In [4]:
# List the distinct INVOICESTATUS values; the next cell splits the data on them.
df['INVOICESTATUS'].unique()

array(['LATE_PAYMENT', 'ONTIME_PAYMENT', 'Approved', 'PendingApproval',
       'Sent'], dtype=object)

In [5]:
# Split on invoice status: rows marked LATE_PAYMENT / ONTIME_PAYMENT go to df_train,
# every other status goes to df_test.
has_payment_outcome = df['INVOICESTATUS'].isin(['LATE_PAYMENT', 'ONTIME_PAYMENT'])
df_train = df[has_payment_outcome]
df_test = df[~has_payment_outcome]

In [6]:
# Row/column counts of the two splits.
df_train.shape, df_test.shape

((170161, 40), (693, 40))

In [7]:
# From here on `df` is rebound to an independent copy of df_train, so the columns added
# in later cells do not modify df_train itself.
df = df_train.copy()

In [8]:
# Parse each date column with pd.to_datetime so later cells can subtract dates.
for date_col in (
    'ORDERDATE',
    'DELIVERYDATE',
    'INVOICEDATE',
    'PAYMENTDATE',
    'INVOICEDUEDATE',
    'DELIVEREDON',
):
    df[date_col] = pd.to_datetime(df[date_col])

In [9]:
# Total sales volume: ORDERQUANTITY summed per PRODUCTID.
total_sales_volume = (
    df.groupby('PRODUCTID')['ORDERQUANTITY']
      .sum()
      .rename('PR_TOTAL_SALES_VOLUME')
      .reset_index()
)

In [10]:
# Total sales value: ORDERVALUE summed per PRODUCTID.
total_sales_value = (
    df.groupby('PRODUCTID')['ORDERVALUE']
      .sum()
      .rename('PR_TOTAL_SALES_VALUE')
      .reset_index()
)

In [11]:
# Average order quantity: mean ORDERQUANTITY per PRODUCTID.
average_order_quantity = (
    df.groupby('PRODUCTID')['ORDERQUANTITY']
      .mean()
      .rename('PR_AVG_ORDER_QUANTITY')
      .reset_index()
)

In [12]:
# Average unit price: mean UNITPRICE per PRODUCTID.
average_unit_price = (
    df.groupby('PRODUCTID')['UNITPRICE']
      .mean()
      .rename('PR_AVG_UNIT_PRICE')
      .reset_index()
)

In [13]:
# Number of orders: count of distinct ORDERID values per PRODUCTID.
number_of_orders = (
    df.groupby('PRODUCTID')['ORDERID']
      .nunique()
      .rename('PR_NUMBER_OF_ORDERS')
      .reset_index()
)

In [14]:
# The same five sales KPIs as the per-product cells, aggregated per PRODUCTCATEGORY.
# One groupby object is shared by all five aggregations.
by_category = df.groupby('PRODUCTCATEGORY')

category_sales_volume = by_category['ORDERQUANTITY'].sum().reset_index(name='CAT_TOTAL_SALES_VOLUME')
category_sales_value = by_category['ORDERVALUE'].sum().reset_index(name='CAT_TOTAL_SALES_VALUE')
category_average_order_quantity = by_category['ORDERQUANTITY'].mean().reset_index(name='CAT_AVG_ORDER_QUANTITY')
category_average_unit_price = by_category['UNITPRICE'].mean().reset_index(name='CAT_AVG_UNIT_PRICE')
category_number_of_orders = by_category['ORDERID'].nunique().reset_index(name='CAT_NUMBER_OF_ORDERS')

In [15]:
# Delivery performance: days component of (DELIVEREDON - ORDERDATE) per row, then the
# mean of that value per product and per category.
df['DELIVEREDON'] = pd.to_datetime(df['DELIVEREDON'])
order_to_delivery = df['DELIVEREDON'] - df['ORDERDATE']
df['PR_DELIVERY_TIME'] = order_to_delivery.dt.days

delivery_performance = (
    df.groupby('PRODUCTID')['PR_DELIVERY_TIME']
      .mean()
      .rename('PR_AVG_DELIVERY_TIME')
      .reset_index()
)
category_delivery_performance = (
    df.groupby('PRODUCTCATEGORY')['PR_DELIVERY_TIME']
      .mean()
      .rename('CAT_AVG_DELIVERY_TIME')
      .reset_index()
)

In [16]:
# Invoice performance: days component of (PAYMENTDATE - INVOICEDATE) per row, then the
# mean of that value per product and per category.
df['PAYMENTDATE'] = pd.to_datetime(df['PAYMENTDATE'])
invoice_to_payment = df['PAYMENTDATE'] - df['INVOICEDATE']
df['PR_INVOICE_TIME'] = invoice_to_payment.dt.days

invoice_performance = (
    df.groupby('PRODUCTID')['PR_INVOICE_TIME']
      .mean()
      .rename('PR_AVG_INVOICE_TIME')
      .reset_index()
)
category_invoice_performance = (
    df.groupby('PRODUCTCATEGORY')['PR_INVOICE_TIME']
      .mean()
      .rename('CAT_AVG_INVOICE_TIME')
      .reset_index()
)

In [17]:
# Join the seven per-product KPI frames on PRODUCTID into one table
# (one row per product, one column per KPI).
product_kpis = (
    total_sales_volume
    .merge(total_sales_value, on='PRODUCTID')
    .merge(average_order_quantity, on='PRODUCTID')
    .merge(average_unit_price, on='PRODUCTID')
    .merge(number_of_orders, on='PRODUCTID')
    .merge(delivery_performance, on='PRODUCTID')
    .merge(invoice_performance, on='PRODUCTID')
)

In [18]:
# Join the seven per-category KPI frames on PRODUCTCATEGORY into one table
# (one row per category, one column per KPI).
category_kpis = (
    category_sales_volume
    .merge(category_sales_value, on='PRODUCTCATEGORY')
    .merge(category_average_order_quantity, on='PRODUCTCATEGORY')
    .merge(category_average_unit_price, on='PRODUCTCATEGORY')
    .merge(category_number_of_orders, on='PRODUCTCATEGORY')
    .merge(category_delivery_performance, on='PRODUCTCATEGORY')
    .merge(category_invoice_performance, on='PRODUCTCATEGORY')
)

In [19]:
# Size of the per-product KPI table: PRODUCTID plus seven KPI columns.
product_kpis.shape

(62, 8)

In [20]:
# Size of the per-category KPI table: PRODUCTCATEGORY plus seven KPI columns.
category_kpis.shape

(7, 8)

In [21]:
# Display the full per-category KPI table.
category_kpis

Unnamed: 0,PRODUCTCATEGORY,CAT_TOTAL_SALES_VOLUME,CAT_TOTAL_SALES_VALUE,CAT_AVG_ORDER_QUANTITY,CAT_AVG_UNIT_PRICE,CAT_NUMBER_OF_ORDERS,CAT_AVG_DELIVERY_TIME,CAT_AVG_INVOICE_TIME
0,Automotive,13194866,443409125,816.615051,38.336552,8046,15.471345,23.141045
1,Construction Materials,39572235,1352599137,2144.952843,144.119031,9194,18.004336,27.635861
2,Consumer Goods,18911904,1237150098,521.032151,169.860457,18151,6.012894,26.773783
3,Food Beverages,63455195,506713347,1615.951793,6.882372,19680,5.996766,18.014516
4,Healthcare,31056225,420747225,1813.92588,20.49781,8556,9.016763,23.339641
5,Industrial Equipment,1286433,12718312700,95.115194,17787.168946,6747,18.009982,37.853974
6,Office Supplies,11593445,1752937837,395.100876,174.639164,14626,12.996013,29.527996


In [22]:
# Attach the per-product KPIs to every row of df (left join on PRODUCTID).
# - KPI columns left over from an earlier run of this cell are dropped first, so re-running
#   the cell does not create suffixed duplicates such as PR_TOTAL_SALES_VOLUME_x / _y.
#   On a first run none of these columns exist yet and errors='ignore' makes the drop a no-op.
# - validate='many_to_one' makes pandas raise MergeError if PRODUCTID is not unique in
#   product_kpis, instead of silently multiplying the rows of df.
stale_product_kpi_cols = product_kpis.columns.difference(['PRODUCTID'])
df = (
    df.drop(columns=stale_product_kpi_cols, errors='ignore')
      .merge(product_kpis, on='PRODUCTID', how='left', validate='many_to_one')
)

In [23]:
# Attach the per-category KPIs to every row of df (left join on PRODUCTCATEGORY).
# - KPI columns left over from an earlier run of this cell are dropped first, so re-running
#   the cell does not create suffixed duplicates such as CAT_TOTAL_SALES_VOLUME_x / _y.
#   On a first run none of these columns exist yet and errors='ignore' makes the drop a no-op.
# - validate='many_to_one' makes pandas raise MergeError if PRODUCTCATEGORY is not unique in
#   category_kpis, instead of silently multiplying the rows of df.
stale_category_kpi_cols = category_kpis.columns.difference(['PRODUCTCATEGORY'])
df = (
    df.drop(columns=stale_category_kpi_cols, errors='ignore')
      .merge(category_kpis, on='PRODUCTCATEGORY', how='left', validate='many_to_one')
)

In [24]:
# List all columns after both KPI merges (original columns plus the PR_* and CAT_* features).
df.columns

Index(['ORDERID', 'ORDERITEMID', 'PRODUCTID', 'PRODUCTNAME', 'ORDERQUANTITY',
       'UNITPRICE', 'ORDERVALUE', 'CUSTOMERID', 'CUSTOMERNAME',
       'PRODUCTCATEGORY', 'ORDERDATE', 'ORDERSTATUS', 'ORDERAMOUNT',
       'ORDERCOUNT', 'SHIPMENTID', 'SHIPMENTDATE', 'DELIVERYDATE',
       'DELIVEREDON', 'CARRIER', 'SHIPMENTSTATUS', 'INVOICEID', 'INVOICEDATE',
       'INVOICEDUEDATE', 'PAYMENTDATE', 'INVOICESTATUS', 'PAYMENTMETHOD',
       'PRODUCTTYPE', 'QUANTITY', 'COMPANYTYPE', 'CONTACTDETAILS',
       'EMAILDETAILS', 'ADDRESSDETAILS', 'ADMINDETAILS', 'CREDITLIMIT',
       'CUSTOMERSINCE', 'PAYMENTTERMS', 'CREDITLIMITTYPE', 'CUSTOMERTYPE',
       'SUPPLIERNAME', 'SUPPLIERID', 'PR_DELIVERY_TIME', 'PR_INVOICE_TIME',
       'PR_TOTAL_SALES_VOLUME', 'PR_TOTAL_SALES_VALUE',
       'PR_AVG_ORDER_QUANTITY', 'PR_AVG_UNIT_PRICE', 'PR_NUMBER_OF_ORDERS',
       'PR_AVG_DELIVERY_TIME', 'PR_AVG_INVOICE_TIME', 'CAT_TOTAL_SALES_VOLUME',
       'CAT_TOTAL_SALES_VALUE', 'CAT_AVG_ORDER_QUANTITY', 'CAT_AVG_

In [25]:
# Keep only the product descriptors and the product/category KPI columns;
# all other columns are dropped from df here.
df = df.loc[:, [
    # product descriptors
    'PRODUCTID', 'PRODUCTNAME', 'PRODUCTCATEGORY', 'PRODUCTTYPE', 'UNITPRICE', 'QUANTITY',
    # per-product KPIs
    'PR_TOTAL_SALES_VOLUME', 'PR_TOTAL_SALES_VALUE', 'PR_AVG_ORDER_QUANTITY',
    'PR_AVG_UNIT_PRICE', 'PR_NUMBER_OF_ORDERS', 'PR_AVG_DELIVERY_TIME', 'PR_AVG_INVOICE_TIME',
    # per-category KPIs
    'CAT_TOTAL_SALES_VOLUME', 'CAT_TOTAL_SALES_VALUE', 'CAT_AVG_ORDER_QUANTITY',
    'CAT_AVG_UNIT_PRICE', 'CAT_NUMBER_OF_ORDERS', 'CAT_AVG_DELIVERY_TIME', 'CAT_AVG_INVOICE_TIME',
]]

In [26]:
# Preview the first rows of the reduced frame; rows for the same product are still repeated here.
df.head()

Unnamed: 0,PRODUCTID,PRODUCTNAME,PRODUCTCATEGORY,PRODUCTTYPE,UNITPRICE,QUANTITY,PR_TOTAL_SALES_VOLUME,PR_TOTAL_SALES_VALUE,PR_AVG_ORDER_QUANTITY,PR_AVG_UNIT_PRICE,PR_NUMBER_OF_ORDERS,PR_AVG_DELIVERY_TIME,PR_AVG_INVOICE_TIME,CAT_TOTAL_SALES_VOLUME,CAT_TOTAL_SALES_VALUE,CAT_AVG_ORDER_QUANTITY,CAT_AVG_UNIT_PRICE,CAT_NUMBER_OF_ORDERS,CAT_AVG_DELIVERY_TIME,CAT_AVG_INVOICE_TIME
0,PID-78d4f6e9-b17b-4218-adeb-7fd1fa823fa4,Cement,Construction Materials,Cement,100,500,913278,91327800,247.032188,100.0,3697,18.025697,25.788207,39572235,1352599137,2144.952843,144.119031,9194,18.004336,27.635861
1,PID-42bd316c-176f-4ba7-aa56-79b05f5f9c3d,PVC Pipes,Construction Materials,PVC Pipes,3,5000,9189233,27567699,2492.333333,3.0,3687,17.998373,24.348793,39572235,1352599137,2144.952843,144.119031,9194,18.004336,27.635861
2,PID-0125b151-daca-429b-8e3c-7af9f65e8f18,Insulation Material,Construction Materials,Insulation Material,2,10000,18257565,36515130,4969.397115,2.0,3674,18.059336,24.339684,39572235,1352599137,2144.952843,144.119031,9194,18.004336,27.635861
3,PID-0125b151-daca-429b-8e3c-7af9f65e8f18,Insulation Material,Construction Materials,Insulation Material,2,10000,18257565,36515130,4969.397115,2.0,3674,18.059336,24.339684,39572235,1352599137,2144.952843,144.119031,9194,18.004336,27.635861
4,PID-42bd316c-176f-4ba7-aa56-79b05f5f9c3d,PVC Pipes,Construction Materials,PVC Pipes,3,5000,9189233,27567699,2492.333333,3.0,3687,17.998373,24.348793,39572235,1352599137,2144.952843,144.119031,9194,18.004336,27.635861


In [27]:
# Shape before de-duplication.
df.shape

(170161, 20)

In [28]:
# Collapse the repeated rows so that each distinct combination of the selected columns
# appears once.
df = df.drop_duplicates()

# The result is written out as a product-level KPI table, so it must hold exactly one row
# per PRODUCTID. If a product carries differing descriptor values across rows (for example
# more than one UNITPRICE), drop_duplicates() keeps several rows for it; fail loudly here
# rather than writing duplicated products.
assert df['PRODUCTID'].is_unique, (
    "Expected one row per PRODUCTID after drop_duplicates(); "
    "some products have differing descriptor values across rows."
)

In [29]:
# Shape after de-duplication.
df.shape

(62, 20)

In [30]:
# Rebuild the pandas frame as a session DataFrame (rows passed as plain lists, column names
# passed as the schema), then overwrite the target table with it.
df_train_sf = my_session.createDataFrame(
    df.to_numpy().tolist(),
    schema=list(df.columns),
)
df_train_sf.write.mode("overwrite").save_as_table("FDC_HORIZONTAL.O2C_GOLD.ORDER_TO_CASH_PRODUCT_KPI")