# Order-to-Cash Product KPIs (built from the O2C_Template customized notebook template)

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Obtain the session object that the cells below use to run SQL and to write
# the result table.
# NOTE(review): presumably a Snowpark session bound to the platform-configured
# Snowflake connection — confirm against the fosforml documentation.
from fosforml.model_manager.snowflakesession import get_session
my_session = get_session()

In [3]:
# Name of the source table holding the enriched order-to-cash records.
table_name = 'ORDER_TO_CASH_ENRICHED'

# Reference the table through Session.table() rather than string-building a
# "select *" statement; the rows returned are the same.
sf_df = my_session.table(table_name)
# Pull the whole table into local memory as a pandas DataFrame.
df = sf_df.to_pandas()

In [4]:
# Split the loaded rows by invoice status.
#
# The mask is computed once, BEFORE `df` is reassigned, so that both subsets
# are taken from the same unfiltered frame. (Deriving `df_new` after `df` has
# already been narrowed to the paid rows always yields an empty frame.)
is_paid = df['INVOICESTATUS'] == 'Paid'

# Rows whose invoice is anything other than 'Paid' (including missing status).
df_new = df[~is_paid]

# Rows with a paid invoice; `.copy()` gives an independent frame so the column
# assignments in later cells do not operate on a slice of the loaded data.
# Note: this cell narrows `df` in place, so re-run the load cell before
# re-running this one if `df_new` needs to be rebuilt.
df = df[is_paid].copy()

In [5]:
# Show (rows, columns) for the 'Paid' frame and for the frame built with the
# INVOICESTATUS != 'Paid' filter.
df.shape, df_new.shape

((170290, 37), (0, 37))

In [6]:
# Convert each date-like column to pandas datetimes so the date arithmetic in
# the later cells can subtract them directly.
date_columns = [
    'ORDERDATE',
    'DELIVERYDATE',
    'INVOICEDATE',
    'PAYMENTDATE',
    'INVOICEDUEDATE',
    'DELIVEREDON',
]
for date_column in date_columns:
    df[date_column] = pd.to_datetime(df[date_column])

In [7]:
# Total Sales Volume: sum of ORDERQUANTITY per PRODUCTID, returned as a
# two-column frame (PRODUCTID, 'Total Sales Volume').
quantity_by_product = df.groupby('PRODUCTID')['ORDERQUANTITY']
total_sales_volume = (
    quantity_by_product
    .sum()
    .reset_index(name='Total Sales Volume')
)

In [8]:
# Total Sales Value: sum of ORDERVALUE per PRODUCTID, returned as a
# two-column frame (PRODUCTID, 'Total Sales Value').
value_by_product = df.groupby('PRODUCTID')['ORDERVALUE']
total_sales_value = (
    value_by_product
    .sum()
    .reset_index(name='Total Sales Value')
)

In [9]:
# Average Order Quantity: mean of ORDERQUANTITY per PRODUCTID, returned as a
# two-column frame (PRODUCTID, 'Average Order Quantity').
order_quantity_by_product = df.groupby('PRODUCTID')['ORDERQUANTITY']
average_order_quantity = (
    order_quantity_by_product
    .mean()
    .reset_index(name='Average Order Quantity')
)

In [10]:
# Average Unit Price: mean of UNITPRICE per PRODUCTID, returned as a
# two-column frame (PRODUCTID, 'Average Unit Price').
unit_price_by_product = df.groupby('PRODUCTID')['UNITPRICE']
average_unit_price = (
    unit_price_by_product
    .mean()
    .reset_index(name='Average Unit Price')
)

In [11]:
# Number of Orders: count of distinct ORDERID values per PRODUCTID, returned
# as a two-column frame (PRODUCTID, 'Number of Orders').
order_ids_by_product = df.groupby('PRODUCTID')['ORDERID']
number_of_orders = (
    order_ids_by_product
    .nunique()
    .reset_index(name='Number of Orders')
)

In [12]:
# The same five KPIs as the per-product cells, aggregated per PRODUCTCATEGORY.
# One groupby object is shared by all five aggregations.
by_category = df.groupby('PRODUCTCATEGORY')

category_sales_volume = (
    by_category['ORDERQUANTITY'].sum().reset_index(name='Total Sales Volume')
)
category_sales_value = (
    by_category['ORDERVALUE'].sum().reset_index(name='Total Sales Value')
)
category_average_order_quantity = (
    by_category['ORDERQUANTITY'].mean().reset_index(name='Average Order Quantity')
)
category_average_unit_price = (
    by_category['UNITPRICE'].mean().reset_index(name='Average Unit Price')
)
category_number_of_orders = (
    by_category['ORDERID'].nunique().reset_index(name='Number of Orders')
)

In [13]:
# The same five KPIs as the per-product cells, aggregated per PRODUCTTYPE.
# One groupby object is shared by all five aggregations.
by_type = df.groupby('PRODUCTTYPE')

type_sales_volume = (
    by_type['ORDERQUANTITY'].sum().reset_index(name='Total Sales Volume')
)
type_sales_value = (
    by_type['ORDERVALUE'].sum().reset_index(name='Total Sales Value')
)
type_average_order_quantity = (
    by_type['ORDERQUANTITY'].mean().reset_index(name='Average Order Quantity')
)
type_average_unit_price = (
    by_type['UNITPRICE'].mean().reset_index(name='Average Unit Price')
)
type_number_of_orders = (
    by_type['ORDERID'].nunique().reset_index(name='Number of Orders')
)

In [14]:
# Delivery performance: whole days between ORDERDATE and DELIVEREDON for each
# row, then the per-product mean of that figure.
delivered_on = pd.to_datetime(df['DELIVEREDON'])
df['DELIVEREDON'] = delivered_on

order_to_delivery = delivered_on - df['ORDERDATE']
df['Delivery Time'] = order_to_delivery.dt.days

delivery_performance = (
    df.groupby('PRODUCTID')['Delivery Time']
    .mean()
    .reset_index(name='Average Delivery Time')
)

In [15]:
# Invoice performance: whole days between INVOICEDATE and PAYMENTDATE for each
# row (stored as 'Invoice Time'), then the per-product mean of that figure.
payment_date = pd.to_datetime(df['PAYMENTDATE'])
df['PAYMENTDATE'] = payment_date

invoice_to_payment = payment_date - df['INVOICEDATE']
df['Invoice Time'] = invoice_to_payment.dt.days

invoice_performance = (
    df.groupby('PRODUCTID')['Invoice Time']
    .mean()
    .reset_index(name='Average Invoice Time')
)

In [16]:
# Combine the seven per-product KPI frames into one frame keyed by PRODUCTID.
# Each merge uses the default (inner) join, so only products present in every
# KPI frame are kept.
product_kpis = (
    total_sales_volume
    .merge(total_sales_value, on='PRODUCTID')
    .merge(average_order_quantity, on='PRODUCTID')
    .merge(average_unit_price, on='PRODUCTID')
    .merge(number_of_orders, on='PRODUCTID')
    .merge(delivery_performance, on='PRODUCTID')
    .merge(invoice_performance, on='PRODUCTID')
)

In [17]:
# Attach the per-product KPIs to every order row (left join on PRODUCTID).
#
# Any KPI columns left over from a previous run of this cell are dropped
# first; without that, re-running the cell makes pandas emit '_x'/'_y'
# suffixed duplicates of every KPI column.
kpi_columns = [column for column in product_kpis.columns if column != 'PRODUCTID']
df = (
    df.drop(columns=kpi_columns, errors='ignore')
    # validate raises MergeError if product_kpis ever holds more than one row
    # per PRODUCTID, which would otherwise silently multiply order rows.
    .merge(product_kpis, on='PRODUCTID', how='left', validate='many_to_one')
)

In [22]:
# List the current column names of df.
df.columns

Index(['ORDERID', 'ORDERITEMID', 'PRODUCTID', 'PRODUCTNAME', 'ORDERQUANTITY',
       'UNITPRICE', 'ORDERVALUE', 'CUSTOMERID', 'CUSTOMERNAME',
       'PRODUCTCATEGORY', 'ORDERDATE', 'ORDERSTATUS', 'ORDERAMOUNT',
       'ORDERCOUNT', 'SHIPMENTID', 'SHIPMENTDATE', 'DELIVERYDATE',
       'DELIVEREDON', 'CARRIER', 'SHIPMENTSTATUS', 'INVOICEID', 'INVOICEDATE',
       'INVOICEDUEDATE', 'PAYMENTDATE', 'INVOICESTATUS', 'PAYMENTMETHOD',
       'PRODUCTTYPE', 'QUANTITY', 'NAME', 'COMPANYTYPE', 'CONTACTDETAILS',
       'EMAILDETAILS', 'ADDRESSDETAILS', 'ADMINDETAILS', 'CREDITLIMIT',
       'CREDITRATING', 'CUSTOMERTYPE', 'Delivery Time', 'Invoice Time',
       'Total Sales Volume', 'Total Sales Value', 'Average Order Quantity',
       'Average Unit Price', 'Number of Orders', 'Average Delivery Time',
       'Average Invoice Time'],
      dtype='object')

In [18]:
# Show (rows, columns) of df.
df.shape

(170290, 46)

In [19]:
# Keep the first occurrence of each fully identical row and discard repeats.
is_repeat_row = df.duplicated()
df = df[~is_repeat_row]

In [21]:
# Show (rows, columns) of df again, for comparison with the earlier shape.
df.shape

(170290, 46)

In [20]:
# Preview the first rows of df.
# NOTE(review): the rendered preview includes customer names — check whether
# this output should be cleared before the notebook is shared.
df.head()

Unnamed: 0,ORDERID,ORDERITEMID,PRODUCTID,PRODUCTNAME,ORDERQUANTITY,UNITPRICE,ORDERVALUE,CUSTOMERID,CUSTOMERNAME,PRODUCTCATEGORY,...,CUSTOMERTYPE,Delivery Time,Invoice Time,Total Sales Volume,Total Sales Value,Average Order Quantity,Average Unit Price,Number of Orders,Average Delivery Time,Average Invoice Time
0,OR-1d383f77-5592-46ad-8e52-29d33a6502ad,OI-5759e816-1866-4073-8d3c-6ad53993c09b,PID-4b87db26-edd8-4b4a-8143-adb85ba6dbe2,Insulation Material,1630,2,3260,CID-4a14e75e-cd20-44a1-9696-d5eb1746b894,Anabrige LLP,Construction Materials,...,Construction Materials,7,2,18260514,36521028,5024.90754,2.0,3634,8.531921,8.058888
1,OR-1d383f77-5592-46ad-8e52-29d33a6502ad,OI-cca9228c-f83d-4128-94d9-6b5eb944f23f,PID-89334032-db8d-4821-a9ad-5bd9a5305740,Cement,123,100,12300,CID-4a14e75e-cd20-44a1-9696-d5eb1746b894,Anabrige LLP,Construction Materials,...,Construction Materials,7,2,912359,91235900,250.372942,100.0,3644,8.521679,8.07135
2,OR-1d383f77-5592-46ad-8e52-29d33a6502ad,OI-04e65e52-f3ab-46e6-bef4-e2c4b2c4ee3a,PID-3e2ce142-a70d-4da4-8770-2079f75f43a9,Roofing Sheets,1625,7,11375,CID-4a14e75e-cd20-44a1-9696-d5eb1746b894,Anabrige LLP,Construction Materials,...,Construction Materials,7,2,9063971,63447797,2503.167909,7.0,3621,8.514499,8.038663
3,OR-343ce41a-322b-4823-8afc-52242973d163,OI-6125f927-0d99-48d5-9fa2-74b8a310af46,PID-fec6ed6f-7ac1-4655-b4a7-9766cff15b7b,Engine Oil,263,40,10520,CID-f922f515-1a7a-4640-959a-3c2f093f4d47,FULCRO,Automotive,...,Automotive,4,13,828858,33154320,507.878676,40.0,1632,8.404412,7.985907
4,OR-1310822f-e948-4c29-8da7-beec09196076,OI-54edae96-61a7-4666-8fe8-50d10dfd9323,PID-8b2ea2ce-e1df-4139-b179-3e7f217a9f9b,Conveyor Belt,306,1000,306000,CID-169eb6d1-27a1-4859-b7c8-5073ca1eb201,Taylor Ltd,Industrial Equipment,...,Industrial Equipment,10,5,679669,679669000,504.579807,1000.0,1347,8.550854,7.939866


In [67]:
# Persist the enriched frame to Snowflake, replacing any existing table.
# NOTE(review): the frame is converted to a plain list of row lists, with only
# the column names supplied as schema. Passing the pandas frame itself to the
# session may be simpler — confirm type handling before changing this.
target_table = "FDC_HORIZONTAL.O2C_GOLD.ORDER_TO_CASH_PRODUCT_KPI"

row_values = df.values.tolist()
column_names = df.columns.tolist()
df_train_sf = my_session.createDataFrame(row_values, schema=column_names)

table_writer = df_train_sf.write.mode("overwrite")
table_writer.save_as_table(target_table)