# Order-to-Cash Product KPIs (built from the O2C_Template customized notebook template)

In [48]:
import pandas as pd
import numpy as np

In [49]:
# Obtain the session object that later cells use to run SQL (`my_session.sql`)
# and to write the result table (`my_session.createDataFrame`).
from fosforml.model_manager.snowflakesession import get_session
my_session = get_session()

In [50]:
# Pull every column of the source table and materialise it as a pandas frame.
table_name = 'ORDER_TO_CASH_ENRICHED'
select_all_query = "select * from {}".format(table_name)

sf_df = my_session.sql(select_all_query)
df = sf_df.to_pandas()

In [51]:
# Split the rows by invoice status.
# The not-paid subset must be taken BEFORE `df` is overwritten with the paid
# rows; deriving it afterwards always produced an empty frame.
df_new = df[df['INVOICESTATUS'] != 'Paid']
# `.copy()` gives later cells an independent frame, so the column assignments
# they perform do not write into a slice of the loaded data
# (avoids pandas' SettingWithCopyWarning).
df = df[df['INVOICESTATUS'] == 'Paid'].copy()

In [52]:
# (rows, columns) of the 'Paid' subset and of the not-'Paid' subset.
df.shape, df_new.shape

((170290, 37), (0, 37))

In [53]:
# Convert each date column of df to pandas datetime so that date arithmetic
# in later cells works on proper datetime values.
date_columns = [
    'ORDERDATE',
    'DELIVERYDATE',
    'INVOICEDATE',
    'PAYMENTDATE',
    'INVOICEDUEDATE',
    'DELIVEREDON',
]
for date_col in date_columns:
    df[date_col] = pd.to_datetime(df[date_col])

In [54]:
# Total Sales Volume: sum of ORDERQUANTITY per PRODUCTID.
quantity_by_product = df.groupby('PRODUCTID')['ORDERQUANTITY']
total_sales_volume = quantity_by_product.sum().reset_index(name='Total Sales Volume')

In [55]:
# Total Sales Value: sum of ORDERVALUE per PRODUCTID.
value_per_product = df.groupby('PRODUCTID')['ORDERVALUE'].sum()
total_sales_value = value_per_product.reset_index(name='Total Sales Value')

In [56]:
# Average Order Quantity: mean of ORDERQUANTITY per PRODUCTID.
mean_quantity_per_product = df.groupby('PRODUCTID')['ORDERQUANTITY'].mean()
average_order_quantity = mean_quantity_per_product.reset_index(name='Average Order Quantity')

In [57]:
# Average Unit Price: mean of UNITPRICE per PRODUCTID.
unit_price_by_product = df.groupby('PRODUCTID')['UNITPRICE']
average_unit_price = unit_price_by_product.mean().reset_index(name='Average Unit Price')

In [58]:
# Number of Orders: count of distinct ORDERID values per PRODUCTID.
distinct_orders_per_product = df.groupby('PRODUCTID')['ORDERID'].nunique()
number_of_orders = distinct_orders_per_product.reset_index(name='Number of Orders')

In [59]:
# The same five KPIs as the per-product cells, grouped by PRODUCTCATEGORY.
# One groupby object is shared by all five aggregations.
by_category = df.groupby('PRODUCTCATEGORY')
category_quantity = by_category['ORDERQUANTITY']

category_sales_volume = category_quantity.sum().reset_index(name='Total Sales Volume')
category_sales_value = by_category['ORDERVALUE'].sum().reset_index(name='Total Sales Value')
category_average_order_quantity = category_quantity.mean().reset_index(name='Average Order Quantity')
category_average_unit_price = by_category['UNITPRICE'].mean().reset_index(name='Average Unit Price')
category_number_of_orders = by_category['ORDERID'].nunique().reset_index(name='Number of Orders')

In [60]:
# `delivery_performance` is only assigned in the later "Delivery Performance"
# cell, so displaying it here raised NameError on a fresh Restart & Run All.
# Build it before previewing: mean whole-day gap between ORDERDATE and
# DELIVEREDON per PRODUCTID (both columns were parsed to datetime above).
# df itself is not modified here; the later cell recomputes the same frame.
delivery_days = (df['DELIVEREDON'] - df['ORDERDATE']).dt.days
delivery_performance = (
    delivery_days.groupby(df['PRODUCTID'])
    .mean()
    .reset_index(name='Average Delivery Time')
)
delivery_performance.head()

Unnamed: 0,PRODUCTID,Average Delivery Time
0,PID-064e463d-7de2-47bb-80f8-02b6fda2392a,8.454769
1,PID-08d031fd-c61f-47f3-b4d1-e9c79ba23b68,8.474123
2,PID-0952d502-dd95-42ff-8668-7551b8ba2d62,8.518086
3,PID-0dcaa3ac-5e13-4b31-b42a-70b594207dab,8.501065
4,PID-1194cfe3-dea0-496c-b865-3c38d22371cc,8.436611


In [61]:
# The same five KPIs as the per-product cells, grouped by PRODUCTTYPE.
# One groupby object is shared by all five aggregations.
by_type = df.groupby('PRODUCTTYPE')
type_quantity = by_type['ORDERQUANTITY']

type_sales_volume = type_quantity.sum().reset_index(name='Total Sales Volume')
type_sales_value = by_type['ORDERVALUE'].sum().reset_index(name='Total Sales Value')
type_average_order_quantity = type_quantity.mean().reset_index(name='Average Order Quantity')
type_average_unit_price = by_type['UNITPRICE'].mean().reset_index(name='Average Unit Price')
type_number_of_orders = by_type['ORDERID'].nunique().reset_index(name='Number of Orders')

In [62]:
# Delivery Performance: mean number of whole days between ORDERDATE and
# DELIVEREDON, per PRODUCTID. Also stores the per-row gap in df['Delivery Time'].
df['DELIVEREDON'] = pd.to_datetime(df['DELIVEREDON'])
order_to_delivery = df['DELIVEREDON'] - df['ORDERDATE']
df['Delivery Time'] = order_to_delivery.dt.days
mean_delivery_time = df.groupby('PRODUCTID')['Delivery Time'].mean()
delivery_performance = mean_delivery_time.reset_index(name='Average Delivery Time')

In [63]:
# Invoice Performance: mean number of whole days between INVOICEDATE and
# PAYMENTDATE, per PRODUCTID. Also stores the per-row gap in df['Invoice Time'].
df['PAYMENTDATE'] = pd.to_datetime(df['PAYMENTDATE'])
invoice_to_payment = df['PAYMENTDATE'] - df['INVOICEDATE']
df['Invoice Time'] = invoice_to_payment.dt.days
mean_invoice_time = df.groupby('PRODUCTID')['Invoice Time'].mean()
invoice_performance = mean_invoice_time.reset_index(name='Average Invoice Time')

In [64]:
# Combine the per-product KPI frames into one table keyed on PRODUCTID.
# Each frame is merged in turn (default inner join), in the order listed.
product_kpis = total_sales_volume
for kpi_frame in (
    total_sales_value,
    average_order_quantity,
    average_unit_price,
    number_of_orders,
    delivery_performance,
    invoice_performance,
):
    product_kpis = product_kpis.merge(kpi_frame, on='PRODUCTID')

In [65]:
# (rows, columns) of the merged per-product KPI table.
product_kpis.shape

(62, 8)

In [66]:
# Attach the per-product KPI columns to every row of df.
# Left join on PRODUCTID keeps all rows of df.
df = pd.merge(df, product_kpis, how='left', on='PRODUCTID')

In [67]:
# Keep only the product identity columns and the KPI columns.
output_columns = [
    'PRODUCTID',
    'PRODUCTNAME',
    'PRODUCTCATEGORY',
    'PRODUCTTYPE',
    'Total Sales Volume',
    'Total Sales Value',
    'Average Order Quantity',
    'Average Unit Price',
    'Number of Orders',
    'Average Delivery Time',
    'Average Invoice Time',
]
df = df[output_columns]

In [72]:
# Preview the first rows of df.
df.head()

Unnamed: 0,PRODUCTID,PRODUCTNAME,PRODUCTCATEGORY,PRODUCTTYPE,Total Sales Volume,Total Sales Value,Average Order Quantity,Average Unit Price,Number of Orders,Average Delivery Time,Average Invoice Time
0,PID-4b87db26-edd8-4b4a-8143-adb85ba6dbe2,Insulation Material,Construction Materials,Insulation Material,18260514,36521028,5024.90754,2.0,3634,8.531921,8.058888
1,PID-89334032-db8d-4821-a9ad-5bd9a5305740,Cement,Construction Materials,Cement,912359,91235900,250.372942,100.0,3644,8.521679,8.07135
2,PID-3e2ce142-a70d-4da4-8770-2079f75f43a9,Roofing Sheets,Construction Materials,Roofing Sheets,9063971,63447797,2503.167909,7.0,3621,8.514499,8.038663
3,PID-fec6ed6f-7ac1-4655-b4a7-9766cff15b7b,Engine Oil,Automotive,Engine Oil,828858,33154320,507.878676,40.0,1632,8.404412,7.985907
4,PID-8b2ea2ce-e1df-4139-b179-3e7f217a9f9b,Conveyor Belt,Industrial Equipment,Conveyor Belt,679669,679669000,504.579807,1000.0,1347,8.550854,7.939866


In [68]:
# (rows, columns) of df ahead of the drop_duplicates cell below.
df.shape

(170290, 11)

In [69]:
# Keep the first occurrence of each fully identical row and drop the repeats.
is_repeat_row = df.duplicated()
df = df[~is_repeat_row]

In [70]:
# (rows, columns) of df following the drop_duplicates cell above.
df.shape

(62, 11)

In [67]:
# Turn the pandas frame into a session DataFrame (rows as lists, column names
# as the schema) and overwrite the target table with it.
row_values = df.values.tolist()
column_names = df.columns.tolist()
df_train_sf = my_session.createDataFrame(row_values, schema=column_names)

table_writer = df_train_sf.write.mode("overwrite")
table_writer.save_as_table("FDC_HORIZONTAL.O2C_GOLD.ORDER_TO_CASH_PRODUCT_KPI")