# Use O2C_Template Customized Notebook Template

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Obtain the session object used by later cells to run the SQL query and to
# write the resulting KPI table.
from fosforml.model_manager.snowflakesession import get_session
my_session = get_session()

In [3]:
# Source table holding the enriched order-to-cash records.
table_name = 'ORDER_TO_CASH_ENRICHED'

# Select every column of the table and materialise the result as a pandas DataFrame.
sf_df = my_session.sql(f"select * from {table_name}")
df = sf_df.to_pandas()

In [4]:
# Split the orders by invoice status.
# Both subsets are selected from the same unfiltered frame: overwriting `df`
# with the paid rows first and only then selecting the non-paid rows from it
# always yields an empty `df_new`.
# `.copy()` makes each subset an independent frame, so the column assignments
# done in later cells do not operate on a slice of the loaded data.
invoice_status = df['INVOICESTATUS']
df_new = df[invoice_status != 'Paid'].copy()
df = df[invoice_status == 'Paid'].copy()

In [5]:
# Sanity check: (rows, columns) of `df` (INVOICESTATUS == 'Paid') and of
# `df_new` (INVOICESTATUS != 'Paid').
df.shape, df_new.shape

((170290, 37), (0, 37))

In [6]:
# Parse every date column with pandas so later cells can subtract dates
# and read the difference in days.
for date_column in (
    'ORDERDATE',
    'DELIVERYDATE',
    'INVOICEDATE',
    'PAYMENTDATE',
    'INVOICEDUEDATE',
    'DELIVEREDON',
):
    df[date_column] = pd.to_datetime(df[date_column])

In [7]:
# Total Sales Volume: ORDERQUANTITY summed per PRODUCTID.
total_sales_volume = (
    df.groupby('PRODUCTID')['ORDERQUANTITY']
    .sum()
    .rename('Total Sales Volume')
    .reset_index()
)

In [8]:
# Total Sales Value: ORDERVALUE summed per PRODUCTID.
total_sales_value = (
    df.groupby('PRODUCTID')['ORDERVALUE']
    .sum()
    .rename('Total Sales Value')
    .reset_index()
)

In [9]:
# Average Order Quantity: mean ORDERQUANTITY per PRODUCTID.
average_order_quantity = (
    df.groupby('PRODUCTID')['ORDERQUANTITY']
    .mean()
    .rename('Average Order Quantity')
    .reset_index()
)

In [10]:
# Average Unit Price: mean UNITPRICE per PRODUCTID.
average_unit_price = (
    df.groupby('PRODUCTID')['UNITPRICE']
    .mean()
    .rename('Average Unit Price')
    .reset_index()
)

In [11]:
# Number of Orders: count of distinct ORDERID values per PRODUCTID.
number_of_orders = (
    df.groupby('PRODUCTID')['ORDERID']
    .nunique()
    .rename('Number of Orders')
    .reset_index()
)

In [12]:
# The same five KPIs as above, aggregated per PRODUCTCATEGORY.
# One groupby object is shared by all five aggregations.
category_groups = df.groupby('PRODUCTCATEGORY')
category_quantity = category_groups['ORDERQUANTITY']

category_sales_volume = category_quantity.sum().reset_index(name='Total Sales Volume')
category_sales_value = category_groups['ORDERVALUE'].sum().reset_index(name='Total Sales Value')
category_average_order_quantity = category_quantity.mean().reset_index(name='Average Order Quantity')
category_average_unit_price = category_groups['UNITPRICE'].mean().reset_index(name='Average Unit Price')
category_number_of_orders = category_groups['ORDERID'].nunique().reset_index(name='Number of Orders')

In [45]:
# Preview the average delivery time per product (first five products).
# `delivery_performance` is only assigned in the later "Delivery Performance"
# cell, so referencing it at this position raises NameError on a fresh
# Restart & Run All. The same figures are derived inline here instead, without
# adding any column to `df`.
# Relies on ORDERDATE and DELIVEREDON having been parsed to datetimes in the
# date-conversion cell above.
(
    (df['DELIVEREDON'] - df['ORDERDATE']).dt.days
    .groupby(df['PRODUCTID'])
    .mean()
    .reset_index(name='Average Delivery Time')
    .head()
)

Unnamed: 0,PRODUCTID,Average Delivery Time
0,PID-064e463d-7de2-47bb-80f8-02b6fda2392a,8.454769
1,PID-08d031fd-c61f-47f3-b4d1-e9c79ba23b68,8.474123
2,PID-0952d502-dd95-42ff-8668-7551b8ba2d62,8.518086
3,PID-0dcaa3ac-5e13-4b31-b42a-70b594207dab,8.501065
4,PID-1194cfe3-dea0-496c-b865-3c38d22371cc,8.436611


In [13]:
# The same five KPIs, aggregated per PRODUCTTYPE.
# One groupby object is shared by all five aggregations.
type_groups = df.groupby('PRODUCTTYPE')
type_quantity = type_groups['ORDERQUANTITY']

type_sales_volume = type_quantity.sum().reset_index(name='Total Sales Volume')
type_sales_value = type_groups['ORDERVALUE'].sum().reset_index(name='Total Sales Value')
type_average_order_quantity = type_quantity.mean().reset_index(name='Average Order Quantity')
type_average_unit_price = type_groups['UNITPRICE'].mean().reset_index(name='Average Unit Price')
type_number_of_orders = type_groups['ORDERID'].nunique().reset_index(name='Number of Orders')

In [14]:
# Delivery Performance
# 'Delivery Time' is DELIVEREDON minus ORDERDATE in whole days, stored per row
# on `df`; the per-product mean becomes 'Average Delivery Time'.
df['DELIVEREDON'] = pd.to_datetime(df['DELIVEREDON'])
delivery_span = df['DELIVEREDON'] - df['ORDERDATE']
df['Delivery Time'] = delivery_span.dt.days
delivery_performance = (
    df.groupby('PRODUCTID')['Delivery Time']
    .mean()
    .rename('Average Delivery Time')
    .reset_index()
)

In [15]:
# Invoice Performance
# 'Invoice Time' is PAYMENTDATE minus INVOICEDATE in whole days, stored per row
# on `df`; the per-product mean becomes 'Average Invoice Time'.
df['PAYMENTDATE'] = pd.to_datetime(df['PAYMENTDATE'])
invoice_span = df['PAYMENTDATE'] - df['INVOICEDATE']
df['Invoice Time'] = invoice_span.dt.days
invoice_performance = (
    df.groupby('PRODUCTID')['Invoice Time']
    .mean()
    .rename('Average Invoice Time')
    .reset_index()
)

In [16]:
# Combine every per-product KPI frame into one frame keyed by PRODUCTID.
# Each frame is merged onto the running result in the listed order.
product_kpis = total_sales_volume
for kpi_frame in (
    total_sales_value,
    average_order_quantity,
    average_unit_price,
    number_of_orders,
    delivery_performance,
    invoice_performance,
):
    product_kpis = product_kpis.merge(kpi_frame, on='PRODUCTID')

In [47]:
# Sanity check: each merged frame contributes one KPI column next to PRODUCTID,
# so eight columns are expected (PRODUCTID plus seven KPIs).
product_kpis.shape

(62, 8)

In [17]:
# Attach the per-product KPI columns to every order row (left join on PRODUCTID).
df = pd.merge(df, product_kpis, how='left', on='PRODUCTID')

In [23]:
# Keep only the product descriptors and the per-product KPI columns; the
# row-level 'Delivery Time' and 'Invoice Time' columns are not carried over.
product_level_columns = [
    'PRODUCTID',
    'PRODUCTNAME',
    'PRODUCTCATEGORY',
    'PRODUCTTYPE',
    'Total Sales Volume',
    'Total Sales Value',
    'Average Order Quantity',
    'Average Unit Price',
    'Number of Orders',
    'Average Delivery Time',
    'Average Invoice Time',
]
df = df[product_level_columns]

In [24]:
# Shape after the column selection, before de-duplication.
# NOTE(review): the saved output below reports 13 columns while the selection
# above lists 11 names - the output looks stale; re-run to refresh it.
df.shape

(170290, 13)

In [25]:
# Remove repeated rows, keeping the first occurrence of each distinct row.
df = df.loc[~df.duplicated()]

In [26]:
# Shape after de-duplication.
# NOTE(review): if every PRODUCTID maps to a single name/category/type, the row
# count here should equal the number of distinct PRODUCTIDs - confirm.
df.shape

(10770, 13)

In [28]:
# Number of distinct PRODUCTID values left in the frame.
df['PRODUCTID'].nunique()

62

In [43]:
# Spot-check: show up to 100 rows for one specific product.
inspected_product_id = 'PID-4b87db26-edd8-4b4a-8143-adb85ba6dbe2'
df.loc[df['PRODUCTID'].eq(inspected_product_id)].head(100)

Unnamed: 0,PRODUCTID,PRODUCTNAME,PRODUCTCATEGORY,PRODUCTTYPE,Delivery Time,Invoice Time,Total Sales Volume,Total Sales Value,Average Order Quantity,Average Unit Price,Number of Orders,Average Delivery Time,Average Invoice Time
0,PID-4b87db26-edd8-4b4a-8143-adb85ba6dbe2,Insulation Material,Construction Materials,Insulation Material,7,2,18260514,36521028,5024.90754,2.0,3634,8.531921,8.058888
26,PID-4b87db26-edd8-4b4a-8143-adb85ba6dbe2,Insulation Material,Construction Materials,Insulation Material,6,15,18260514,36521028,5024.90754,2.0,3634,8.531921,8.058888
179,PID-4b87db26-edd8-4b4a-8143-adb85ba6dbe2,Insulation Material,Construction Materials,Insulation Material,9,2,18260514,36521028,5024.90754,2.0,3634,8.531921,8.058888
294,PID-4b87db26-edd8-4b4a-8143-adb85ba6dbe2,Insulation Material,Construction Materials,Insulation Material,6,6,18260514,36521028,5024.90754,2.0,3634,8.531921,8.058888
331,PID-4b87db26-edd8-4b4a-8143-adb85ba6dbe2,Insulation Material,Construction Materials,Insulation Material,6,4,18260514,36521028,5024.90754,2.0,3634,8.531921,8.058888
...,...,...,...,...,...,...,...,...,...,...,...,...,...
6524,PID-4b87db26-edd8-4b4a-8143-adb85ba6dbe2,Insulation Material,Construction Materials,Insulation Material,6,11,18260514,36521028,5024.90754,2.0,3634,8.531921,8.058888
6613,PID-4b87db26-edd8-4b4a-8143-adb85ba6dbe2,Insulation Material,Construction Materials,Insulation Material,5,13,18260514,36521028,5024.90754,2.0,3634,8.531921,8.058888
6629,PID-4b87db26-edd8-4b4a-8143-adb85ba6dbe2,Insulation Material,Construction Materials,Insulation Material,9,9,18260514,36521028,5024.90754,2.0,3634,8.531921,8.058888
6815,PID-4b87db26-edd8-4b4a-8143-adb85ba6dbe2,Insulation Material,Construction Materials,Insulation Material,5,8,18260514,36521028,5024.90754,2.0,3634,8.531921,8.058888


In [67]:
# Hand the KPI frame to the session as plain rows plus column names, then
# save it as a table, overwriting any existing contents.
kpi_rows = df.values.tolist()
kpi_column_names = df.columns.tolist()
df_train_sf = my_session.createDataFrame(kpi_rows, schema=kpi_column_names)

kpi_writer = df_train_sf.write.mode("overwrite")
kpi_writer.save_as_table("FDC_HORIZONTAL.O2C_GOLD.ORDER_TO_CASH_PRODUCT_KPI")