# Use O2C_Template Customized Notebook Template

In [29]:
import pandas as pd
import numpy as np

In [30]:
from fosforml.model_manager.snowflakesession import get_session
# Obtain the session object from the fosforml helper imported above; the next cell uses
# its .sql() method to query the source table.
# NOTE(review): presumably a Snowflake/Snowpark session given the module name — confirm.
my_session = get_session()

In [31]:
# Source table to read.
table_name = 'ORDER_TO_CASH_ENRICHED'

# Select every column of that table through the session, then materialise the
# result locally as a pandas DataFrame for the rest of the notebook.
sf_df = my_session.sql(f"select * from {table_name}")
df = sf_df.to_pandas()

In [32]:
# Split the data on invoice status: rows whose INVOICESTATUS is exactly 'Paid' go to
# df_train, every other row goes to df_new.
# .copy() makes each split an independent frame. Later cells add and overwrite columns
# on df_train; without the copy those assignments hit a slice of df and pandas emits
# SettingWithCopyWarning (and the write target is ambiguous).
df_train = df[df['INVOICESTATUS'] == 'Paid'].copy()
df_new = df[df['INVOICESTATUS'] != 'Paid'].copy()

In [33]:
# Display the (rows, columns) shape of each split as a quick sanity check.
df_train.shape, df_new.shape

((170290, 37), (3001, 37))

In [34]:
# Convert the order, delivery, invoice, payment and invoice-due date columns of
# df_train to pandas datetimes so that later cells can subtract them from each other.
for date_col in ('ORDERDATE', 'DELIVERYDATE', 'INVOICEDATE', 'PAYMENTDATE', 'INVOICEDUEDATE'):
    df_train[date_col] = pd.to_datetime(df_train[date_col])

In [35]:
# Convert DELIVEREDON to datetimes as well; the DELIVERY_DELAY feature computed in a
# later cell subtracts it from DELIVERYDATE.
df_train['DELIVEREDON'] = pd.to_datetime(df_train['DELIVEREDON'])

In [36]:
# Order Processing Time: whole days from ORDERDATE to DELIVERYDATE.
df_train['ORDER_PROCESSING_TIME'] = (df_train['DELIVERYDATE'] - df_train['ORDERDATE']).dt.days

# Invoice Processing Time: whole days from INVOICEDATE to PAYMENTDATE.
df_train['INVOICE_PROCESSING_TIME'] = (df_train['PAYMENTDATE'] - df_train['INVOICEDATE']).dt.days

# Order Value per Unit: ORDERVALUE divided by ORDERQUANTITY.
# A zero quantity would make the division produce inf, which silently poisons later
# statistics; mask zero quantities to NaN first so those rows come out as missing.
# Rows with a non-zero quantity get exactly the same value as a plain division.
df_train['ORDER_VALUE_PER_UNIT'] = (
    df_train['ORDERVALUE'] / df_train['ORDERQUANTITY'].where(df_train['ORDERQUANTITY'] != 0)
)

In [37]:
# Print column names, non-null counts and dtypes to verify the date conversions
# and the newly derived columns.
df_train.info()

<class 'pandas.core.frame.DataFrame'>
Index: 170290 entries, 0 to 170289
Data columns (total 40 columns):
 #   Column                   Non-Null Count   Dtype         
---  ------                   --------------   -----         
 0   ORDERID                  170290 non-null  object        
 1   ORDERITEMID              170290 non-null  object        
 2   PRODUCTID                170290 non-null  object        
 3   PRODUCTNAME              170290 non-null  object        
 4   ORDERQUANTITY            170290 non-null  int16         
 5   UNITPRICE                170290 non-null  int32         
 6   ORDERVALUE               170290 non-null  int32         
 7   CUSTOMERID               170290 non-null  object        
 8   CUSTOMERNAME             170290 non-null  object        
 9   PRODUCTCATEGORY          170290 non-null  object        
 10  ORDERDATE                170290 non-null  datetime64[ns]
 11  ORDERSTATUS              170290 non-null  object        
 12  ORDERAMOUNT          

In [38]:
# Delivery delay in whole days, computed as DELIVERYDATE minus DELIVEREDON.
# NOTE(review): PAYMENT_DELAY is computed as PAYMENTDATE minus INVOICEDUEDATE. If
# DELIVEREDON is the actual delivery date and DELIVERYDATE the planned one, the sign here
# is the reverse of that convention (a late delivery comes out negative) — confirm intent.
df_train['DELIVERY_DELAY'] = df_train['DELIVERYDATE'].sub(df_train['DELIVEREDON']).dt.days

In [39]:
# Payment delay in whole days, computed as PAYMENTDATE minus INVOICEDUEDATE
# (negative when the payment date precedes the due date).
df_train['PAYMENT_DELAY'] = df_train['PAYMENTDATE'].sub(df_train['INVOICEDUEDATE']).dt.days

In [40]:
# Customer Lifetime Value (CLV): total ORDERVALUE over all of a customer's rows in df_train.
clv = (
    df_train.groupby('CUSTOMERID')['ORDERVALUE']
    .sum()
    .reset_index(name='CUSTOMER_LIFETIME_VALUE')
)
# Left-merge so every row of df_train carries its customer's total.
df_train = df_train.merge(clv, how='left', on='CUSTOMERID')

In [41]:
# Order Frequency: number of distinct ORDERID values per customer.
order_freq = (
    df_train.groupby('CUSTOMERID')['ORDERID']
    .nunique()
    .reset_index(name='ORDER_FREQUENCY')
)
# Left-merge so every row of df_train carries its customer's order count.
df_train = df_train.merge(order_freq, how='left', on='CUSTOMERID')

In [42]:
# Average Order Value: mean of ORDERVALUE across all of a customer's rows in df_train.
# NOTE(review): several rows in the sample output share one ORDERID, so rows look like
# order items; this is therefore a per-row mean, not a per-order mean — confirm intent.
avg_order_value = (
    df_train.groupby('CUSTOMERID')['ORDERVALUE']
    .mean()
    .reset_index(name='AVERAGE_ORDER_VALUE')
)
# Left-merge so every row of df_train carries its customer's average.
df_train = df_train.merge(avg_order_value, how='left', on='CUSTOMERID')

In [43]:
# Recency: whole days between "today" and each customer's most recent ORDERDATE.
# NOTE(review): the reference point is the wall clock at execution time, so RECENCY
# changes from one run to the next — consider pinning a fixed reference date.
latest_order_date = (
    df_train.groupby('CUSTOMERID')['ORDERDATE']
    .max()
    .reset_index(name='LAST_ORDER_DATE')
    .assign(
        RECENCY=lambda per_customer: (
            pd.to_datetime('today') - per_customer['LAST_ORDER_DATE']
        ).dt.days
    )
)
# Only RECENCY is merged back; LAST_ORDER_DATE stays in the helper frame.
df_train = df_train.merge(
    latest_order_date.loc[:, ['CUSTOMERID', 'RECENCY']],
    how='left',
    on='CUSTOMERID',
)

In [45]:
# Preferred Payment Method: the most frequent PAYMENTMETHOD among each customer's rows
# (first entry of value_counts(), which is sorted by descending count).
# value_counts() drops NaN, so a customer whose PAYMENTMETHOD values are all missing would
# produce an empty result and .index[0] would raise IndexError; return NaN for that case.
preferred_payment_method = (
    df_train.groupby('CUSTOMERID')['PAYMENTMETHOD']
    .agg(lambda x: x.value_counts().index[0] if x.notna().any() else np.nan)
    .reset_index()
)
preferred_payment_method.columns = ['CUSTOMERID', 'PREFERRED_PAYMENT_METHOD']
# Left-merge so every row of df_train carries its customer's preferred method.
df_train = df_train.merge(preferred_payment_method, on='CUSTOMERID', how='left')

In [46]:
# Preferred Product Category: the most frequent PRODUCTCATEGORY among each customer's rows
# (first entry of value_counts(), which is sorted by descending count).
# value_counts() drops NaN, so a customer whose PRODUCTCATEGORY values are all missing would
# produce an empty result and .index[0] would raise IndexError; return NaN for that case.
preferred_product_category = (
    df_train.groupby('CUSTOMERID')['PRODUCTCATEGORY']
    .agg(lambda x: x.value_counts().index[0] if x.notna().any() else np.nan)
    .reset_index()
)
preferred_product_category.columns = ['CUSTOMERID', 'PREFERRED_PRODUCT_CATEGORY']
# Left-merge so every row of df_train carries its customer's preferred category.
df_train = df_train.merge(preferred_product_category, on='CUSTOMERID', how='left')

In [48]:
# Preferred Product Type: the most frequent PRODUCTTYPE among each customer's rows
# (first entry of value_counts(), which is sorted by descending count).
# value_counts() drops NaN, so a customer whose PRODUCTTYPE values are all missing would
# produce an empty result and .index[0] would raise IndexError; return NaN for that case.
preferred_product_type = (
    df_train.groupby('CUSTOMERID')['PRODUCTTYPE']
    .agg(lambda x: x.value_counts().index[0] if x.notna().any() else np.nan)
    .reset_index()
)
preferred_product_type.columns = ['CUSTOMERID', 'PREFERRED_PRODUCT_TYPE']
# Left-merge so every row of df_train carries its customer's preferred type.
df_train = df_train.merge(preferred_product_type, on='CUSTOMERID', how='left')

In [49]:
# Preview the first rows to inspect the engineered per-row and per-customer columns.
df_train.head()

Unnamed: 0,ORDERID,ORDERITEMID,PRODUCTID,PRODUCTNAME,ORDERQUANTITY,UNITPRICE,ORDERVALUE,CUSTOMERID,CUSTOMERNAME,PRODUCTCATEGORY,...,ORDER_VALUE_PER_UNIT,DELIVERY_DELAY,PAYMENT_DELAY,CUSTOMER_LIFETIME_VALUE,ORDER_FREQUENCY,AVERAGE_ORDER_VALUE,RECENCY,PREFERRED_PAYMENT_METHOD,PREFERRED_PRODUCT_CATEGORY,PREFERRED_PRODUCT_TYPE
0,OR-1d383f77-5592-46ad-8e52-29d33a6502ad,OI-5759e816-1866-4073-8d3c-6ad53993c09b,PID-4b87db26-edd8-4b4a-8143-adb85ba6dbe2,Insulation Material,1630,2,3260,CID-4a14e75e-cd20-44a1-9696-d5eb1746b894,Anabrige LLP,Construction Materials,...,2.0,0,-3,88348548,595,73623.79,22,Cash,Construction Materials,PVC Pipes
1,OR-1d383f77-5592-46ad-8e52-29d33a6502ad,OI-cca9228c-f83d-4128-94d9-6b5eb944f23f,PID-89334032-db8d-4821-a9ad-5bd9a5305740,Cement,123,100,12300,CID-4a14e75e-cd20-44a1-9696-d5eb1746b894,Anabrige LLP,Construction Materials,...,100.0,0,-3,88348548,595,73623.79,22,Cash,Construction Materials,PVC Pipes
2,OR-1d383f77-5592-46ad-8e52-29d33a6502ad,OI-04e65e52-f3ab-46e6-bef4-e2c4b2c4ee3a,PID-3e2ce142-a70d-4da4-8770-2079f75f43a9,Roofing Sheets,1625,7,11375,CID-4a14e75e-cd20-44a1-9696-d5eb1746b894,Anabrige LLP,Construction Materials,...,7.0,0,-3,88348548,595,73623.79,22,Cash,Construction Materials,PVC Pipes
3,OR-343ce41a-322b-4823-8afc-52242973d163,OI-6125f927-0d99-48d5-9fa2-74b8a310af46,PID-fec6ed6f-7ac1-4655-b4a7-9766cff15b7b,Engine Oil,263,40,10520,CID-f922f515-1a7a-4640-959a-3c2f093f4d47,FULCRO,Automotive,...,40.0,0,8,36452740,649,27826.519084,26,Cash,Automotive,Radiator Coolant
4,OR-1310822f-e948-4c29-8da7-beec09196076,OI-54edae96-61a7-4666-8fe8-50d10dfd9323,PID-8b2ea2ce-e1df-4139-b179-3e7f217a9f9b,Conveyor Belt,306,1000,306000,CID-169eb6d1-27a1-4859-b7c8-5073ca1eb201,Taylor Ltd,Industrial Equipment,...,1000.0,-3,-2,1144673360,584,975851.116795,22,Cash,Industrial Equipment,Industrial Robot
