# Use O2C_Template Customized Notebook Template

In [29]:
import pandas as pd
import numpy as np

In [30]:
from fosforml.model_manager.snowflakesession import get_session
# Session handle used by the next cell to run SQL and fetch the result set
# (presumably a Snowflake session, judging by the module name — confirm).
my_session = get_session()

In [31]:
# Read every column of the source table through the session and
# materialise the result as a pandas DataFrame.
table_name = 'ORDER_TO_CASH_ENRICHED'
query = f"select * from {table_name}"

sf_df = my_session.sql(query)
df = sf_df.to_pandas()

In [32]:
# Split on invoice status: rows whose INVOICESTATUS is 'Paid' form the training
# set; every other row (including any with a missing status, since NaN != 'Paid'
# evaluates to True) goes to df_new.
# .copy() makes each subset an independent DataFrame, so the column assignments
# made on df_train in later cells modify a real frame instead of a slice of df
# and do not raise SettingWithCopyWarning.
df_train = df[df['INVOICESTATUS'] == 'Paid'].copy()
df_new = df[df['INVOICESTATUS'] != 'Paid'].copy()

In [33]:
# (rows, columns) of the paid subset and of the remaining rows.
df_train.shape, df_new.shape

((170290, 37), (3001, 37))

In [34]:
# Parse the date columns of the training frame into pandas datetimes so they
# can be subtracted from one another in the feature cells below.
for date_col in ['ORDERDATE', 'DELIVERYDATE', 'INVOICEDATE', 'PAYMENTDATE', 'INVOICEDUEDATE']:
    df_train[date_col] = pd.to_datetime(df_train[date_col])

In [35]:
# Parse DELIVEREDON as a datetime; it is subtracted from DELIVERYDATE when the
# delivery delay is computed further down.
df_train['DELIVEREDON'] = pd.to_datetime(df_train['DELIVEREDON'])

In [36]:
# Order Processing Time: whole days from ORDERDATE to DELIVERYDATE.
order_to_delivery = df_train['DELIVERYDATE'].sub(df_train['ORDERDATE'])
df_train['ORDER_PROCESSING_TIME'] = order_to_delivery.dt.days

# Invoice Processing Time: whole days from INVOICEDATE to PAYMENTDATE.
invoice_to_payment = df_train['PAYMENTDATE'].sub(df_train['INVOICEDATE'])
df_train['INVOICE_PROCESSING_TIME'] = invoice_to_payment.dt.days

# Order Value per Unit: ORDERVALUE divided by ORDERQUANTITY.
df_train['ORDER_VALUE_PER_UNIT'] = df_train['ORDERVALUE'].div(df_train['ORDERQUANTITY'])

In [37]:
# Column dtypes and non-null counts after the date parsing and the first derived columns.
df_train.info()

<class 'pandas.core.frame.DataFrame'>
Index: 170290 entries, 0 to 170289
Data columns (total 40 columns):
 #   Column                   Non-Null Count   Dtype         
---  ------                   --------------   -----         
 0   ORDERID                  170290 non-null  object        
 1   ORDERITEMID              170290 non-null  object        
 2   PRODUCTID                170290 non-null  object        
 3   PRODUCTNAME              170290 non-null  object        
 4   ORDERQUANTITY            170290 non-null  int16         
 5   UNITPRICE                170290 non-null  int32         
 6   ORDERVALUE               170290 non-null  int32         
 7   CUSTOMERID               170290 non-null  object        
 8   CUSTOMERNAME             170290 non-null  object        
 9   PRODUCTCATEGORY          170290 non-null  object        
 10  ORDERDATE                170290 non-null  datetime64[ns]
 11  ORDERSTATUS              170290 non-null  object        
 12  ORDERAMOUNT          

In [38]:
# Delivery Delay: whole days obtained as DELIVERYDATE minus DELIVEREDON.
# NOTE(review): PAYMENT_DELAY is computed as payment date minus due date, so it is
# positive when late. If DELIVERYDATE is the planned date and DELIVEREDON the
# actual one, late deliveries come out negative here — confirm the intended sign.
delivery_gap = df_train['DELIVERYDATE'].sub(df_train['DELIVEREDON'])
df_train['DELIVERY_DELAY'] = delivery_gap.dt.days

In [39]:
# Payment Delay: whole days from INVOICEDUEDATE to PAYMENTDATE
# (positive when the payment date falls after the due date).
payment_gap = df_train['PAYMENTDATE'].sub(df_train['INVOICEDUEDATE'])
df_train['PAYMENT_DELAY'] = payment_gap.dt.days

In [40]:
# Customer Lifetime Value (CLV): sum of ORDERVALUE over all training rows of a
# customer, attached to every row of that customer.
clv = df_train.groupby('CUSTOMERID')['ORDERVALUE'].sum().reset_index()
clv.columns = ['CUSTOMERID', 'CUSTOMER_LIFETIME_VALUE']
# Drop a column left by a previous run of this cell before merging; otherwise a
# re-run would produce CUSTOMER_LIFETIME_VALUE_x / _y instead of a single column.
df_train = (
    df_train
    .drop(columns='CUSTOMER_LIFETIME_VALUE', errors='ignore')
    .merge(clv, on='CUSTOMERID', how='left')
)

In [41]:
# Order Frequency: number of distinct ORDERID values per customer, attached to
# every row of that customer.
order_freq = df_train.groupby('CUSTOMERID')['ORDERID'].nunique().reset_index()
order_freq.columns = ['CUSTOMERID', 'ORDER_FREQUENCY']
# Drop a column left by a previous run of this cell before merging; otherwise a
# re-run would produce ORDER_FREQUENCY_x / _y instead of a single column.
df_train = (
    df_train
    .drop(columns='ORDER_FREQUENCY', errors='ignore')
    .merge(order_freq, on='CUSTOMERID', how='left')
)

In [42]:
# Average Order Value: mean ORDERVALUE over a customer's training rows, attached
# to every row of that customer.
avg_order_value = df_train.groupby('CUSTOMERID')['ORDERVALUE'].mean().reset_index()
avg_order_value.columns = ['CUSTOMERID', 'AVERAGE_ORDER_VALUE']
# Drop a column left by a previous run of this cell before merging; otherwise a
# re-run would produce AVERAGE_ORDER_VALUE_x / _y instead of a single column.
df_train = (
    df_train
    .drop(columns='AVERAGE_ORDER_VALUE', errors='ignore')
    .merge(avg_order_value, on='CUSTOMERID', how='left')
)

In [43]:
# Recency: whole days between the moment this cell runs and each customer's
# latest ORDERDATE, attached to every row of that customer.
# NOTE(review): the reference point is the run date, so RECENCY shifts every
# day the notebook is executed; consider a fixed reference date if the feature
# has to be reproducible.
latest_order_date = df_train.groupby('CUSTOMERID')['ORDERDATE'].max().reset_index()
latest_order_date.columns = ['CUSTOMERID', 'LAST_ORDER_DATE']
latest_order_date['RECENCY'] = (pd.to_datetime('today') - latest_order_date['LAST_ORDER_DATE']).dt.days
# Drop a column left by a previous run of this cell before merging; otherwise a
# re-run would produce RECENCY_x / _y instead of a single column.
df_train = (
    df_train
    .drop(columns='RECENCY', errors='ignore')
    .merge(latest_order_date[['CUSTOMERID', 'RECENCY']], on='CUSTOMERID', how='left')
)

In [45]:
# Preferred Payment Method: the PAYMENTMETHOD value that occurs most often among
# a customer's training rows (first entry of value_counts), attached to every
# row of that customer.
preferred_payment_method = df_train.groupby('CUSTOMERID')['PAYMENTMETHOD'].agg(lambda x: x.value_counts().index[0]).reset_index()
preferred_payment_method.columns = ['CUSTOMERID', 'PREFERRED_PAYMENT_METHOD']
# Drop a column left by a previous run of this cell before merging; otherwise a
# re-run would produce PREFERRED_PAYMENT_METHOD_x / _y instead of a single column.
df_train = (
    df_train
    .drop(columns='PREFERRED_PAYMENT_METHOD', errors='ignore')
    .merge(preferred_payment_method, on='CUSTOMERID', how='left')
)

In [46]:
# Preferred Product Category: the PRODUCTCATEGORY value that occurs most often
# among a customer's training rows (first entry of value_counts), attached to
# every row of that customer.
preferred_product_category = df_train.groupby('CUSTOMERID')['PRODUCTCATEGORY'].agg(lambda x: x.value_counts().index[0]).reset_index()
preferred_product_category.columns = ['CUSTOMERID', 'PREFERRED_PRODUCT_CATEGORY']
# Drop a column left by a previous run of this cell before merging; otherwise a
# re-run would produce PREFERRED_PRODUCT_CATEGORY_x / _y instead of a single column.
df_train = (
    df_train
    .drop(columns='PREFERRED_PRODUCT_CATEGORY', errors='ignore')
    .merge(preferred_product_category, on='CUSTOMERID', how='left')
)

In [48]:
# Preferred Product Type: the PRODUCTTYPE value that occurs most often among a
# customer's training rows (first entry of value_counts), attached to every row
# of that customer.
preferred_product_type = df_train.groupby('CUSTOMERID')['PRODUCTTYPE'].agg(lambda x: x.value_counts().index[0]).reset_index()
preferred_product_type.columns = ['CUSTOMERID', 'PREFERRED_PRODUCT_TYPE']
# Drop a column left by a previous run of this cell before merging; otherwise a
# re-run would produce PREFERRED_PRODUCT_TYPE_x / _y instead of a single column.
df_train = (
    df_train
    .drop(columns='PREFERRED_PRODUCT_TYPE', errors='ignore')
    .merge(preferred_product_type, on='CUSTOMERID', how='left')
)

In [53]:
# List the columns now present on the training frame, including the derived ones.
df_train.columns

Index(['ORDERID', 'ORDERITEMID', 'PRODUCTID', 'PRODUCTNAME', 'ORDERQUANTITY',
       'UNITPRICE', 'ORDERVALUE', 'CUSTOMERID', 'CUSTOMERNAME',
       'PRODUCTCATEGORY', 'ORDERDATE', 'ORDERSTATUS', 'ORDERAMOUNT',
       'ORDERCOUNT', 'SHIPMENTID', 'SHIPMENTDATE', 'DELIVERYDATE',
       'DELIVEREDON', 'CARRIER', 'SHIPMENTSTATUS', 'INVOICEID', 'INVOICEDATE',
       'INVOICEDUEDATE', 'PAYMENTDATE', 'INVOICESTATUS', 'PAYMENTMETHOD',
       'PRODUCTTYPE', 'QUANTITY', 'NAME', 'COMPANYTYPE', 'CONTACTDETAILS',
       'EMAILDETAILS', 'ADDRESSDETAILS', 'ADMINDETAILS', 'CREDITLIMIT',
       'CREDITRATING', 'CUSTOMERTYPE', 'ORDER_PROCESSING_TIME',
       'INVOICE_PROCESSING_TIME', 'ORDER_VALUE_PER_UNIT', 'DELIVERY_DELAY',
       'PAYMENT_DELAY', 'CUSTOMER_LIFETIME_VALUE', 'ORDER_FREQUENCY',
       'AVERAGE_ORDER_VALUE', 'RECENCY', 'PREFERRED_PAYMENT_METHOD',
       'PREFERRED_PRODUCT_CATEGORY', 'PREFERRED_PRODUCT_TYPE'],
      dtype='object')

In [50]:
# Show the last rows to spot-check the derived columns.
df_train.tail()

Unnamed: 0,ORDERID,ORDERITEMID,PRODUCTID,PRODUCTNAME,ORDERQUANTITY,UNITPRICE,ORDERVALUE,CUSTOMERID,CUSTOMERNAME,PRODUCTCATEGORY,...,ORDER_VALUE_PER_UNIT,DELIVERY_DELAY,PAYMENT_DELAY,CUSTOMER_LIFETIME_VALUE,ORDER_FREQUENCY,AVERAGE_ORDER_VALUE,RECENCY,PREFERRED_PAYMENT_METHOD,PREFERRED_PRODUCT_CATEGORY,PREFERRED_PRODUCT_TYPE
170285,OR-2dabf315-433e-4270-b350-9b2cb5985154,OI-5eaa0c0d-d570-4664-b0e7-7e5df39c5253,PID-0dcaa3ac-5e13-4b31-b42a-70b594207dab,Forklift,63,21000,1323000,CID-daa6ccaf-6fc6-44ce-b20e-968a7c03815a,Tucker Ltd,Industrial Equipment,...,21000.0,-1,-1,1131716360,578,978147.242869,23,Cash,Industrial Equipment,CNC Machine
170286,OR-97494b41-325a-450b-a94e-080b789aaed2,OI-c081b576-f40e-4e1b-993d-33947388f5cc,PID-5b65abab-9d75-49db-aee2-764e76cdfa82,Air Purifier,296,99,29304,CID-c5edd6ef-7400-4734-9e4b-34eb79d01973,Haynes-Jackson,Consumer Goods,...,99.0,-3,5,41737562,586,35612.254266,24,Cards,Consumer Goods,Electric Kettle
170287,OR-97494b41-325a-450b-a94e-080b789aaed2,OI-282f0e69-41b1-4462-b239-038c20aea5d1,PID-f1c7c082-65d7-4b5a-b913-f135702db67f,LED Light Bulbs,3077,5,15385,CID-c5edd6ef-7400-4734-9e4b-34eb79d01973,Haynes-Jackson,Consumer Goods,...,5.0,-3,5,41737562,586,35612.254266,24,Cards,Consumer Goods,Electric Kettle
170288,OR-97494b41-325a-450b-a94e-080b789aaed2,OI-80d197e5-5a0b-46b7-bc41-dbb7a58b29f7,PID-064e463d-7de2-47bb-80f8-02b6fda2392a,Vacuum Cleaner,94,364,34216,CID-c5edd6ef-7400-4734-9e4b-34eb79d01973,Haynes-Jackson,Consumer Goods,...,364.0,-3,5,41737562,586,35612.254266,24,Cards,Consumer Goods,Electric Kettle
170289,OR-45577031-31dc-4a5c-a606-14d3b4bacf89,OI-62dfd989-12b7-4645-bf32-6cd223944082,PID-08d031fd-c61f-47f3-b4d1-e9c79ba23b68,Herbal Tea,480,2,960,CID-c959e86d-be75-473b-8ae9-56e6fc113cbf,TetherLink,Food Beverages,...,2.0,0,6,13789264,570,12095.845614,23,Wire Transfers,Food Beverages,Energy Drink


In [54]:
# Per-customer mean of ORDER_PROCESSING_TIME, one row per CUSTOMERID.
avg_order_processing_time = (
    df_train
    .groupby('CUSTOMERID')['ORDER_PROCESSING_TIME']
    .mean()
    .reset_index(name='AVG_ORDER_PROCESSING_TIME')
)

In [55]:
# Per-customer mean of INVOICE_PROCESSING_TIME, one row per CUSTOMERID.
avg_invoice_processing_time = (
    df_train
    .groupby('CUSTOMERID')['INVOICE_PROCESSING_TIME']
    .mean()
    .reset_index(name='AVG_INVOICE_PROCESSING_TIME')
)

In [56]:
# Per-customer mean of DELIVERY_DELAY, one row per CUSTOMERID.
avg_delivery_delay = (
    df_train
    .groupby('CUSTOMERID')['DELIVERY_DELAY']
    .mean()
    .rename('AVG_DELIVERY_DELAY')
    .reset_index()
)

In [57]:
# Per-customer mean of PAYMENT_DELAY, one row per CUSTOMERID.
avg_payment_delay = (
    df_train
    .groupby('CUSTOMERID')['PAYMENT_DELAY']
    .mean()
    .rename('AVG_PAYMENT_DELAY')
    .reset_index()
)

In [58]:
# Total Delays: per customer, the summed DELIVERY_DELAY plus the summed PAYMENT_DELAY.
delay_sums = df_train.groupby('CUSTOMERID')[['DELIVERY_DELAY', 'PAYMENT_DELAY']].sum()
total_delays = (
    (delay_sums['DELIVERY_DELAY'] + delay_sums['PAYMENT_DELAY'])
    .rename('TOTAL_DELAYS')
    .reset_index()
)

In [60]:
# Order Consistency: per-customer standard deviation of ORDER_PROCESSING_TIME.
order_consistency = (
    df_train
    .groupby('CUSTOMERID')['ORDER_PROCESSING_TIME']
    .std()
    .reset_index(name='ORDER_CONSISTENCY')
)

In [61]:
# Invoice Consistency: per-customer standard deviation of INVOICE_PROCESSING_TIME.
invoice_consistency = (
    df_train
    .groupby('CUSTOMERID')['INVOICE_PROCESSING_TIME']
    .std()
    .reset_index(name='INVOICE_CONSISTENCY')
)

In [62]:
# Delivery Consistency: per-customer standard deviation of DELIVERY_DELAY.
delivery_consistency = (
    df_train
    .groupby('CUSTOMERID')['DELIVERY_DELAY']
    .std()
    .rename('DELIVERY_CONSISTENCY')
    .reset_index()
)

In [63]:
# Payment Consistency: per-customer standard deviation of PAYMENT_DELAY.
payment_consistency = (
    df_train
    .groupby('CUSTOMERID')['PAYMENT_DELAY']
    .std()
    .rename('PAYMENT_CONSISTENCY')
    .reset_index()
)

In [64]:
# Collect the per-customer feature tables and build a one-column frame of the
# distinct CUSTOMERID values (taken from the full df) to join them onto.
features = [
    avg_order_processing_time,
    avg_invoice_processing_time,
    avg_delivery_delay,
    avg_payment_delay,
    total_delays,
    order_consistency,
    invoice_consistency,
    delivery_consistency,
    payment_consistency,
]
customer_features = df['CUSTOMERID'].drop_duplicates().reset_index(drop=True).to_frame()

In [65]:
# Left-join every per-customer feature table onto the customer key frame.
# Reset to the bare CUSTOMERID column first so that re-running this cell does
# not merge the same feature columns a second time (which would leave _x / _y
# suffixed duplicates in customer_features).
customer_features = customer_features[['CUSTOMERID']]
for feature in features:
    customer_features = customer_features.merge(feature, on='CUSTOMERID', how='left')

In [None]:
# Merge the per-customer features back onto every training row.
# Any feature columns left on df_train by a previous run of this cell are
# dropped first, so a re-run replaces them instead of creating _x / _y duplicates.
feature_cols = customer_features.columns.difference(['CUSTOMERID'])
df_train = (
    df_train
    .drop(columns=feature_cols, errors='ignore')
    .merge(customer_features, on='CUSTOMERID', how='left')
)

In [None]:
# Preview the first rows of the training frame after the merge above.
df_train.head()

In [None]:
# TODO: the original cell was left unfinished — its column list ended in an
# unterminated string literal, which is a SyntaxError. Fill in the names of the
# columns to exclude; while the list is empty, df_final is a copy of df_train.
columns_to_drop = []
df_final = df_train.drop(columns=columns_to_drop)