# Order-to-Cash Customer KPI Features (built from the O2C_Template customized notebook template)

In [1]:
import pandas as pd
import numpy as np

In [2]:
from fosforml.model_manager.snowflakesession import get_session
# Session handle from fosforml; used below to query the source table and to
# write the result table.
my_session = get_session()

In [3]:
# Source table for the order-to-cash records.
# Alternative table name kept for reference (currently disabled): 'ORDER_TO_CASH_ENRICHED'
table_name = 'ORDER_TO_CASH_MASTER'

# Reference the table through the session API instead of formatting the name
# into a SQL string, then materialise all rows as a pandas DataFrame.
sf_df = my_session.table(table_name)
df = sf_df.to_pandas()

In [4]:
# Partition on INVOICESTATUS: rows marked LATE_PAYMENT or ONTIME_PAYMENT go to
# df_train, every other row goes to df_test.
# .copy() makes each partition an independent frame, so the column assignments
# made on df_train in later cells neither raise SettingWithCopyWarning nor
# operate on a temporary slice of df.
payment_status_mask = df['INVOICESTATUS'].isin(['LATE_PAYMENT', 'ONTIME_PAYMENT'])
df_train = df[payment_status_mask].copy()
df_test = df[~payment_status_mask].copy()

In [5]:
# Row/column counts of the two INVOICESTATUS-based partitions.
df_train.shape, df_test.shape

((170161, 40), (693, 40))

In [6]:
# Parse the order, delivery, invoice, payment and invoice-due columns of
# df_train as datetimes so they can be subtracted from one another later.
for date_column in ['ORDERDATE', 'DELIVERYDATE', 'INVOICEDATE', 'PAYMENTDATE', 'INVOICEDUEDATE']:
    df_train[date_column] = pd.to_datetime(df_train[date_column])

In [7]:
# Parse DELIVEREDON as datetime so it can be subtracted from DELIVERYDATE below.
df_train['DELIVEREDON'] = pd.to_datetime(df_train['DELIVEREDON'])

In [8]:
# Order Processing Time: day component of DELIVERYDATE minus ORDERDATE
df_train['OR_ORDER_PROCESSING_TIME'] = df_train['DELIVERYDATE'].sub(df_train['ORDERDATE']).dt.days

# Invoice Processing Time: day component of PAYMENTDATE minus INVOICEDATE
df_train['OR_INVOICE_PROCESSING_TIME'] = df_train['PAYMENTDATE'].sub(df_train['INVOICEDATE']).dt.days

# Order Value per Unit: ORDERVALUE divided by ORDERQUANTITY
# NOTE(review): a zero ORDERQUANTITY is not guarded here — confirm the column is always positive
df_train['OR_ORDER_VALUE_PER_UNIT'] = df_train['ORDERVALUE'].div(df_train['ORDERQUANTITY'])

In [9]:
# Delivery Delay: day component of DELIVEREDON minus DELIVERYDATE
# (negative when DELIVEREDON is earlier than DELIVERYDATE)
df_train['OR_DELIVERY_DELAY'] = df_train['DELIVEREDON'].sub(df_train['DELIVERYDATE']).dt.days

In [10]:
# Payment Delay: day component of PAYMENTDATE minus INVOICEDUEDATE
# (negative when payment was made before the due date)
df_train['OR_PAYMENT_DELAY'] = df_train['PAYMENTDATE'].sub(df_train['INVOICEDUEDATE']).dt.days

In [11]:
# Customer Lifetime Value (CLV): sum of ORDERVALUE over each customer's rows,
# broadcast back onto every df_train row of that customer.
clv = (
    df_train.groupby('CUSTOMERID')['ORDERVALUE']
    .sum()
    .rename('CC_CUSTOMER_LIFETIME_VALUE')
    .reset_index()
)
df_train = df_train.merge(clv, how='left', on='CUSTOMERID')

In [12]:
# Order Frequency: number of distinct ORDERID values per customer,
# broadcast back onto every df_train row of that customer.
order_freq = (
    df_train.groupby('CUSTOMERID')['ORDERID']
    .nunique()
    .rename('CC_ORDER_FREQUENCY')
    .reset_index()
)
df_train = df_train.merge(order_freq, how='left', on='CUSTOMERID')

In [13]:
# Average Order Value: mean of ORDERVALUE over each customer's rows,
# broadcast back onto every df_train row of that customer.
avg_order_value = (
    df_train.groupby('CUSTOMERID')['ORDERVALUE']
    .mean()
    .rename('CC_AVERAGE_ORDER_VALUE')
    .reset_index()
)
df_train = df_train.merge(avg_order_value, how='left', on='CUSTOMERID')

In [14]:
# Recency: days elapsed between the moment this cell runs and each customer's
# most recent ORDERDATE. The value therefore changes from run to run.
latest_order_date = (
    df_train.groupby('CUSTOMERID')['ORDERDATE']
    .max()
    .rename('CC_LAST_ORDER_DATE')
    .reset_index()
)
latest_order_date['CC_RECENCY'] = (
    pd.to_datetime('today') - latest_order_date['CC_LAST_ORDER_DATE']
).dt.days
# Only the recency column is merged; CC_LAST_ORDER_DATE stays in the helper frame.
df_train = df_train.merge(
    latest_order_date[['CUSTOMERID', 'CC_RECENCY']],
    how='left',
    on='CUSTOMERID',
)

In [15]:
# Preferred Payment Method: the most frequent PAYMENTMETHOD per customer.
# value_counts() ignores nulls, so a customer with no non-null value yields an
# empty result; next(iter(...), np.nan) returns NaN in that case, where
# indexing with [0] would raise IndexError.
preferred_payment_method = (
    df_train.groupby('CUSTOMERID')['PAYMENTMETHOD']
    .agg(lambda x: next(iter(x.value_counts().index), np.nan))
    .reset_index()
)
preferred_payment_method.columns = ['CUSTOMERID', 'CC_PREFERRED_PAYMENT_METHOD']
df_train = df_train.merge(preferred_payment_method, on='CUSTOMERID', how='left')

In [16]:
# Preferred Product Category: the most frequent PRODUCTCATEGORY per customer.
# value_counts() ignores nulls, so a customer with no non-null value yields an
# empty result; next(iter(...), np.nan) returns NaN in that case, where
# indexing with [0] would raise IndexError.
preferred_product_category = (
    df_train.groupby('CUSTOMERID')['PRODUCTCATEGORY']
    .agg(lambda x: next(iter(x.value_counts().index), np.nan))
    .reset_index()
)
preferred_product_category.columns = ['CUSTOMERID', 'CC_PREFERRED_PRODUCT_CATEGORY']
df_train = df_train.merge(preferred_product_category, on='CUSTOMERID', how='left')

In [17]:
# Preferred Product Type: the most frequent PRODUCTTYPE per customer.
# value_counts() ignores nulls, so a customer with no non-null value yields an
# empty result; next(iter(...), np.nan) returns NaN in that case, where
# indexing with [0] would raise IndexError.
preferred_product_type = (
    df_train.groupby('CUSTOMERID')['PRODUCTTYPE']
    .agg(lambda x: next(iter(x.value_counts().index), np.nan))
    .reset_index()
)
preferred_product_type.columns = ['CUSTOMERID', 'CC_PREFERRED_PRODUCT_TYPE']
df_train = df_train.merge(preferred_product_type, on='CUSTOMERID', how='left')

In [18]:
# List the columns now present on df_train (source fields plus the OR_* and CC_* features added above).
df_train.columns

Index(['ORDERID', 'ORDERITEMID', 'PRODUCTID', 'PRODUCTNAME', 'ORDERQUANTITY',
       'UNITPRICE', 'ORDERVALUE', 'CUSTOMERID', 'CUSTOMERNAME',
       'PRODUCTCATEGORY', 'ORDERDATE', 'ORDERSTATUS', 'ORDERAMOUNT',
       'ORDERCOUNT', 'SHIPMENTID', 'SHIPMENTDATE', 'DELIVERYDATE',
       'DELIVEREDON', 'CARRIER', 'SHIPMENTSTATUS', 'INVOICEID', 'INVOICEDATE',
       'INVOICEDUEDATE', 'PAYMENTDATE', 'INVOICESTATUS', 'PAYMENTMETHOD',
       'PRODUCTTYPE', 'QUANTITY', 'COMPANYTYPE', 'CONTACTDETAILS',
       'EMAILDETAILS', 'ADDRESSDETAILS', 'ADMINDETAILS', 'CREDITLIMIT',
       'CUSTOMERSINCE', 'PAYMENTTERMS', 'CREDITLIMITTYPE', 'CUSTOMERTYPE',
       'SUPPLIERNAME', 'SUPPLIERID', 'OR_ORDER_PROCESSING_TIME',
       'OR_INVOICE_PROCESSING_TIME', 'OR_ORDER_VALUE_PER_UNIT',
       'OR_DELIVERY_DELAY', 'OR_PAYMENT_DELAY', 'CC_CUSTOMER_LIFETIME_VALUE',
       'CC_ORDER_FREQUENCY', 'CC_AVERAGE_ORDER_VALUE', 'CC_RECENCY',
       'CC_PREFERRED_PAYMENT_METHOD', 'CC_PREFERRED_PRODUCT_CATEGORY',
       '

In [19]:
# Peek at the last two rows to check the newly added feature columns.
df_train.tail(2)

Unnamed: 0,ORDERID,ORDERITEMID,PRODUCTID,PRODUCTNAME,ORDERQUANTITY,UNITPRICE,ORDERVALUE,CUSTOMERID,CUSTOMERNAME,PRODUCTCATEGORY,...,OR_ORDER_VALUE_PER_UNIT,OR_DELIVERY_DELAY,OR_PAYMENT_DELAY,CC_CUSTOMER_LIFETIME_VALUE,CC_ORDER_FREQUENCY,CC_AVERAGE_ORDER_VALUE,CC_RECENCY,CC_PREFERRED_PAYMENT_METHOD,CC_PREFERRED_PRODUCT_CATEGORY,CC_PREFERRED_PRODUCT_TYPE
170159,OR-749b44b5-42f0-4986-acb9-e593396396ca,OI-eee93794-fc3a-422e-85db-ad0613b4ba90,PID-5ebfa52f-0f4f-4d31-af38-e8c9c75a1593,Granola,7,3,21,CID-7247edbb-6148-4095-973c-5afb2409a37b,Sutura,Food Beverages,...,3.0,2,-18,13149901,530,12488.035138,1,Mobile Payments,Food Beverages,Energy Drink
170160,OR-749b44b5-42f0-4986-acb9-e593396396ca,OI-6efa3812-cd85-4a3d-a22c-6e9898e1b4e9,PID-3c84d073-9164-4016-9392-f12bf7e339d4,Greek Yogurt,287,3,861,CID-7247edbb-6148-4095-973c-5afb2409a37b,Sutura,Food Beverages,...,3.0,2,-18,13149901,530,12488.035138,1,Mobile Payments,Food Beverages,Energy Drink


In [20]:
# Average Order Processing Time: per-customer mean of OR_ORDER_PROCESSING_TIME
avg_order_processing_time = (
    df_train.groupby('CUSTOMERID')['OR_ORDER_PROCESSING_TIME']
    .mean()
    .rename('CC_AVG_ORDER_PROCESSING_TIME')
    .reset_index()
)

In [21]:
# Average Invoice Processing Time: per-customer mean of OR_INVOICE_PROCESSING_TIME
avg_invoice_processing_time = (
    df_train.groupby('CUSTOMERID')['OR_INVOICE_PROCESSING_TIME']
    .mean()
    .rename('CC_AVG_INVOICE_PROCESSING_TIME')
    .reset_index()
)

In [22]:
# Average Delivery Delay: per-customer mean of OR_DELIVERY_DELAY
avg_delivery_delay = (
    df_train.groupby('CUSTOMERID')['OR_DELIVERY_DELAY']
    .mean()
    .rename('CC_AVG_DELIVERY_DELAY')
    .reset_index()
)

In [23]:
# Average Payment Delay: per-customer mean of OR_PAYMENT_DELAY
avg_payment_delay = (
    df_train.groupby('CUSTOMERID')['OR_PAYMENT_DELAY']
    .mean()
    .rename('CC_AVG_PAYMENT_DELAY')
    .reset_index()
)

In [24]:
# Columns of df, which at this point is still the frame loaded from the source table.
df.columns

Index(['ORDERID', 'ORDERITEMID', 'PRODUCTID', 'PRODUCTNAME', 'ORDERQUANTITY',
       'UNITPRICE', 'ORDERVALUE', 'CUSTOMERID', 'CUSTOMERNAME',
       'PRODUCTCATEGORY', 'ORDERDATE', 'ORDERSTATUS', 'ORDERAMOUNT',
       'ORDERCOUNT', 'SHIPMENTID', 'SHIPMENTDATE', 'DELIVERYDATE',
       'DELIVEREDON', 'CARRIER', 'SHIPMENTSTATUS', 'INVOICEID', 'INVOICEDATE',
       'INVOICEDUEDATE', 'PAYMENTDATE', 'INVOICESTATUS', 'PAYMENTMETHOD',
       'PRODUCTTYPE', 'QUANTITY', 'COMPANYTYPE', 'CONTACTDETAILS',
       'EMAILDETAILS', 'ADDRESSDETAILS', 'ADMINDETAILS', 'CREDITLIMIT',
       'CUSTOMERSINCE', 'PAYMENTTERMS', 'CREDITLIMITTYPE', 'CUSTOMERTYPE',
       'SUPPLIERNAME', 'SUPPLIERID'],
      dtype='object')

In [25]:
# Total Orders: per-customer count of rows with a non-null OR_ORDER_PROCESSING_TIME
# NOTE(review): this counts df_train rows, not distinct ORDERID values — confirm that is the intended meaning
total_orders = (
    df_train.groupby('CUSTOMERID')['OR_ORDER_PROCESSING_TIME']
    .count()
    .rename('CC_TOTAL_ORDERS')
    .reset_index()
)

In [26]:
# Total Delays: per-customer sum of OR_DELIVERY_DELAY plus per-customer sum of
# OR_PAYMENT_DELAY. Negative delays offset positive ones, so the total can be negative.
total_delays = (
    df_train.groupby('CUSTOMERID')[['OR_DELIVERY_DELAY', 'OR_PAYMENT_DELAY']]
    .sum()
    .pipe(lambda sums: sums['OR_DELIVERY_DELAY'] + sums['OR_PAYMENT_DELAY'])
    .rename('CC_TOTAL_DELAYS')
    .reset_index()
)

In [27]:
# Order Consistency: per-customer standard deviation of OR_ORDER_PROCESSING_TIME
order_consistency = (
    df_train.groupby('CUSTOMERID')['OR_ORDER_PROCESSING_TIME']
    .std()
    .rename('CC_ORDER_CONSISTENCY')
    .reset_index()
)

In [28]:
# Invoice Consistency: per-customer standard deviation of OR_INVOICE_PROCESSING_TIME
invoice_consistency = (
    df_train.groupby('CUSTOMERID')['OR_INVOICE_PROCESSING_TIME']
    .std()
    .rename('CC_INVOICE_CONSISTENCY')
    .reset_index()
)

In [29]:
# Delivery Consistency: per-customer standard deviation of OR_DELIVERY_DELAY
delivery_consistency = (
    df_train.groupby('CUSTOMERID')['OR_DELIVERY_DELAY']
    .std()
    .rename('CC_DELIVERY_CONSISTENCY')
    .reset_index()
)

In [30]:
# Payment Consistency: per-customer standard deviation of OR_PAYMENT_DELAY
payment_consistency = (
    df_train.groupby('CUSTOMERID')['OR_PAYMENT_DELAY']
    .std()
    .rename('CC_PAYMENT_CONSISTENCY')
    .reset_index()
)

In [31]:
# Collect the per-customer feature tables that will be joined together, and
# start the combined frame from the distinct CUSTOMERID values found in df.
features = [
    avg_order_processing_time,
    avg_invoice_processing_time,
    avg_delivery_delay,
    avg_payment_delay,
    total_orders,
    total_delays,
    order_consistency,
    invoice_consistency,
    delivery_consistency,
    payment_consistency,
]
customer_features = df[['CUSTOMERID']].drop_duplicates(ignore_index=True)

In [32]:
# Left-join each per-customer feature table onto the CUSTOMERID list, one at a time.
# NOTE(review): re-running this cell without re-creating customer_features merges
# the same columns again, which pandas disambiguates with _x/_y suffixes.
for feature in features:
    customer_features = customer_features.merge(feature, on='CUSTOMERID', how='left')

In [33]:
# Attach the customer-level features to every df_train row (left join on CUSTOMERID)
df_train = df_train.merge(customer_features, on='CUSTOMERID', how='left')

In [34]:
# Check which columns df_train carries after the customer-level merge.
df_train.columns

Index(['ORDERID', 'ORDERITEMID', 'PRODUCTID', 'PRODUCTNAME', 'ORDERQUANTITY',
       'UNITPRICE', 'ORDERVALUE', 'CUSTOMERID', 'CUSTOMERNAME',
       'PRODUCTCATEGORY', 'ORDERDATE', 'ORDERSTATUS', 'ORDERAMOUNT',
       'ORDERCOUNT', 'SHIPMENTID', 'SHIPMENTDATE', 'DELIVERYDATE',
       'DELIVEREDON', 'CARRIER', 'SHIPMENTSTATUS', 'INVOICEID', 'INVOICEDATE',
       'INVOICEDUEDATE', 'PAYMENTDATE', 'INVOICESTATUS', 'PAYMENTMETHOD',
       'PRODUCTTYPE', 'QUANTITY', 'COMPANYTYPE', 'CONTACTDETAILS',
       'EMAILDETAILS', 'ADDRESSDETAILS', 'ADMINDETAILS', 'CREDITLIMIT',
       'CUSTOMERSINCE', 'PAYMENTTERMS', 'CREDITLIMITTYPE', 'CUSTOMERTYPE',
       'SUPPLIERNAME', 'SUPPLIERID', 'OR_ORDER_PROCESSING_TIME',
       'OR_INVOICE_PROCESSING_TIME', 'OR_ORDER_VALUE_PER_UNIT',
       'OR_DELIVERY_DELAY', 'OR_PAYMENT_DELAY', 'CC_CUSTOMER_LIFETIME_VALUE',
       'CC_ORDER_FREQUENCY', 'CC_AVERAGE_ORDER_VALUE', 'CC_RECENCY',
       'CC_PREFERRED_PAYMENT_METHOD', 'CC_PREFERRED_PRODUCT_CATEGORY',
       '

In [35]:
# Keep only the customer identity/attribute columns and the CC_* customer-level
# features. From here on, df refers to this frame rather than the frame loaded
# from the source table.
df = df_train.loc[:, [
    # customer identity and attributes
    'CUSTOMERID', 'CUSTOMERNAME', 'CUSTOMERTYPE', 'COMPANYTYPE',
    'CUSTOMERSINCE', 'PAYMENTTERMS', 'CREDITLIMITTYPE',
    'CONTACTDETAILS', 'EMAILDETAILS', 'ADDRESSDETAILS', 'ADMINDETAILS',
    'CREDITLIMIT',
    # value, frequency and recency
    'CC_CUSTOMER_LIFETIME_VALUE', 'CC_ORDER_FREQUENCY',
    'CC_AVERAGE_ORDER_VALUE', 'CC_RECENCY',
    # preferences
    'CC_PREFERRED_PAYMENT_METHOD', 'CC_PREFERRED_PRODUCT_CATEGORY',
    'CC_PREFERRED_PRODUCT_TYPE',
    # averages and totals
    'CC_AVG_ORDER_PROCESSING_TIME', 'CC_AVG_INVOICE_PROCESSING_TIME',
    'CC_AVG_DELIVERY_DELAY', 'CC_AVG_PAYMENT_DELAY',
    'CC_TOTAL_ORDERS', 'CC_TOTAL_DELAYS',
    # standard deviations
    'CC_ORDER_CONSISTENCY', 'CC_INVOICE_CONSISTENCY',
    'CC_DELIVERY_CONSISTENCY', 'CC_PAYMENT_CONSISTENCY',
]]

In [36]:
# Shape before de-duplication: still one row per df_train record.
df.shape

(170161, 29)

In [37]:
# Collapse fully identical rows. The CC_* columns are per-customer aggregates,
# so rows of the same customer repeat the same values.
# NOTE(review): this yields exactly one row per CUSTOMERID only if the remaining
# attribute columns are constant for each customer — confirm.
df = df.drop_duplicates()

In [38]:
# Shape after dropping duplicate rows.
df.shape

(150, 29)

In [40]:
# Preview the first two rows of the de-duplicated customer KPI frame.
df.head(2)

Unnamed: 0,CUSTOMERID,CUSTOMERNAME,CUSTOMERTYPE,COMPANYTYPE,CUSTOMERSINCE,PAYMENTTERMS,CREDITLIMITTYPE,CONTACTDETAILS,EMAILDETAILS,ADDRESSDETAILS,...,CC_AVG_ORDER_PROCESSING_TIME,CC_AVG_INVOICE_PROCESSING_TIME,CC_AVG_DELIVERY_DELAY,CC_AVG_PAYMENT_DELAY,CC_TOTAL_ORDERS,CC_TOTAL_DELAYS,CC_ORDER_CONSISTENCY,CC_INVOICE_CONSISTENCY,CC_DELIVERY_CONSISTENCY,CC_PAYMENT_CONSISTENCY
0,CID-a39aa76a-afba-4e60-baad-608d8de86db5,Hammers and Nails,Construction Materials,Ltd,YR-2020,Net 30 days,Low,001-210-646-5688x47857,josephscott@hansen.com,066 Hughes Island Suite 558\nNorth Ryanborough...,...,14.952668,27.262478,3.107573,2.716867,1162,6768,1.831958,12.91111,1.434638,12.928207
3,CID-2decf55f-b43b-42ab-a3a1-1d1b6c3d40a4,PreScott Works,Construction Materials,Ltd,YR-2016,Net 60 days,Very High,001-824-063-1538x302,griffithsuzanne@anderson.com,49148 Mclaughlin Groves Suite 661\nJoannemouth...,...,15.017588,38.266332,2.985762,-6.252094,1194,-3900,1.782995,19.590213,1.380246,19.556849


In [None]:
# Build a session-side DataFrame from the pandas rows (as plain Python lists,
# with column names taken from df.columns) and write it to the target table,
# replacing the table's previous contents ("overwrite" mode).
# NOTE(review): any missing values in df are passed through by
# df.values.tolist() unchanged — confirm how they are stored in the table.
df_train_sf=my_session.createDataFrame(
        df.values.tolist(),
        schema=df.columns.tolist())
df_train_sf.write.mode("overwrite").save_as_table("FDC_HORIZONTAL.O2C_GOLD.ORDER_TO_CASH_CUSTOMER_KPI")