# Use O2C_Template Customized Notebook Template

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Obtain the session object that the next cell uses to run SQL
# (presumably a Snowflake/Snowpark session, judging by the module name — confirm).
from fosforml.model_manager.snowflakesession import get_session
my_session = get_session()

In [3]:
# Table queried by this notebook.
table_name = 'ORDER_TO_CASH_ENRICHED'

# Select every column of the table through the session, then pull the result
# into a pandas DataFrame for local processing.
sf_df = my_session.sql(f"select * from {table_name}")
df_final = sf_df.to_pandas()

In [4]:
# Split on invoice status: rows whose INVOICESTATUS is 'Paid' go to df_train,
# every other row goes to df_new.
paid_mask = df_final['INVOICESTATUS'].eq('Paid')
df_train = df_final[paid_mask]
df_new = df_final[~paid_mask]

In [5]:
# Show (rows, columns) for the 'Paid' subset and for the remaining invoices.
df_train.shape, df_new.shape

((170290, 68), (3001, 68))

In [7]:
# Work on a copy so the feature engineering below does not modify df_final.
df = df_final.copy()

In [8]:
# Parse every date column used by the feature cells below. Values that cannot be
# parsed become NaT (errors='coerce') instead of raising.
# SHIPMENTDATE is included because the DELIVERY_DELAY computation does date
# arithmetic with it, so it must share the datetime dtype of DELIVEREDON.
date_columns = [
    'ORDERDATE', 'SHIPMENTDATE', 'DELIVERYDATE', 'DELIVEREDON',
    'INVOICEDATE', 'INVOICEDUEDATE', 'PAYMENTDATE',
]
for date_col in date_columns:
    df[date_col] = pd.to_datetime(df[date_col], errors='coerce')

In [9]:
# Calendar features derived from the order date; the datetime accessor is
# built once and reused for every feature.
order_dt = pd.to_datetime(df['ORDERDATE']).dt
df['MONTH'] = order_dt.month
df['YEAR'] = order_dt.year
df['WEEK'] = order_dt.isocalendar().week
df['WEEKEND'] = order_dt.weekday >= 5  # weekday 5 and 6 are Saturday and Sunday
df['DAY_OF_THE_MONTH'] = order_dt.day

In [10]:
# Delivery delay: whole days of SHIPMENTDATE minus DELIVEREDON.
# NOTE(review): this is shipment minus delivered-on, whereas PAYMENT_DELAY below
# is actual minus due; confirm this is the intended definition of "delay"
# (as opposed to DELIVEREDON minus DELIVERYDATE).
df['DELIVERY_DELAY'] = df['SHIPMENTDATE'].sub(df['DELIVEREDON']).dt.days

# Payment delay: whole days of PAYMENTDATE minus INVOICEDUEDATE
# (negative when payment precedes the due date).
df['PAYMENT_DELAY'] = df['PAYMENTDATE'].sub(df['INVOICEDUEDATE']).dt.days

In [11]:
# List all column names now present on df, including the features added above.
df.columns

Index(['ORDERID', 'ORDERITEMID', 'PRODUCTID', 'PRODUCTNAME', 'ORDERQUANTITY',
       'UNITPRICE', 'ORDERVALUE', 'CUSTOMERID', 'CUSTOMERNAME',
       'PRODUCTCATEGORY', 'ORDERDATE', 'ORDERSTATUS', 'ORDERAMOUNT',
       'ORDERCOUNT', 'SHIPMENTID', 'SHIPMENTDATE', 'DELIVERYDATE',
       'DELIVEREDON', 'CARRIER', 'SHIPMENTSTATUS', 'INVOICEID', 'INVOICEDATE',
       'INVOICEDUEDATE', 'PAYMENTDATE', 'INVOICESTATUS', 'PAYMENTMETHOD',
       'PRODUCTTYPE', 'QUANTITY', 'NAME', 'COMPANYTYPE', 'CONTACTDETAILS',
       'EMAILDETAILS', 'ADDRESSDETAILS', 'ADMINDETAILS', 'CREDITLIMIT',
       'CREDITRATING', 'CUSTOMERTYPE', 'CC_CUSTOMER_LIFETIME_VALUE',
       'CC_ORDER_FREQUENCY', 'CC_AVERAGE_ORDER_VALUE', 'CC_RECENCY',
       'CC_PREFERRED_PAYMENT_METHOD', 'CC_PREFERRED_PRODUCT_CATEGORY',
       'CC_PREFERRED_PRODUCT_TYPE', 'CC_AVG_ORDER_PROCESSING_TIME',
       'CC_AVG_INVOICE_PROCESSING_TIME', 'CC_AVG_DELIVERY_DELAY',
       'CC_AVG_PAYMENT_DELAY', 'CC_TOTAL_ORDERS', 'CC_TOTAL_DELAYS',
       'CC

In [13]:
# Columns kept for modelling, grouped by name prefix. The concatenation order
# is the column order of the selected frame.
cols = (
    # order-line values
    ['ORDERQUANTITY', 'UNITPRICE', 'ORDERVALUE']
    # CC_-prefixed columns
    + [
        'CC_CUSTOMER_LIFETIME_VALUE',
        'CC_ORDER_FREQUENCY',
        'CC_AVERAGE_ORDER_VALUE',
        'CC_RECENCY',
        'CC_AVG_ORDER_PROCESSING_TIME',
        'CC_AVG_INVOICE_PROCESSING_TIME',
        'CC_AVG_DELIVERY_DELAY',
        'CC_AVG_PAYMENT_DELAY',
        'CC_TOTAL_ORDERS',
        'CC_TOTAL_DELAYS',
        'CC_ORDER_CONSISTENCY',
        'CC_INVOICE_CONSISTENCY',
        'CC_DELIVERY_CONSISTENCY',
        'CC_PAYMENT_CONSISTENCY',
    ]
    # PR_-prefixed columns
    + [
        'PR_TOTAL_SALES_VOLUME',
        'PR_TOTAL_SALES_VALUE',
        'PR_AVG_ORDER_QUANTITY',
        'PR_AVG_UNIT_PRICE',
        'PR_NUMBER_OF_ORDERS',
        'PR_AVG_DELIVERY_TIME',
        'PR_AVG_INVOICE_TIME',
    ]
    # CAT_-prefixed columns
    + [
        'CAT_TOTAL_SALES_VOLUME',
        'CAT_TOTAL_SALES_VALUE',
        'CAT_AVG_ORDER_QUANTITY',
        'CAT_AVG_UNIT_PRICE',
        'CAT_NUMBER_OF_ORDERS',
        'CAT_AVG_DELIVERY_TIME',
        'CAT_AVG_INVOICE_TIME',
    ]
    # delay columns computed earlier in this notebook
    + ['DELIVERY_DELAY', 'PAYMENT_DELAY']
    # calendar features computed earlier in this notebook
    + ['MONTH', 'YEAR', 'WEEK', 'WEEKEND', 'DAY_OF_THE_MONTH']
)

In [14]:
# Keep only the columns listed in cols (all rows); df is rebound to the narrower frame.
df = df.loc[:, cols]

In [15]:
# Print dtype and non-null count for each selected column.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 173291 entries, 0 to 173290
Data columns (total 38 columns):
 #   Column                          Non-Null Count   Dtype  
---  ------                          --------------   -----  
 0   ORDERQUANTITY                   173291 non-null  int16  
 1   UNITPRICE                       173291 non-null  int32  
 2   ORDERVALUE                      173291 non-null  int32  
 3   CC_CUSTOMER_LIFETIME_VALUE      173291 non-null  int32  
 4   CC_ORDER_FREQUENCY              173291 non-null  int16  
 5   CC_AVERAGE_ORDER_VALUE          173291 non-null  float64
 6   CC_RECENCY                      173291 non-null  int8   
 7   CC_AVG_ORDER_PROCESSING_TIME    173291 non-null  float64
 8   CC_AVG_INVOICE_PROCESSING_TIME  173291 non-null  float64
 9   CC_AVG_DELIVERY_DELAY           173291 non-null  float64
 10  CC_AVG_PAYMENT_DELAY            173291 non-null  float64
 11  CC_TOTAL_ORDERS                 173291 non-null  int16  
 12  CC_TOTAL_DELAYS 

In [16]:
# Preview the last rows of the selected columns.
df.tail()

Unnamed: 0,ORDERQUANTITY,UNITPRICE,ORDERVALUE,CC_CUSTOMER_LIFETIME_VALUE,CC_ORDER_FREQUENCY,CC_AVERAGE_ORDER_VALUE,CC_RECENCY,CC_AVG_ORDER_PROCESSING_TIME,CC_AVG_INVOICE_PROCESSING_TIME,CC_AVG_DELIVERY_DELAY,...,CAT_NUMBER_OF_ORDERS,CAT_AVG_DELIVERY_TIME,CAT_AVG_INVOICE_TIME,DELIVERY_DELAY,PAYMENT_DELAY,MONTH,YEAR,WEEK,WEEKEND,DAY_OF_THE_MONTH
173286,772,299,230828,65953955,558,61068.476852,26,6.92963,8.069444,-1.450926,...,14445,8.534271,7.97128,0.0,-5.0,7,2023,27,False,4
173287,790,9,7110,13756619,548,12426.936766,24,6.952123,8.113821,-1.523035,...,19990,8.499787,7.972449,-2.0,0.0,8,2024,34,True,24
173288,421,3,1263,13756619,548,12426.936766,24,6.952123,8.113821,-1.523035,...,19990,8.499787,7.972449,-2.0,0.0,8,2024,34,True,24
173289,450,2,900,13756619,548,12426.936766,24,6.952123,8.113821,-1.523035,...,19990,8.499787,7.972449,-2.0,0.0,8,2024,34,True,24
173290,916,19,17404,64484457,554,57575.408036,25,7.101786,8.203571,-1.626786,...,14445,8.534271,7.97128,0.0,8.0,1,2024,1,False,2


In [18]:
from sklearn.preprocessing import StandardScaler

# The two delay columns are kept on their original scale; every other column of
# df is treated as a predictor and standardized.
target_columns = ['DELIVERY_DELAY', 'PAYMENT_DELAY']
feature_df = df.drop(columns=target_columns)

# Standardize the predictor variables (per-column zero mean, unit variance).
# NOTE(review): the scaler is fit on every row of df; if a train/test split is
# made later, fitting on the training rows only avoids leaking test statistics
# — confirm against the modelling cells.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(feature_df)

# Reuse the feature frame's index so the target assignments below line up
# row-for-row even when df does not carry a default RangeIndex; a fresh
# RangeIndex here would silently misalign or NaN-fill the targets in that case.
df_scaled = pd.DataFrame(X_scaled, columns=feature_df.columns, index=feature_df.index)
df_scaled['DELIVERY_DELAY'] = df['DELIVERY_DELAY']
df_scaled['PAYMENT_DELAY'] = df['PAYMENT_DELAY']

In [19]:
# Preview the first rows of the scaled frame (standardized predictors plus the two unscaled delay columns).
df_scaled.head()

Unnamed: 0,ORDERQUANTITY,UNITPRICE,ORDERVALUE,CC_CUSTOMER_LIFETIME_VALUE,CC_ORDER_FREQUENCY,CC_AVERAGE_ORDER_VALUE,CC_RECENCY,CC_AVG_ORDER_PROCESSING_TIME,CC_AVG_INVOICE_PROCESSING_TIME,CC_AVG_DELIVERY_DELAY,...,CAT_NUMBER_OF_ORDERS,CAT_AVG_DELIVERY_TIME,CAT_AVG_INVOICE_TIME,MONTH,YEAR,WEEK,WEEKEND,DAY_OF_THE_MONTH,DELIVERY_DELAY,PAYMENT_DELAY
0,0.768522,-0.249351,-0.245039,-0.38159,1.139589,-0.387349,-0.662097,-1.432849,0.87199,-0.685103,...,1.161755,-0.013016,-0.316488,1.30994,-1.590644,1.243291,-0.632181,-0.536187,-3.0,0.0
1,0.622408,-0.249991,-0.273455,-0.38159,1.139589,-0.387349,-0.662097,-1.432849,0.87199,-0.685103,...,1.161755,-0.013016,-0.316488,1.30994,-1.590644,1.243291,-0.632181,-0.536187,-3.0,0.0
2,-0.546511,-0.25031,-0.300408,-0.38159,1.139589,-0.387349,-0.662097,-1.432849,0.87199,-0.685103,...,1.161755,-0.013016,-0.316488,1.30994,-1.590644,1.243291,-0.632181,-0.536187,-3.0,0.0
3,2.930659,-0.25047,-0.269707,-0.146407,1.506251,-0.165817,0.797586,-0.091813,1.026887,0.813034,...,-1.013455,0.146568,1.419836,-1.069112,1.207321,-1.221954,1.581826,-1.442385,-2.0,-5.0
4,1.439322,-0.249671,-0.23661,-0.146407,1.506251,-0.165817,0.797586,-0.091813,1.026887,0.813034,...,-1.013455,0.146568,1.419836,-1.069112,1.207321,-1.221954,1.581826,-1.442385,-2.0,-5.0


In [20]:
# Summary statistics per column of df_scaled (count, mean, std, min, quartiles, max).
df_scaled.describe()

Unnamed: 0,ORDERQUANTITY,UNITPRICE,ORDERVALUE,CC_CUSTOMER_LIFETIME_VALUE,CC_ORDER_FREQUENCY,CC_AVERAGE_ORDER_VALUE,CC_RECENCY,CC_AVG_ORDER_PROCESSING_TIME,CC_AVG_INVOICE_PROCESSING_TIME,CC_AVG_DELIVERY_DELAY,...,CAT_NUMBER_OF_ORDERS,CAT_AVG_DELIVERY_TIME,CAT_AVG_INVOICE_TIME,MONTH,YEAR,WEEK,WEEKEND,DAY_OF_THE_MONTH,DELIVERY_DELAY,PAYMENT_DELAY
count,173291.0,173291.0,173291.0,173291.0,173291.0,173291.0,173291.0,173291.0,173291.0,173291.0,...,173291.0,173291.0,173291.0,173291.0,173291.0,173291.0,173291.0,173291.0,171043.0,171043.0
mean,-5.742451e-17,5.9864180000000004e-18,-1.927135e-17,4.879341e-18,-5.762748e-16,5.777304000000001e-17,1.142176e-15,-1.544578e-15,6.882618e-15,-6.694948e-15,...,1.670047e-16,4.892068e-14,-1.892356e-14,7.491223000000001e-17,-1.38978e-13,-7.483023000000001e-17,-1.828728e-17,-5.9864180000000004e-18,-1.499711,1.983688
std,1.000003,1.000003,1.000003,1.000003,1.000003,1.000003,1.000003,1.000003,1.000003,1.000003,...,1.000003,1.000003,1.000003,1.000003,1.000003,1.000003,1.000003,1.000003,1.116606,4.397062
min,-0.6364742,-0.2504704,-0.3016176,-0.3901331,-2.343694,-0.3919639,-0.6620972,-2.633525,-3.28241,-2.439818,...,-1.432705,-1.421744,-1.967177,-1.663875,-1.590644,-1.769786,-0.6321809,-1.668935,-3.0,-6.0
25%,-0.5585866,-0.2496708,-0.2872826,-0.3462952,-0.7303838,-0.3466685,-0.6620972,-0.7403324,-0.6361801,-0.7646495,...,-1.119262,-0.3950523,-0.3720938,-0.7717303,-0.1916616,-0.8795586,-0.6321809,-0.8760112,-2.0,-2.0
50%,-0.4022075,-0.247432,-0.2554716,-0.3069064,0.002938945,-0.3072592,-0.6620972,0.0068897,0.05556234,-0.002251823,...,0.05893536,-0.01301581,-0.3164883,0.120414,-0.1916616,0.07914777,-0.6321809,0.03018704,-2.0,2.0
75%,-0.04537341,-0.2189672,-0.1699055,-0.2077363,0.6262633,-0.20535,0.06774444,0.6904922,0.7417703,0.7019169,...,0.7966014,0.2967571,1.000842,0.7151768,1.207321,0.763938,1.581826,0.8231105,-1.0,6.0
max,5.400724,5.985888,10.23092,3.860401,2.972896,3.526119,5.906477,2.741224,3.447941,2.301595,...,1.161755,1.787299,1.755793,1.607321,1.207321,1.722644,1.581826,1.729309,0.0,10.0


# Model Training for Payment Delay Prediction