# Use O2C_Template Customized Notebook Template

In [28]:
import pandas as pd
import numpy as np

In [29]:
from fosforml.model_manager.snowflakesession import get_session
# Session handle from fosforml's snowflakesession helper; used below to run the
# source query and to write the KPI table.
my_session = get_session()

In [None]:
# Name of the source table read by this notebook.
table_name = 'ORDER_TO_CASH_ENRICHED'

# Select every column of that table and materialise the result in pandas.
sf_df = my_session.sql(f"select * from {table_name}")
df = sf_df.to_pandas()

In [None]:
# Rows whose INVOICESTATUS is 'Paid' become df_train; all other rows go to df_new.
# .copy() makes each subset an independent frame, so the column assignments
# performed on df_train in later cells modify df_train itself instead of a
# slice of df (which is what triggers pandas' SettingWithCopyWarning).
df_train = df[df['INVOICESTATUS'] == 'Paid'].copy()
df_new = df[df['INVOICESTATUS'] != 'Paid'].copy()

In [None]:
# Show the (rows, columns) shape of the 'Paid' subset and of the remaining rows.
df_train.shape, df_new.shape

In [None]:
# Parse the date columns of df_train into pandas datetimes, one column at a time.
for date_col in ['ORDERDATE', 'DELIVERYDATE', 'INVOICEDATE', 'PAYMENTDATE', 'INVOICEDUEDATE']:
    df_train[date_col] = pd.to_datetime(df_train[date_col])

In [None]:
# Parse DELIVEREDON into datetimes as well; it is subtracted from DELIVERYDATE
# when the DELIVERY_DELAY column is computed.
df_train['DELIVEREDON'] = pd.to_datetime(df_train['DELIVEREDON'])

In [None]:
# Order Processing Time: whole days from ORDERDATE to DELIVERYDATE.
df_train['ORDER_PROCESSING_TIME'] = df_train['DELIVERYDATE'].sub(df_train['ORDERDATE']).dt.days

# Invoice Processing Time: whole days from INVOICEDATE to PAYMENTDATE.
df_train['INVOICE_PROCESSING_TIME'] = df_train['PAYMENTDATE'].sub(df_train['INVOICEDATE']).dt.days

# Order Value per Unit: ORDERVALUE divided by ORDERQUANTITY, row by row.
df_train['ORDER_VALUE_PER_UNIT'] = df_train['ORDERVALUE'].div(df_train['ORDERQUANTITY'])

In [None]:
# Delivery Delay: whole days of DELIVERYDATE minus DELIVEREDON.
# NOTE(review): with this operand order the value is negative whenever
# DELIVEREDON falls after DELIVERYDATE — confirm that is the intended sign.
df_train['DELIVERY_DELAY'] = (df_train['DELIVERYDATE'] - df_train['DELIVEREDON']).dt.days

In [None]:
# Payment Delay: whole days from INVOICEDUEDATE to PAYMENTDATE
# (positive when payment came after the due date).
df_train['PAYMENT_DELAY'] = df_train['PAYMENTDATE'].sub(df_train['INVOICEDUEDATE']).dt.days

In [None]:
# Customer Lifetime Value (CLV): total ORDERVALUE per CUSTOMERID, attached to
# every df_train row of that customer.
clv = (
    df_train.groupby('CUSTOMERID')['ORDERVALUE']
    .sum()
    .rename('CUSTOMER_LIFETIME_VALUE')
    .reset_index()
)
df_train = df_train.merge(clv, how='left', on='CUSTOMERID')

In [None]:
# Order Frequency: number of distinct ORDERID values per CUSTOMERID.
order_freq = (
    df_train.groupby('CUSTOMERID')['ORDERID']
    .nunique()
    .rename('ORDER_FREQUENCY')
    .reset_index()
)
df_train = df_train.merge(order_freq, how='left', on='CUSTOMERID')

In [None]:
# Average Order Value: mean ORDERVALUE per CUSTOMERID.
avg_order_value = (
    df_train.groupby('CUSTOMERID')['ORDERVALUE']
    .mean()
    .rename('AVERAGE_ORDER_VALUE')
    .reset_index()
)
df_train = df_train.merge(avg_order_value, how='left', on='CUSTOMERID')

In [None]:
# Recency: whole days between each customer's most recent ORDERDATE and the
# moment this cell is executed (pd.to_datetime('today')).
latest_order_date = (
    df_train.groupby('CUSTOMERID')['ORDERDATE']
    .max()
    .rename('LAST_ORDER_DATE')
    .reset_index()
)
latest_order_date['RECENCY'] = (
    pd.to_datetime('today') - latest_order_date['LAST_ORDER_DATE']
).dt.days
# Only the RECENCY column is carried over to df_train.
df_train = df_train.merge(
    latest_order_date[['CUSTOMERID', 'RECENCY']], how='left', on='CUSTOMERID'
)

In [None]:
# Preferred Payment Method: each customer's most frequent PAYMENTMETHOD.
# value_counts() drops missing values, so a customer whose PAYMENTMETHOD is
# missing on every row produces an empty result; fall back to NaN there
# instead of letting index[0] raise IndexError.
preferred_payment_method = (
    df_train.groupby('CUSTOMERID')['PAYMENTMETHOD']
    .agg(lambda x: x.value_counts().index[0] if x.notna().any() else np.nan)
    .reset_index()
)
preferred_payment_method.columns = ['CUSTOMERID', 'PREFERRED_PAYMENT_METHOD']
df_train = df_train.merge(preferred_payment_method, on='CUSTOMERID', how='left')

In [None]:
# Preferred Product Category: each customer's most frequent PRODUCTCATEGORY.
# value_counts() drops missing values, so a customer whose PRODUCTCATEGORY is
# missing on every row produces an empty result; fall back to NaN there
# instead of letting index[0] raise IndexError.
preferred_product_category = (
    df_train.groupby('CUSTOMERID')['PRODUCTCATEGORY']
    .agg(lambda x: x.value_counts().index[0] if x.notna().any() else np.nan)
    .reset_index()
)
preferred_product_category.columns = ['CUSTOMERID', 'PREFERRED_PRODUCT_CATEGORY']
df_train = df_train.merge(preferred_product_category, on='CUSTOMERID', how='left')

In [None]:
# Preferred Product Type: each customer's most frequent PRODUCTTYPE.
# value_counts() drops missing values, so a customer whose PRODUCTTYPE is
# missing on every row produces an empty result; fall back to NaN there
# instead of letting index[0] raise IndexError.
preferred_product_type = (
    df_train.groupby('CUSTOMERID')['PRODUCTTYPE']
    .agg(lambda x: x.value_counts().index[0] if x.notna().any() else np.nan)
    .reset_index()
)
preferred_product_type.columns = ['CUSTOMERID', 'PREFERRED_PRODUCT_TYPE']
df_train = df_train.merge(preferred_product_type, on='CUSTOMERID', how='left')

In [None]:
# List df_train's columns after the feature merges above.
df_train.columns

In [None]:
# Preview the last rows of df_train.
df_train.tail()

In [None]:
# Average Order Processing Time: mean ORDER_PROCESSING_TIME per CUSTOMERID.
avg_order_processing_time = (
    df_train.groupby('CUSTOMERID')['ORDER_PROCESSING_TIME']
    .mean()
    .rename('AVG_ORDER_PROCESSING_TIME')
    .reset_index()
)

In [None]:
# Average Invoice Processing Time: mean INVOICE_PROCESSING_TIME per CUSTOMERID.
avg_invoice_processing_time = (
    df_train.groupby('CUSTOMERID')['INVOICE_PROCESSING_TIME']
    .mean()
    .rename('AVG_INVOICE_PROCESSING_TIME')
    .reset_index()
)

In [None]:
# Average Delivery Delay: mean DELIVERY_DELAY per CUSTOMERID.
avg_delivery_delay = (
    df_train.groupby('CUSTOMERID')['DELIVERY_DELAY']
    .mean()
    .rename('AVG_DELIVERY_DELAY')
    .reset_index()
)

In [None]:
# Average Payment Delay: mean PAYMENT_DELAY per CUSTOMERID.
avg_payment_delay = (
    df_train.groupby('CUSTOMERID')['PAYMENT_DELAY']
    .mean()
    .rename('AVG_PAYMENT_DELAY')
    .reset_index()
)

In [None]:
# List the columns of df, the frame loaded from the source table (not df_train).
df.columns

In [None]:
# Total Orders: per CUSTOMERID, the number of df_train rows that have a
# non-null ORDER_PROCESSING_TIME.
total_orders = (
    df_train.groupby('CUSTOMERID')['ORDER_PROCESSING_TIME']
    .count()
    .rename('TOTAL_ORDERS')
    .reset_index()
)

In [None]:
# Total Delays: per CUSTOMERID, summed DELIVERY_DELAY plus summed PAYMENT_DELAY.
total_delays = (
    df_train.groupby('CUSTOMERID')[['DELIVERY_DELAY', 'PAYMENT_DELAY']]
    .sum()
    .reset_index()
    .assign(TOTAL_DELAYS=lambda sums: sums['DELIVERY_DELAY'] + sums['PAYMENT_DELAY'])
    [['CUSTOMERID', 'TOTAL_DELAYS']]
)

In [None]:
# Order Consistency: standard deviation of ORDER_PROCESSING_TIME per CUSTOMERID.
order_consistency = (
    df_train.groupby('CUSTOMERID')['ORDER_PROCESSING_TIME']
    .std()
    .rename('ORDER_CONSISTENCY')
    .reset_index()
)

In [None]:
# Invoice Consistency: standard deviation of INVOICE_PROCESSING_TIME per CUSTOMERID.
invoice_consistency = (
    df_train.groupby('CUSTOMERID')['INVOICE_PROCESSING_TIME']
    .std()
    .rename('INVOICE_CONSISTENCY')
    .reset_index()
)

In [None]:
# Delivery Consistency: standard deviation of DELIVERY_DELAY per CUSTOMERID.
delivery_consistency = (
    df_train.groupby('CUSTOMERID')['DELIVERY_DELAY']
    .std()
    .rename('DELIVERY_CONSISTENCY')
    .reset_index()
)

In [None]:
# Payment Consistency: standard deviation of PAYMENT_DELAY per CUSTOMERID.
payment_consistency = (
    df_train.groupby('CUSTOMERID')['PAYMENT_DELAY']
    .std()
    .rename('PAYMENT_CONSISTENCY')
    .reset_index()
)

In [None]:
# Per-customer feature tables to be combined, in merge order.
features = [
    avg_order_processing_time,
    avg_invoice_processing_time,
    avg_delivery_delay,
    avg_payment_delay,
    total_orders,
    total_delays,
    order_consistency,
    invoice_consistency,
    delivery_consistency,
    payment_consistency,
]
# Base frame: one row per distinct CUSTOMERID found in df, re-indexed from 0.
customer_features = (
    df[['CUSTOMERID']]
    .drop_duplicates()
    .reset_index(drop=True)
)

In [None]:
# Left-join each per-customer feature table onto customer_features by
# CUSTOMERID; customers absent from a table get NaN for its columns.
for feature in features:
    customer_features = customer_features.merge(feature, on='CUSTOMERID', how='left')

In [None]:
# Merge the per-customer features onto df_train by CUSTOMERID, so every
# row carries its customer's aggregate columns.
df_train = df_train.merge(customer_features, on='CUSTOMERID', how='left')

In [None]:
# Preview the first rows of the per-customer feature table.
customer_features.head()

In [None]:
# Columns kept for the customer KPI output: customer attributes first, then
# the engineered per-customer metrics.
kpi_columns = [
    'CUSTOMERID',
    'CUSTOMERNAME',
    'CREDITRATING',
    'CUSTOMERTYPE',
    'COMPANYTYPE',
    'CONTACTDETAILS',
    'EMAILDETAILS',
    'ADDRESSDETAILS',
    'ADMINDETAILS',
    'CREDITLIMIT',
    'CUSTOMER_LIFETIME_VALUE',
    'ORDER_FREQUENCY',
    'AVERAGE_ORDER_VALUE',
    'RECENCY',
    'PREFERRED_PAYMENT_METHOD',
    'PREFERRED_PRODUCT_CATEGORY',
    'PREFERRED_PRODUCT_TYPE',
    'AVG_ORDER_PROCESSING_TIME',
    'AVG_INVOICE_PROCESSING_TIME',
    'AVG_DELIVERY_DELAY',
    'AVG_PAYMENT_DELAY',
    'TOTAL_DELAYS',
    'ORDER_CONSISTENCY',
    'INVOICE_CONSISTENCY',
    'DELIVERY_CONSISTENCY',
    'PAYMENT_CONSISTENCY',
]
# Rebinds df: from here on it is this column subset of df_train, not the raw table.
df = df_train[kpi_columns]

In [None]:
# Shape of the selected KPI columns before duplicate rows are dropped.
df.shape

In [None]:
# Drop rows that are identical across all selected columns.
df = df.drop_duplicates()

In [None]:
# Shape after duplicate rows were dropped.
df.shape

In [None]:
# Preview the first rows of the de-duplicated KPI frame.
df.head()

In [None]:
# Turn the KPI frame into plain Python rows plus a list of column names,
# build a session DataFrame from them, and overwrite the target table.
kpi_rows = df.values.tolist()
kpi_schema = df.columns.tolist()
df_train_sf = my_session.createDataFrame(kpi_rows, schema=kpi_schema)
df_train_sf.write.mode("overwrite").save_as_table("FDC_HORIZONTAL.O2C_GOLD.ORDER_TO_CASH_CUSTOMER_KPI")