# Use O2C_Template Customized Notebook Template

In [1]:
import pandas as pd
import numpy as np

In [2]:
from fosforml.model_manager.snowflakesession import get_session
# Session handle used for every Snowflake read and write in this notebook.
# NOTE(review): presumably a Snowpark Session — confirm against fosforml docs.
my_session = get_session()

In [3]:
# Source table for the supplier KPI build.
# (Alternative source kept for reference: 'ORDER_TO_CASH_ENRICHED'.)
table_name = 'ORDER_TO_CASH_MASTER'

# Run a full-table select through the session, then materialize the result
# locally as a pandas DataFrame.
sf_df = my_session.sql(f"select * from {table_name}")
df = sf_df.to_pandas()

In [4]:
# Split the extract on INVOICESTATUS: rows that are LATE_PAYMENT or
# ONTIME_PAYMENT go to df_train, every other row goes to df_test.
train_statuses = ['LATE_PAYMENT', 'ONTIME_PAYMENT']
is_train_row = df['INVOICESTATUS'].isin(train_statuses)

# .copy() detaches each split from `df`, so the column assignments made in
# later cells write to an independent frame instead of a slice of `df`
# (which would raise SettingWithCopyWarning).
df_train = df[is_train_row].copy()
df_test = df[~is_train_row].copy()

In [5]:
# Show (rows, columns) of the two splits as a sanity check.
df_train.shape, df_test.shape

((170161, 40), (693, 40))

In [6]:
# Parse the order, delivery, invoice, payment and due-date columns of the
# training frame into pandas datetimes.
for date_col in ('ORDERDATE', 'DELIVERYDATE', 'INVOICEDATE', 'PAYMENTDATE', 'INVOICEDUEDATE'):
    df_train[date_col] = pd.to_datetime(df_train[date_col])

In [7]:
# DELIVEREDON is parsed to datetime; it is used for the delivery-delay KPI.
delivered_on = pd.to_datetime(df_train['DELIVEREDON'])
df_train['DELIVEREDON'] = delivered_on

In [8]:
# SHIPMENTDATE is parsed to datetime; it is used for the processing-time KPI.
shipment_date = pd.to_datetime(df_train['SHIPMENTDATE'])
df_train['SHIPMENTDATE'] = shipment_date

In [9]:
# Row-level KPIs derived from the raw order columns.

# Whole days from ORDERDATE to SHIPMENTDATE.
order_to_ship = df_train['SHIPMENTDATE'] - df_train['ORDERDATE']
df_train['SP_ORDER_PROCESSING_TIME'] = order_to_ship.dt.days

# ORDERVALUE divided by ORDERQUANTITY.
# NOTE(review): a zero ORDERQUANTITY would produce inf/NaN here — confirm the
# source guarantees a positive quantity.
df_train['SP_ORDER_VALUE_PER_UNIT'] = df_train['ORDERVALUE'].div(df_train['ORDERQUANTITY'])

# Whole days from DELIVERYDATE to DELIVEREDON (positive when DELIVEREDON is later).
delivery_slip = df_train['DELIVEREDON'] - df_train['DELIVERYDATE']
df_train['SP_DELIVERY_DELAY'] = delivery_slip.dt.days

In [10]:
# Sum of ORDERVALUE per SUPPLIERID, attached to every training row.
# (The column is named "CUSTOMER" although it is keyed by SUPPLIERID.)
clv = (
    df_train.groupby('SUPPLIERID')['ORDERVALUE']
    .sum()
    .reset_index(name='SP_CUSTOMER_LIFETIME_VALUE')
)
df_train = pd.merge(df_train, clv, how='left', on='SUPPLIERID')

In [11]:
# Number of distinct ORDERID values per SUPPLIERID, attached to every training row.
order_freq = (
    df_train.groupby('SUPPLIERID')['ORDERID']
    .nunique()
    .reset_index(name='SP_ORDER_FREQUENCY')
)
df_train = pd.merge(df_train, order_freq, how='left', on='SUPPLIERID')

In [12]:
# Mean ORDERVALUE per SUPPLIERID (averaged over rows), attached to every training row.
avg_order_value = (
    df_train.groupby('SUPPLIERID')['ORDERVALUE']
    .mean()
    .reset_index(name='SP_AVERAGE_ORDER_VALUE')
)
df_train = pd.merge(df_train, avg_order_value, how='left', on='SUPPLIERID')

In [13]:
# List the columns to confirm the SP_* features added so far are present.
df_train.columns

Index(['ORDERID', 'ORDERITEMID', 'PRODUCTID', 'PRODUCTNAME', 'ORDERQUANTITY',
       'UNITPRICE', 'ORDERVALUE', 'CUSTOMERID', 'CUSTOMERNAME',
       'PRODUCTCATEGORY', 'ORDERDATE', 'ORDERSTATUS', 'ORDERAMOUNT',
       'ORDERCOUNT', 'SHIPMENTID', 'SHIPMENTDATE', 'DELIVERYDATE',
       'DELIVEREDON', 'CARRIER', 'SHIPMENTSTATUS', 'INVOICEID', 'INVOICEDATE',
       'INVOICEDUEDATE', 'PAYMENTDATE', 'INVOICESTATUS', 'PAYMENTMETHOD',
       'PRODUCTTYPE', 'QUANTITY', 'COMPANYTYPE', 'CONTACTDETAILS',
       'EMAILDETAILS', 'ADDRESSDETAILS', 'ADMINDETAILS', 'CREDITLIMIT',
       'CUSTOMERSINCE', 'PAYMENTTERMS', 'CREDITLIMITTYPE', 'CUSTOMERTYPE',
       'SUPPLIERNAME', 'SUPPLIERID', 'SP_ORDER_PROCESSING_TIME',
       'SP_ORDER_VALUE_PER_UNIT', 'SP_DELIVERY_DELAY',
       'SP_CUSTOMER_LIFETIME_VALUE', 'SP_ORDER_FREQUENCY',
       'SP_AVERAGE_ORDER_VALUE'],
      dtype='object')

In [14]:
# Mean SP_ORDER_PROCESSING_TIME per SUPPLIERID, one row per supplier.
avg_order_processing_time = (
    df_train.groupby('SUPPLIERID')['SP_ORDER_PROCESSING_TIME']
    .mean()
    .reset_index(name='SP_AVG_ORDER_PROCESSING_TIME')
)

In [15]:
# Mean SP_DELIVERY_DELAY per SUPPLIERID, one row per supplier.
avg_delivery_delay = (
    df_train.groupby('SUPPLIERID')['SP_DELIVERY_DELAY']
    .mean()
    .reset_index(name='SP_AVG_DELIVERY_DELAY')
)

In [16]:
# Number of rows per SUPPLIERID with a non-null SP_ORDER_PROCESSING_TIME
# (count() skips missing values).
# NOTE(review): this counts rows, not distinct ORDERIDs — confirm that is the
# intended meaning of "total orders".
total_orders = (
    df_train.groupby('SUPPLIERID')['SP_ORDER_PROCESSING_TIME']
    .count()
    .reset_index(name='SP_TOTAL_ORDERS')
)

In [17]:
# Standard deviation of SP_ORDER_PROCESSING_TIME per SUPPLIERID
# (lower means more consistent processing times).
order_consistency = (
    df_train.groupby('SUPPLIERID')['SP_ORDER_PROCESSING_TIME']
    .std()
    .reset_index(name='SP_ORDER_CONSISTENCY')
)

In [18]:
# Standard deviation of SP_DELIVERY_DELAY per SUPPLIERID
# (lower means more consistent delivery delays).
delivery_consistency = (
    df_train.groupby('SUPPLIERID')['SP_DELIVERY_DELAY']
    .std()
    .reset_index(name='SP_DELIVERY_CONSISTENCY')
)

In [19]:
# Per-supplier aggregate frames, in the order they will be joined.
features = [
    avg_order_processing_time,
    avg_delivery_delay,
    total_orders,
    order_consistency,
    delivery_consistency,
]

# Base frame with one row per distinct SUPPLIERID found in the full extract
# `df`; the aggregates above are joined onto it.
supplier_ids = df[['SUPPLIERID']].drop_duplicates()
customer_features = supplier_ids.reset_index(drop=True)

In [20]:
# Left-join each per-supplier aggregate onto the supplier base frame, so a
# supplier with no value for an aggregate keeps its row with NaN.
for supplier_kpi in features:
    customer_features = pd.merge(customer_features, supplier_kpi, how='left', on='SUPPLIERID')

In [21]:
# Attach the per-supplier aggregates to every training row via SUPPLIERID.
df_train = pd.merge(df_train, customer_features, how='left', on='SUPPLIERID')

In [22]:
# List the columns to confirm the per-supplier aggregate features were merged in.
df_train.columns

Index(['ORDERID', 'ORDERITEMID', 'PRODUCTID', 'PRODUCTNAME', 'ORDERQUANTITY',
       'UNITPRICE', 'ORDERVALUE', 'CUSTOMERID', 'CUSTOMERNAME',
       'PRODUCTCATEGORY', 'ORDERDATE', 'ORDERSTATUS', 'ORDERAMOUNT',
       'ORDERCOUNT', 'SHIPMENTID', 'SHIPMENTDATE', 'DELIVERYDATE',
       'DELIVEREDON', 'CARRIER', 'SHIPMENTSTATUS', 'INVOICEID', 'INVOICEDATE',
       'INVOICEDUEDATE', 'PAYMENTDATE', 'INVOICESTATUS', 'PAYMENTMETHOD',
       'PRODUCTTYPE', 'QUANTITY', 'COMPANYTYPE', 'CONTACTDETAILS',
       'EMAILDETAILS', 'ADDRESSDETAILS', 'ADMINDETAILS', 'CREDITLIMIT',
       'CUSTOMERSINCE', 'PAYMENTTERMS', 'CREDITLIMITTYPE', 'CUSTOMERTYPE',
       'SUPPLIERNAME', 'SUPPLIERID', 'SP_ORDER_PROCESSING_TIME',
       'SP_ORDER_VALUE_PER_UNIT', 'SP_DELIVERY_DELAY',
       'SP_CUSTOMER_LIFETIME_VALUE', 'SP_ORDER_FREQUENCY',
       'SP_AVERAGE_ORDER_VALUE', 'SP_AVG_ORDER_PROCESSING_TIME',
       'SP_AVG_DELIVERY_DELAY', 'SP_TOTAL_ORDERS', 'SP_ORDER_CONSISTENCY',
       'SP_DELIVERY_CONSISTENCY'],
      dtype='object')

In [23]:
# Keep only the supplier identity and the per-supplier KPI columns.
# This rebinds `df`, which until this cell held the full extract.
supplier_kpi_columns = [
    'SUPPLIERNAME',
    'SUPPLIERID',
    'SP_CUSTOMER_LIFETIME_VALUE',
    'SP_ORDER_FREQUENCY',
    'SP_AVERAGE_ORDER_VALUE',
    'SP_AVG_ORDER_PROCESSING_TIME',
    'SP_AVG_DELIVERY_DELAY',
    'SP_TOTAL_ORDERS',
    'SP_ORDER_CONSISTENCY',
    'SP_DELIVERY_CONSISTENCY',
]
df = df_train[supplier_kpi_columns]

In [24]:
# Shape before de-duplication: still one row per training row.
df.shape

(170161, 10)

In [25]:
# All selected KPI columns are per-supplier values, so keeping the first
# occurrence of each distinct row collapses the frame to the distinct
# supplier rows (presumably one SUPPLIERNAME per SUPPLIERID — verify).
df = df.drop_duplicates(keep='first')

In [26]:
# Shape after de-duplication: number of distinct supplier KPI rows.
df.shape

(5, 10)

In [27]:
# Preview the supplier KPI rows (up to 7) before writing them out.
df.head(7)

Unnamed: 0,SUPPLIERNAME,SUPPLIERID,SP_CUSTOMER_LIFETIME_VALUE,SP_ORDER_FREQUENCY,SP_AVERAGE_ORDER_VALUE,SP_AVG_ORDER_PROCESSING_TIME,SP_AVG_DELIVERY_DELAY,SP_TOTAL_ORDERS,SP_ORDER_CONSISTENCY,SP_DELIVERY_CONSISTENCY
0,Procure Plus,SUP--8f625c7b-d43b-4436-80ad-4cbba99c420p,14070911837,15941,440073.554669,12.504942,3.002158,31974,1.709657,1.408239
5,Sure Source,SUP--8f625c7b-d43b-4436-80ad-4cbba77c779e,1752937837,14626,59739.557544,7.500733,2.999012,29343,1.708891,1.413743
7,Prime Partners,SUP--8f625c7b-d43b-4436-80ad-4cbba77c770f,420747225,8556,24574.921149,5.011915,1.49816,17121,1.413585,0.500011
9,Wholesale Wizards,SUP--8f625c7b-d43b-4436-80ad-4cbba77c820q,1743863445,37831,23077.660888,2.008033,1.49691,75565,0.815395,0.499994
18,Supply Solutions,SUP--8f625c7b-d43b-4436-80ad-4cbba77c420y,443409125,8046,27442.079775,9.970231,3.001176,16158,3.1795,1.41279


In [28]:
# Rebuild the supplier KPI frame on the Snowflake side (rows passed as plain
# lists, column names as the schema) and overwrite the target table with it.
kpi_rows = df.values.tolist()
kpi_columns = df.columns.tolist()
df_train_sf = my_session.createDataFrame(kpi_rows, schema=kpi_columns)
df_train_sf.write.mode("overwrite").save_as_table("FDC_HORIZONTAL.O2C_GOLD.ORDER_TO_CASH_SUPPLIER_KPI")