# Use O2C_Template Customized Notebook Template

In [2]:
import pandas as pd
import numpy as np

In [3]:
from fosforml.model_manager.snowflakesession import get_session
# Session handle used further down to run the source query (.sql) and to
# write the resulting KPI table (.createDataFrame / save_as_table).
my_session = get_session()

In [4]:
# Source table for this run. 'ORDER_TO_CASH_ENRICHED' was left disabled as an
# alternative source in the original notebook.
table_name = 'ORDER_TO_CASH_MASTER'

# Select every column of the table through the session, then pull the
# result into a pandas DataFrame.
sf_df = my_session.sql(f"select * from {table_name}")
df = sf_df.to_pandas()

In [5]:
# Split on INVOICESTATUS: rows whose status is LATE_PAYMENT or ONTIME_PAYMENT
# form df_train; every other row (including missing statuses) goes to df_test.
settled_mask = df['INVOICESTATUS'].isin(['LATE_PAYMENT', 'ONTIME_PAYMENT'])

# .copy() makes each split an independent frame. Later cells assign new
# columns onto df_train; without the copy pandas treats those assignments as
# writes to a selection of df and emits SettingWithCopyWarning.
df_train = df[settled_mask].copy()
df_test = df[~settled_mask].copy()

In [6]:
# Display (rows, columns) for both splits.
df_train.shape, df_test.shape

((170445, 40), (2992, 40))

In [7]:
# Parse the order, delivery, invoice, payment and invoice-due columns of
# df_train as pandas datetimes, one column at a time.
for date_col in ('ORDERDATE', 'DELIVERYDATE', 'INVOICEDATE', 'PAYMENTDATE', 'INVOICEDUEDATE'):
    df_train[date_col] = pd.to_datetime(df_train[date_col])

In [8]:
# Parse DELIVEREDON as datetime; it is subtracted against DELIVERYDATE when
# SP_DELIVERY_DELAY is computed.
df_train['DELIVEREDON'] = pd.to_datetime(df_train['DELIVEREDON'])

In [9]:
# Parse SHIPMENTDATE as datetime; it is subtracted against ORDERDATE when
# SP_ORDER_PROCESSING_TIME is computed.
df_train['SHIPMENTDATE'] = pd.to_datetime(df_train['SHIPMENTDATE'])

In [10]:
# Order processing time: whole days from ORDERDATE to SHIPMENTDATE.
df_train['SP_ORDER_PROCESSING_TIME'] = df_train['SHIPMENTDATE'].sub(df_train['ORDERDATE']).dt.days

# Order value per unit: ORDERVALUE divided by ORDERQUANTITY for the same row.
df_train['SP_ORDER_VALUE_PER_UNIT'] = df_train['ORDERVALUE'].div(df_train['ORDERQUANTITY'])

# Delivery delay: whole days from DELIVERYDATE to DELIVEREDON
# (negative when DELIVEREDON is the earlier of the two).
df_train['SP_DELIVERY_DELAY'] = df_train['DELIVEREDON'].sub(df_train['DELIVERYDATE']).dt.days

In [11]:
# Supplier lifetime value: total ORDERVALUE per SUPPLIERID, then attached to
# every df_train row of that supplier via a left merge.
clv = (
    df_train.groupby('SUPPLIERID')['ORDERVALUE']
    .sum()
    .rename('SP_CUSTOMER_LIFETIME_VALUE')
    .reset_index()
)
df_train = pd.merge(df_train, clv, how='left', on='SUPPLIERID')

In [12]:
# Order frequency: number of distinct ORDERID values per SUPPLIERID, then
# attached to every df_train row of that supplier via a left merge.
order_freq = (
    df_train.groupby('SUPPLIERID')['ORDERID']
    .nunique()
    .rename('SP_ORDER_FREQUENCY')
    .reset_index()
)
df_train = pd.merge(df_train, order_freq, how='left', on='SUPPLIERID')

In [13]:
# Average order value: mean ORDERVALUE over each supplier's df_train rows,
# then attached to every row of that supplier via a left merge.
avg_order_value = (
    df_train.groupby('SUPPLIERID')['ORDERVALUE']
    .mean()
    .rename('SP_AVERAGE_ORDER_VALUE')
    .reset_index()
)
df_train = pd.merge(df_train, avg_order_value, how='left', on='SUPPLIERID')

In [14]:
# Display the column index of df_train after the merges above.
df_train.columns

Index(['ORDERID', 'ORDERITEMID', 'PRODUCTID', 'PRODUCTNAME', 'ORDERQUANTITY',
       'UNITPRICE', 'ORDERVALUE', 'CUSTOMERID', 'CUSTOMERNAME',
       'PRODUCTCATEGORY', 'ORDERDATE', 'ORDERSTATUS', 'ORDERAMOUNT',
       'ORDERCOUNT', 'SHIPMENTID', 'SHIPMENTDATE', 'DELIVERYDATE',
       'DELIVEREDON', 'CARRIER', 'SHIPMENTSTATUS', 'INVOICEID', 'INVOICEDATE',
       'INVOICEDUEDATE', 'PAYMENTDATE', 'INVOICESTATUS', 'PAYMENTMETHOD',
       'PRODUCTTYPE', 'QUANTITY', 'COMPANYTYPE', 'CONTACTDETAILS',
       'EMAILDETAILS', 'ADDRESSDETAILS', 'ADMINDETAILS', 'CREDITLIMIT',
       'CUSTOMERSINCE', 'PAYMENTTERMS', 'CREDITLIMITTYPE', 'CUSTOMERTYPE',
       'SUPPLIERNAME', 'SUPPLIERID', 'SP_ORDER_PROCESSING_TIME',
       'SP_ORDER_VALUE_PER_UNIT', 'SP_DELIVERY_DELAY',
       'SP_CUSTOMER_LIFETIME_VALUE', 'SP_ORDER_FREQUENCY',
       'SP_AVERAGE_ORDER_VALUE'],
      dtype='object')

In [15]:
# Average order processing time: mean SP_ORDER_PROCESSING_TIME per SUPPLIERID.
avg_order_processing_time = (
    df_train.groupby('SUPPLIERID')['SP_ORDER_PROCESSING_TIME']
    .mean()
    .rename('SP_AVG_ORDER_PROCESSING_TIME')
    .reset_index()
)

In [16]:
# Average delivery delay: mean SP_DELIVERY_DELAY per SUPPLIERID.
avg_delivery_delay = (
    df_train.groupby('SUPPLIERID')['SP_DELIVERY_DELAY']
    .mean()
    .rename('SP_AVG_DELIVERY_DELAY')
    .reset_index()
)

In [17]:
# "Total orders" per SUPPLIERID, computed as the number of df_train rows with a
# non-null SP_ORDER_PROCESSING_TIME. This is a row count, not a count of
# distinct ORDERID values (that is SP_ORDER_FREQUENCY).
total_orders = (
    df_train.groupby('SUPPLIERID')['SP_ORDER_PROCESSING_TIME']
    .count()
    .rename('SP_TOTAL_ORDERS')
    .reset_index()
)

In [18]:
# Order consistency: standard deviation of SP_ORDER_PROCESSING_TIME per
# SUPPLIERID (lower means processing times vary less).
order_consistency = (
    df_train.groupby('SUPPLIERID')['SP_ORDER_PROCESSING_TIME']
    .std()
    .rename('SP_ORDER_CONSISTENCY')
    .reset_index()
)

In [19]:
# Delivery consistency: standard deviation of SP_DELIVERY_DELAY per
# SUPPLIERID (lower means delivery delays vary less).
delivery_consistency = (
    df_train.groupby('SUPPLIERID')['SP_DELIVERY_DELAY']
    .std()
    .rename('SP_DELIVERY_CONSISTENCY')
    .reset_index()
)

In [20]:
# Per-supplier aggregate frames to combine, in the order their columns will
# be appended.
features = [
    avg_order_processing_time,
    avg_delivery_delay,
    total_orders,
    order_consistency,
    delivery_consistency,
]

# Base frame for the merges: one row per distinct SUPPLIERID found in df,
# with a fresh 0..n-1 index.
customer_features = df[['SUPPLIERID']].drop_duplicates(ignore_index=True)

In [21]:
# Left-join each aggregate frame onto the supplier key frame; every pass adds
# one column and keeps all SUPPLIERID rows.
for feature in features:
    customer_features = pd.merge(customer_features, feature, how='left', on='SUPPLIERID')

In [22]:
# Attach the per-supplier aggregates to every df_train row of that supplier.
# validate='many_to_one' makes pandas raise MergeError if customer_features
# ever contains the same SUPPLIERID more than once, instead of silently
# duplicating df_train rows and inflating every downstream count.
df_train = df_train.merge(
    customer_features,
    on='SUPPLIERID',
    how='left',
    validate='many_to_one',
)

In [23]:
# Display the column index of df_train after the supplier-level merge.
df_train.columns

Index(['ORDERID', 'ORDERITEMID', 'PRODUCTID', 'PRODUCTNAME', 'ORDERQUANTITY',
       'UNITPRICE', 'ORDERVALUE', 'CUSTOMERID', 'CUSTOMERNAME',
       'PRODUCTCATEGORY', 'ORDERDATE', 'ORDERSTATUS', 'ORDERAMOUNT',
       'ORDERCOUNT', 'SHIPMENTID', 'SHIPMENTDATE', 'DELIVERYDATE',
       'DELIVEREDON', 'CARRIER', 'SHIPMENTSTATUS', 'INVOICEID', 'INVOICEDATE',
       'INVOICEDUEDATE', 'PAYMENTDATE', 'INVOICESTATUS', 'PAYMENTMETHOD',
       'PRODUCTTYPE', 'QUANTITY', 'COMPANYTYPE', 'CONTACTDETAILS',
       'EMAILDETAILS', 'ADDRESSDETAILS', 'ADMINDETAILS', 'CREDITLIMIT',
       'CUSTOMERSINCE', 'PAYMENTTERMS', 'CREDITLIMITTYPE', 'CUSTOMERTYPE',
       'SUPPLIERNAME', 'SUPPLIERID', 'SP_ORDER_PROCESSING_TIME',
       'SP_ORDER_VALUE_PER_UNIT', 'SP_DELIVERY_DELAY',
       'SP_CUSTOMER_LIFETIME_VALUE', 'SP_ORDER_FREQUENCY',
       'SP_AVERAGE_ORDER_VALUE', 'SP_AVG_ORDER_PROCESSING_TIME',
       'SP_AVG_DELIVERY_DELAY', 'SP_TOTAL_ORDERS', 'SP_ORDER_CONSISTENCY',
       'SP_DELIVERY_CONSISTENCY'],
 

In [29]:
# Keep only the supplier identity columns and the supplier-level SP_* aggregates.
# Note: this rebinds df, which up to this point held the full source table.
df = df_train.loc[:, [
    'SUPPLIERNAME',
    'SUPPLIERID',
    'SP_CUSTOMER_LIFETIME_VALUE',
    'SP_ORDER_FREQUENCY',
    'SP_AVERAGE_ORDER_VALUE',
    'SP_AVG_ORDER_PROCESSING_TIME',
    'SP_AVG_DELIVERY_DELAY',
    'SP_TOTAL_ORDERS',
    'SP_ORDER_CONSISTENCY',
    'SP_DELIVERY_CONSISTENCY',
]]

In [30]:
# Display (rows, columns) of the selected KPI columns.
df.shape

(170445, 10)

In [31]:
# Drop rows that are exact duplicates across all columns of df, keeping the
# first occurrence (original index labels are retained).
df = df.drop_duplicates()

In [32]:
# Display (rows, columns) of df at this point.
df.shape

(5, 10)

In [33]:
# Preview up to the first 7 rows of df.
df.head(7)

Unnamed: 0,SUPPLIERNAME,SUPPLIERID,SP_CUSTOMER_LIFETIME_VALUE,SP_ORDER_FREQUENCY,SP_AVERAGE_ORDER_VALUE,SP_AVG_ORDER_PROCESSING_TIME,SP_AVG_DELIVERY_DELAY,SP_TOTAL_ORDERS,SP_ORDER_CONSISTENCY,SP_DELIVERY_CONSISTENCY
0,Wholesale Wizards,SUP--8f625c7b-d43b-4436-80ad-4cbba77c820q,1741200929,37809,22974.322514,2.0038,1.494201,75789,0.818478,0.49997
3,Supply Solutions,SUP--8f625c7b-d43b-4436-80ad-4cbba77c420y,442837278,7881,28052.532497,9.951413,2.979729,15786,3.1482,1.417916
10,Sure Source,SUP--8f625c7b-d43b-4436-80ad-4cbba77c779e,1790952920,14891,59798.094157,7.484608,2.998765,29950,1.706115,1.41788
14,Procure Plus,SUP--8f625c7b-d43b-4436-80ad-4cbba99c420p,14340525792,15941,447791.593817,12.499797,3.018392,32025,1.718133,1.415584
24,Prime Partners,SUP--8f625c7b-d43b-4436-80ad-4cbba77c770f,414761779,8478,24549.380231,5.011897,1.494762,16895,1.414938,0.499987


In [34]:
# Rebuild the pandas KPI frame on the session side: rows are passed as plain
# Python lists and the pandas column names are used as the schema.
df_train_sf = my_session.createDataFrame(df.values.tolist(), schema=list(df.columns))

# Persist the result; "overwrite" mode replaces any existing contents of the
# target table.
(
    df_train_sf.write
    .mode("overwrite")
    .save_as_table("FDC_HORIZONTAL.O2C_GOLD.ORDER_TO_CASH_SUPPLIER_KPI")
)