# Use O2C_Template Customized Notebook Template

In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler,MinMaxScaler
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import classification_report, f1_score, r2_score, roc_auc_score, confusion_matrix, accuracy_score

In [2]:
from fosforml.model_manager.snowflakesession import get_session
my_session = get_session()

In [3]:
table_name = 'ORDER_TO_CASH_ENRICHED'

sf_df = my_session.sql("select * from {}".format(table_name))
df = sf_df.to_pandas()

In [4]:
df_train = df[df['INVOICESTATUS'].isin(['LATE_PAYMENT','ONTIME_PAYMENT'])]
df_test = df[~df['INVOICESTATUS'].isin(['LATE_PAYMENT','ONTIME_PAYMENT'])]

In [5]:
df_train.shape, df_test.shape

((170161, 79), (693, 79))

In [6]:
df_final = df.copy()
df = df_train.copy()

In [7]:
df_final.shape, df.shape

((170854, 79), (170161, 79))

In [8]:
# Assuming df is your dataframe
df['ORDERDATE'] = pd.to_datetime(df['ORDERDATE'], errors='coerce')
df['DELIVERYDATE'] = pd.to_datetime(df['DELIVERYDATE'], errors='coerce')
df['INVOICEDATE'] = pd.to_datetime(df['INVOICEDATE'], errors='coerce')
df['PAYMENTDATE'] = pd.to_datetime(df['PAYMENTDATE'], errors='coerce')
df['INVOICEDUEDATE'] = pd.to_datetime(df['INVOICEDUEDATE'], errors='coerce')
df['DELIVEREDON'] = pd.to_datetime(df['DELIVEREDON'], errors='coerce')

In [9]:
df['MONTH'] = pd.to_datetime(df['ORDERDATE']).dt.month
df['YEAR'] = pd.to_datetime(df['ORDERDATE']).dt.year
df['WEEK'] = pd.to_datetime(df['ORDERDATE']).dt.isocalendar().week
df['WEEKEND'] = pd.to_datetime(df['ORDERDATE']).dt.weekday >= 5  # True if the day is Saturday or Sunday
df['DAY_OF_THE_MONTH'] = pd.to_datetime(df['ORDERDATE']).dt.day

In [10]:
# Delivery Delay
df['DELIVERY_DELAY'] = (df['DELIVEREDON'] - df['DELIVERYDATE']).dt.days

# Payment Delay
df['PAYMENT_DELAY'] = (df['PAYMENTDATE'] - df['INVOICEDUEDATE']).dt.days

In [11]:
df[['PAYMENTDATE','INVOICEDUEDATE','PAYMENT_DELAY','DELIVEREDON','DELIVERYDATE','DELIVERY_DELAY']].head()

Unnamed: 0,PAYMENTDATE,INVOICEDUEDATE,PAYMENT_DELAY,DELIVEREDON,DELIVERYDATE,DELIVERY_DELAY
0,2022-01-21,2022-02-02,-12,2022-01-08,2022-01-06,2
1,2022-01-21,2022-02-02,-12,2022-01-08,2022-01-06,2
2,2022-01-21,2022-02-02,-12,2022-01-08,2022-01-06,2
3,2022-02-01,2022-02-06,-5,2022-01-06,2022-01-05,1
4,2022-02-21,2022-02-10,11,2022-01-15,2022-01-11,4


In [12]:
def set_flag(delay):
    return 1 if delay >= 1 else 0

In [13]:
df['PAYMENT_DELAY_FLAG'] = df['PAYMENT_DELAY'].apply(set_flag)
df['DELIVERY_DELAY_FLAG'] = df['DELIVERY_DELAY'].apply(set_flag)

In [14]:
df['PAYMENT_DELAY_FLAG'].value_counts()

PAYMENT_DELAY_FLAG
1    89835
0    80326
Name: count, dtype: int64

In [15]:
df['DELIVERY_DELAY_FLAG'].value_counts()

DELIVERY_DELAY_FLAG
1    170161
Name: count, dtype: int64

In [16]:
print(list(df.columns))

['ORDERID', 'ORDERITEMID', 'PRODUCTID', 'PRODUCTNAME', 'ORDERQUANTITY', 'UNITPRICE', 'ORDERVALUE', 'CUSTOMERID', 'CUSTOMERNAME', 'PRODUCTCATEGORY', 'ORDERDATE', 'ORDERSTATUS', 'ORDERAMOUNT', 'ORDERCOUNT', 'SHIPMENTID', 'SHIPMENTDATE', 'DELIVERYDATE', 'DELIVEREDON', 'CARRIER', 'SHIPMENTSTATUS', 'INVOICEID', 'INVOICEDATE', 'INVOICEDUEDATE', 'PAYMENTDATE', 'INVOICESTATUS', 'PAYMENTMETHOD', 'PRODUCTTYPE', 'QUANTITY', 'COMPANYTYPE', 'CONTACTDETAILS', 'EMAILDETAILS', 'ADDRESSDETAILS', 'ADMINDETAILS', 'CREDITLIMIT', 'CUSTOMERSINCE', 'PAYMENTTERMS', 'CREDITLIMITTYPE', 'CUSTOMERTYPE', 'SUPPLIERNAME', 'SUPPLIERID', 'CC_CUSTOMER_LIFETIME_VALUE', 'CC_ORDER_FREQUENCY', 'CC_AVERAGE_ORDER_VALUE', 'CC_RECENCY', 'CC_PREFERRED_PAYMENT_METHOD', 'CC_PREFERRED_PRODUCT_CATEGORY', 'CC_PREFERRED_PRODUCT_TYPE', 'CC_AVG_ORDER_PROCESSING_TIME', 'CC_AVG_INVOICE_PROCESSING_TIME', 'CC_AVG_DELIVERY_DELAY', 'CC_AVG_PAYMENT_DELAY', 'CC_TOTAL_ORDERS', 'CC_TOTAL_DELAYS', 'CC_ORDER_CONSISTENCY', 'CC_INVOICE_CONSISTENCY',

In [17]:
cols = ['ORDERAMOUNT','ORDERCOUNT',
'CC_CUSTOMER_LIFETIME_VALUE', 'CC_ORDER_FREQUENCY', 'CC_AVERAGE_ORDER_VALUE', 'CC_RECENCY', 'CC_AVG_ORDER_PROCESSING_TIME', 'CC_AVG_INVOICE_PROCESSING_TIME', 
'CC_AVG_DELIVERY_DELAY', 'CC_AVG_PAYMENT_DELAY', 'CC_TOTAL_ORDERS', 'CC_TOTAL_DELAYS', 'CC_ORDER_CONSISTENCY', 'CC_INVOICE_CONSISTENCY', 
'CC_DELIVERY_CONSISTENCY', 'CC_PAYMENT_CONSISTENCY', 'CAT_TOTAL_SALES_VOLUME', 'CAT_TOTAL_SALES_VALUE', 
'CAT_AVG_ORDER_QUANTITY', 'CAT_AVG_UNIT_PRICE', 'CAT_NUMBER_OF_ORDERS', 'CAT_AVG_DELIVERY_TIME', 'CAT_AVG_INVOICE_TIME', 
'SP_CUSTOMER_LIFETIME_VALUE', 'SP_ORDER_FREQUENCY', 'SP_AVERAGE_ORDER_VALUE', 'SP_AVG_ORDER_PROCESSING_TIME', 'SP_AVG_DELIVERY_DELAY', 
'SP_TOTAL_ORDERS', 'SP_ORDER_CONSISTENCY', 'SP_DELIVERY_CONSISTENCY', 'MONTH', 'YEAR', 'WEEK', 'WEEKEND', 'DAY_OF_THE_MONTH', 'PAYMENT_DELAY_FLAG']

In [18]:
df_payments = df[cols]

In [19]:
df_payments.head()

Unnamed: 0,ORDERAMOUNT,ORDERCOUNT,CC_CUSTOMER_LIFETIME_VALUE,CC_ORDER_FREQUENCY,CC_AVERAGE_ORDER_VALUE,CC_RECENCY,CC_AVG_ORDER_PROCESSING_TIME,CC_AVG_INVOICE_PROCESSING_TIME,CC_AVG_DELIVERY_DELAY,CC_AVG_PAYMENT_DELAY,...,SP_AVG_DELIVERY_DELAY,SP_TOTAL_ORDERS,SP_ORDER_CONSISTENCY,SP_DELIVERY_CONSISTENCY,MONTH,YEAR,WEEK,WEEKEND,DAY_OF_THE_MONTH,PAYMENT_DELAY_FLAG
0,43795,3,14461634,542,12597.24216,2,4.508711,18.100174,1.500871,-6.397213,...,1.49691,75565,0.815395,0.499994,1,2022,52,True,1,0
1,43795,3,14461634,542,12597.24216,2,4.508711,18.100174,1.500871,-6.397213,...,1.49691,75565,0.815395,0.499994,1,2022,52,True,1,0
2,43795,3,14461634,542,12597.24216,2,4.508711,18.100174,1.500871,-6.397213,...,1.49691,75565,0.815395,0.499994,1,2022,52,True,1,0
3,5096,1,35737829,519,35105.922397,2,4.539293,29.626719,1.530452,0.13556,...,1.49691,75565,0.815395,0.499994,1,2022,52,True,1,0
4,138438,2,34640409,597,27935.81371,1,12.387903,22.56371,2.992742,-1.927419,...,3.001176,16158,3.1795,1.41279,1,2022,52,True,1,1


In [20]:
df_payments.shape

(170161, 37)

In [21]:
df_payments = df_payments.drop_duplicates()

In [22]:
df_payments.shape

(85000, 37)

In [23]:
df_payments['PAYMENT_DELAY_FLAG'].value_counts()

PAYMENT_DELAY_FLAG
0    45816
1    39184
Name: count, dtype: int64

In [24]:
# Separate features and target variable
X = df_payments.drop(columns=['PAYMENT_DELAY_FLAG'])
y = df_payments['PAYMENT_DELAY_FLAG']

In [25]:
X.shape

(85000, 36)

# Standard Scaling of Input

In [26]:
from sklearn.preprocessing import StandardScaler

# Standardize the predictor variables
scaler = StandardScaler()

X_scaled = scaler.fit_transform(X)

df_scaled = pd.DataFrame(X_scaled, columns=X.columns)

In [27]:
df_scaled.shape

(85000, 36)

In [28]:
df_scaled['PAYMENT_DELAY_FLAG'] = y.values

In [29]:
df_scaled['PAYMENT_DELAY_FLAG'].value_counts()

PAYMENT_DELAY_FLAG
0    45816
1    39184
Name: count, dtype: int64

# Model Training for Payment Delay Prediction

In [30]:
# Separate features and target variable
X = df_scaled.drop(columns=['PAYMENT_DELAY_FLAG'])
y = df_scaled['PAYMENT_DELAY_FLAG']

In [31]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# Decision Tree Classifier

In [32]:
#from sklearn.tree import DecisionTreeClassifier
dtc = DecisionTreeClassifier()
dtc.fit(X_train, y_train)

In [33]:
y_pred = dtc.predict(X_test)

In [34]:
dtc_train_acc = accuracy_score(y_train, dtc.predict(X_train))
dtc_test_acc = accuracy_score(y_test, y_pred)

print(f"Training accuracy of Decision Tree is : {dtc_train_acc}")
print(f"Test accuracy of Decision Tree is : {dtc_test_acc}")

print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

Training accuracy of Decision Tree is : 1.0
Test accuracy of Decision Tree is : 0.9712470588235295
[[11064   370]
 [  241  9575]]
              precision    recall  f1-score   support

           0       0.98      0.97      0.97     11434
           1       0.96      0.98      0.97      9816

    accuracy                           0.97     21250
   macro avg       0.97      0.97      0.97     21250
weighted avg       0.97      0.97      0.97     21250



# Random Forest Classifier

In [35]:
#from sklearn.tree import DecisionTreeClassifier
rfc = RandomForestClassifier()
rfc.fit(X_train, y_train)

In [36]:
y_pred = rfc.predict(X_test)

In [37]:
rfc_train_acc = accuracy_score(y_train, rfc.predict(X_train))
rfc_test_acc = accuracy_score(y_test, y_pred)

print(f"Training accuracy of RandomForest is : {rfc_train_acc}")
print(f"Test accuracy of RandomForest is : {rfc_test_acc}")

print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

Training accuracy of RandomForest is : 0.9999529411764706
Test accuracy of RandomForest is : 0.9855058823529412
[[11390    44]
 [  264  9552]]
              precision    recall  f1-score   support

           0       0.98      1.00      0.99     11434
           1       1.00      0.97      0.98      9816

    accuracy                           0.99     21250
   macro avg       0.99      0.98      0.99     21250
weighted avg       0.99      0.99      0.99     21250



In [200]:
from fosforml import register_model

In [201]:
type(X_train), type(X_test),type(y_train),type(y_test), type(y_pred)

(pandas.core.frame.DataFrame,
 pandas.core.frame.DataFrame,
 pandas.core.series.Series,
 pandas.core.series.Series,
 numpy.ndarray)

In [202]:
y_train_df = pd.DataFrame(y_train)
y_test_df = pd.DataFrame(y_test)

In [203]:
y_pred_df = pd.DataFrame(y_pred, columns=['PREDICTED_PAYMENT_DELAY'])

In [204]:
type(X_train), type(X_test),type(y_train_df),type(y_test_df), type(y_pred_df)

(pandas.core.frame.DataFrame,
 pandas.core.frame.DataFrame,
 pandas.core.frame.DataFrame,
 pandas.core.frame.DataFrame,
 pandas.core.frame.DataFrame)

# Model Registration

In [205]:
## registering the model in Fosfor Insight Designer.
register_model(
    model_obj=rfc, 
    session=my_session,
    x_train=X_train,
    y_train=y_train_df,
    x_test=X_test,
    y_test=y_test_df,
    y_pred=y_pred_df,
    source="Notebook",
    dataset_name="ORDER_TO_CASH_ENRICHED",
    dataset_source="Snowflake",
    #dataset_source="InMemory",
    name="RandomForest_Payment_Delay_Classifier",
    description="RandomForest model trained via Notebook to identify delay order payment",
    flavour="sklearn",
    model_type="classification",
    conda_dependencies=["scikit-learn==1.3.2"]
)

Got error object of type 'NoneType' has no len() when trying to read default values from function: <class 'snowflake.ml.modeling.metrics.classification._register_confusion_matrix_computer.<locals>.ConfusionMatrixComputer'>. Proceeding without creating optional arguments
Got error object of type 'NoneType' has no len() when trying to read default values from function: <class 'snowflake.ml.modeling.metrics.metrics_utils.register_accumulator_udtf.<locals>.Accumulator'>. Proceeding without creating optional arguments


Error in while calculating confusion_matrix 
Calculating build time metrics

Progress: ██████████████                                                         20.0%
Calculating build time metrics

Progress: ████████████████████████████                                           40.0%


Got error object of type 'NoneType' has no len() when trying to read default values from function: <class 'snowflake.ml.modeling.metrics.classification._register_multilabel_confusion_matrix_computer.<locals>.MultilabelConfusionMatrixComputer'>. Proceeding without creating optional arguments
The version of package 'scikit-learn' in the local environment is 1.3.2, which does not fit the criteria for the requirement 'scikit-learn<1.4'. Your UDF might not work when the package version is different between the server and your local environment.
DataFrame.flatten() is deprecated since 0.7.0. Use `DataFrame.join_table_function()` instead.
Got error object of type 'NoneType' has no len() when trying to read default values from function: <class 'snowflake.ml.modeling.metrics.classification._register_multilabel_confusion_matrix_computer.<locals>.MultilabelConfusionMatrixComputer'>. Proceeding without creating optional arguments
The version of package 'scikit-learn' in the local environment is 1.

Calculating build time metrics

Progress: ██████████████████████████████████████████                             60.0%


The version of package 'scikit-learn' in the local environment is 1.3.2, which does not fit the criteria for the requirement 'scikit-learn==1.3.*'. Your UDF might not work when the package version is different between the server and your local environment.
The version of package 'snowflake-snowpark-python' in the local environment is 1.22.1, which does not fit the criteria for the requirement 'snowflake-snowpark-python'. Your UDF might not work when the package version is different between the server and your local environment.
Got error object of type 'NoneType' has no len() when trying to read default values from function: <function roc_curve.<locals>.roc_curve_anon_sproc at 0x7f6b9b85c280>. Proceeding without creating optional arguments
The version of package 'scikit-learn' in the local environment is 1.3.2, which does not fit the criteria for the requirement 'scikit-learn==1.3.*'. Your UDF might not work when the package version is different between the server and your local enviro

Calculating build time metrics

Progress: ████████████████████████████████████████████████████████               80.0%
Calculating build time metrics

Progress: ██████████████████████████████████████████████████████████████████████ 100.0%


"Model 'MODEL_0CA27E4E_2746_4060_B8F7_DBF890E89D5A_FDC_RANDOMFOREST_PAYMENT_DELAY_CLASSIFIER' registered successfully."

# Model inference on entire dataset

In [38]:
table_name = 'ORDER_TO_CASH_ENRICHED'

sf_df = my_session.sql("select * from {}".format(table_name))
df = sf_df.to_pandas()

In [39]:
df['ORDERDATE'] = pd.to_datetime(df['ORDERDATE'], errors='coerce')

df['MONTH'] = pd.to_datetime(df['ORDERDATE']).dt.month
df['YEAR'] = pd.to_datetime(df['ORDERDATE']).dt.year
df['WEEK'] = pd.to_datetime(df['ORDERDATE']).dt.isocalendar().week
df['WEEKEND'] = pd.to_datetime(df['ORDERDATE']).dt.weekday >= 5  # True if the day is Saturday or Sunday
df['DAY_OF_THE_MONTH'] = pd.to_datetime(df['ORDERDATE']).dt.day

In [40]:
cols = ['ORDERAMOUNT','ORDERCOUNT',
'CC_CUSTOMER_LIFETIME_VALUE', 'CC_ORDER_FREQUENCY', 'CC_AVERAGE_ORDER_VALUE', 'CC_RECENCY', 'CC_AVG_ORDER_PROCESSING_TIME', 'CC_AVG_INVOICE_PROCESSING_TIME', 
'CC_AVG_DELIVERY_DELAY', 'CC_AVG_PAYMENT_DELAY', 'CC_TOTAL_ORDERS', 'CC_TOTAL_DELAYS', 'CC_ORDER_CONSISTENCY', 'CC_INVOICE_CONSISTENCY', 
'CC_DELIVERY_CONSISTENCY', 'CC_PAYMENT_CONSISTENCY', 'CAT_TOTAL_SALES_VOLUME', 'CAT_TOTAL_SALES_VALUE', 
'CAT_AVG_ORDER_QUANTITY', 'CAT_AVG_UNIT_PRICE', 'CAT_NUMBER_OF_ORDERS', 'CAT_AVG_DELIVERY_TIME', 'CAT_AVG_INVOICE_TIME', 
'SP_CUSTOMER_LIFETIME_VALUE', 'SP_ORDER_FREQUENCY', 'SP_AVERAGE_ORDER_VALUE', 'SP_AVG_ORDER_PROCESSING_TIME', 'SP_AVG_DELIVERY_DELAY', 
'SP_TOTAL_ORDERS', 'SP_ORDER_CONSISTENCY', 'SP_DELIVERY_CONSISTENCY', 'MONTH', 'YEAR', 'WEEK', 'WEEKEND', 'DAY_OF_THE_MONTH']

In [41]:
X_scaled = scaler.fit_transform(df[cols])

In [42]:
y_pred = rfc.predict(X_scaled)

In [46]:
y_prob = rfc.predict_proba(X_scaled)[:,1]

In [47]:
df['PREDICTED_PAYMENT_DELAY'] = y_pred
df['PROBABILITY_PAYMENT_DELAY'] = y_prob

In [48]:
df.head()

Unnamed: 0,ORDERID,ORDERITEMID,PRODUCTID,PRODUCTNAME,ORDERQUANTITY,UNITPRICE,ORDERVALUE,CUSTOMERID,CUSTOMERNAME,PRODUCTCATEGORY,...,SP_TOTAL_ORDERS,SP_ORDER_CONSISTENCY,SP_DELIVERY_CONSISTENCY,MONTH,YEAR,WEEK,WEEKEND,DAY_OF_THE_MONTH,PREDICTED_PAYMENT_DELAY,PROBABILITY_PAYMENT_DELAY
0,OR-5010bdc2-ea6e-43ff-91fe-34f4f8e87217,OI-1e6cf9ab-6c8d-4005-9dfe-76902e925900,PID-df71fb0e-604f-4ded-8a16-850f47932a2a,Bottled Water,8368,5,41840,CID-98d678b8-9cf7-4a08-a80d-6e8507afd6cb,Locale Mart,Food Beverages,...,75565,0.815395,0.499994,1,2022,52,True,1,0,0.08
1,OR-5010bdc2-ea6e-43ff-91fe-34f4f8e87217,OI-f48f7937-0528-41fe-b9dd-3643d663b2cf,PID-3c84d073-9164-4016-9392-f12bf7e339d4,Greek Yogurt,377,3,1131,CID-98d678b8-9cf7-4a08-a80d-6e8507afd6cb,Locale Mart,Food Beverages,...,75565,0.815395,0.499994,1,2022,52,True,1,0,0.08
2,OR-5010bdc2-ea6e-43ff-91fe-34f4f8e87217,OI-eb1bac9b-590c-4066-8597-0fdd6cfa419a,PID-22bab63a-1cf7-49cb-9257-dfd144a9bb86,Herbal Tea,412,2,824,CID-98d678b8-9cf7-4a08-a80d-6e8507afd6cb,Locale Mart,Food Beverages,...,75565,0.815395,0.499994,1,2022,52,True,1,0,0.08
3,OR-8d781f9b-75dd-49c9-af16-e829e0886d25,OI-30b1d19c-e299-4dbd-8542-e1d4feb37b4e,PID-b5d5bbc2-7cbc-4d3a-9ee5-6252405846bf,Vacuum Cleaner,14,364,5096,CID-19d2d36a-1d5c-4504-9f12-fb1a62ee772c,Vertex Ventures,Consumer Goods,...,75565,0.815395,0.499994,1,2022,52,True,1,0,0.06
4,OR-1df689a0-f2be-4040-915f-ad4450760af4,OI-248b3f42-db14-4bdb-9c89-054cd667cb0b,PID-6cf6400d-1e83-4e24-ac90-f53754533308,Brake Pads,819,60,49140,CID-bab0d39f-4989-46b9-b129-2974828aba33,GlobalReach,Automotive,...,16158,3.1795,1.41279,1,2022,52,True,1,1,0.97


In [49]:
def re_set_flag(delay):
    return 'Y' if delay == 1 else 'N'

In [50]:
df['PREDICTED_PAYMENT_DELAY_FLAG'] = df['PREDICTED_PAYMENT_DELAY'].apply(re_set_flag)

# Push Model output back to Snowflake

In [228]:
sf_df = my_session.createDataFrame(df)
sf_df.write.mode("overwrite").save_as_table("FDC_HORIZONTAL.O2C_GOLD.ORDER_TO_CASH_INSIGHT_OUTPUT")

# Customer Segmentation Logic

In [247]:
def segment_customer(row):
    if row['CC_ORDER_FREQUENCY'] > 580 and row['CC_AVERAGE_ORDER_VALUE'] > 65000:
        return 'High Value'
    elif row['CC_ORDER_FREQUENCY'] > 575:
        return 'Frequent Buyer'
    elif row['CC_AVERAGE_ORDER_VALUE'] > 60000:
        return 'High Spender'
    else:
        return 'Regular'

In [248]:
# Apply segmentation
df['CUSTOMER_SEGMENT'] = df.apply(segment_customer, axis=1)

In [249]:
temp = df[['CUSTOMERID','CUSTOMER_SEGMENT']]
temp = temp.drop_duplicates()
temp['CUSTOMER_SEGMENT'].value_counts()

CUSTOMER_SEGMENT
Regular           83
Frequent Buyer    38
High Spender      20
High Value         9
Name: count, dtype: int64

# Discount offer Eligbility 

In [250]:
df[['CC_TOTAL_ORDERS','CC_AVERAGE_ORDER_VALUE','CC_AVG_PAYMENT_DELAY','CC_RECENCY']].quantile([0.25,0.50,0.75,0.90])

Unnamed: 0,CC_TOTAL_ORDERS,CC_AVERAGE_ORDER_VALUE,CC_AVG_PAYMENT_DELAY,CC_RECENCY
0.25,1106.0,24125.384481,-4.188368,-91.0
0.5,1133.0,33440.178975,1.170732,-87.0
0.75,1170.0,60327.420108,5.393116,-79.0
0.9,1199.0,75144.645479,9.370719,-72.0


In [251]:
df['BULK_PURCHASE_DISCOUNT'] = df.apply(
    lambda row: 'Yes' if row['CC_TOTAL_ORDERS'] > 1190 or row['CC_AVERAGE_ORDER_VALUE'] > 75144 else 'No', axis=1)

df['EARLY_PAYMENT_DISCOUNT'] = df.apply(
    lambda row: 'Yes' if row['CC_AVG_PAYMENT_DELAY'] < 1 else 'No', axis=1)

# Overwrite Model Output data in Snowflake

In [252]:
sf_df = my_session.createDataFrame(df)
sf_df.write.mode("overwrite").save_as_table("FDC_HORIZONTAL.O2C_GOLD.ORDER_TO_CASH_INSIGHT_OUTPUT")