# Use O2C_Template Customized Notebook Template

In [63]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler,MinMaxScaler
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import classification_report, f1_score, r2_score, roc_auc_score, confusion_matrix, accuracy_score

In [64]:
from fosforml.model_manager.snowflakesession import get_session
my_session = get_session()

In [65]:
table_name = 'ORDER_TO_CASH_ENRICHED'

sf_df = my_session.sql("select * from {}".format(table_name))
df = sf_df.to_pandas()

In [66]:
df_train = df[df['INVOICESTATUS'].isin(['LATE_PAYMENT','ONTIME_PAYMENT'])]
df_test = df[~df['INVOICESTATUS'].isin(['LATE_PAYMENT','ONTIME_PAYMENT'])]

In [67]:
df_train.shape, df_test.shape

((170445, 79), (2992, 79))

In [68]:
df_final = df.copy()
df = df_train.copy()

In [69]:
df_final.shape, df.shape

((173437, 79), (170445, 79))

In [70]:
# Assuming df is your dataframe
df['ORDERDATE'] = pd.to_datetime(df['ORDERDATE'], errors='coerce')
df['DELIVERYDATE'] = pd.to_datetime(df['DELIVERYDATE'], errors='coerce')
df['INVOICEDATE'] = pd.to_datetime(df['INVOICEDATE'], errors='coerce')
df['PAYMENTDATE'] = pd.to_datetime(df['PAYMENTDATE'], errors='coerce')
df['INVOICEDUEDATE'] = pd.to_datetime(df['INVOICEDUEDATE'], errors='coerce')
df['DELIVEREDON'] = pd.to_datetime(df['DELIVEREDON'], errors='coerce')

In [71]:
df['MONTH'] = pd.to_datetime(df['ORDERDATE']).dt.month
df['YEAR'] = pd.to_datetime(df['ORDERDATE']).dt.year
df['WEEK'] = pd.to_datetime(df['ORDERDATE']).dt.isocalendar().week
df['WEEKEND'] = pd.to_datetime(df['ORDERDATE']).dt.weekday >= 5  # True if the day is Saturday or Sunday
df['DAY_OF_THE_MONTH'] = pd.to_datetime(df['ORDERDATE']).dt.day

In [72]:
# Delivery Delay
df['DELIVERY_DELAY'] = (df['DELIVEREDON'] - df['DELIVERYDATE']).dt.days

# Payment Delay
df['PAYMENT_DELAY'] = (df['PAYMENTDATE'] - df['INVOICEDUEDATE']).dt.days

In [73]:
df[['PAYMENTDATE','INVOICEDUEDATE','PAYMENT_DELAY','DELIVEREDON','DELIVERYDATE','DELIVERY_DELAY']].head()

Unnamed: 0,PAYMENTDATE,INVOICEDUEDATE,PAYMENT_DELAY,DELIVEREDON,DELIVERYDATE,DELIVERY_DELAY
0,2022-10-11,2022-09-28,13,2022-09-02,2022-08-28,5
1,2022-09-07,2022-09-16,-9,2022-08-22,2022-08-20,2
2,2022-06-09,2022-06-09,0,2022-05-16,2022-05-14,2
3,2022-06-09,2022-06-09,0,2022-05-16,2022-05-14,2
4,2024-03-25,2024-03-13,12,2024-02-12,2024-02-10,2


In [74]:
def set_flag(delay):
    return 'Y' if delay >= 1 else 'N'

In [75]:
df['PAYMENT_DELAY_FLAG'] = df['PAYMENT_DELAY'].apply(set_flag)
df['DELIVERY_DELAY_FLAG'] = df['DELIVERY_DELAY'].apply(set_flag)

In [76]:
df['PAYMENT_DELAY_FLAG'].value_counts()

PAYMENT_DELAY_FLAG
Y    89412
N    81033
Name: count, dtype: int64

In [77]:
df['DELIVERY_DELAY_FLAG'].value_counts()

DELIVERY_DELAY_FLAG
Y    170445
Name: count, dtype: int64

In [78]:
print(list(df.columns))

['ORDERID', 'ORDERITEMID', 'PRODUCTID', 'PRODUCTNAME', 'ORDERQUANTITY', 'UNITPRICE', 'ORDERVALUE', 'CUSTOMERID', 'CUSTOMERNAME', 'PRODUCTCATEGORY', 'ORDERDATE', 'ORDERSTATUS', 'ORDERAMOUNT', 'ORDERCOUNT', 'SHIPMENTID', 'SHIPMENTDATE', 'DELIVERYDATE', 'DELIVEREDON', 'CARRIER', 'SHIPMENTSTATUS', 'INVOICEID', 'INVOICEDATE', 'INVOICEDUEDATE', 'PAYMENTDATE', 'INVOICESTATUS', 'PAYMENTMETHOD', 'PRODUCTTYPE', 'QUANTITY', 'COMPANYTYPE', 'CONTACTDETAILS', 'EMAILDETAILS', 'ADDRESSDETAILS', 'ADMINDETAILS', 'CREDITLIMIT', 'CUSTOMERSINCE', 'PAYMENTTERMS', 'CREDITLIMITTYPE', 'CUSTOMERTYPE', 'SUPPLIERNAME', 'SUPPLIERID', 'CC_CUSTOMER_LIFETIME_VALUE', 'CC_ORDER_FREQUENCY', 'CC_AVERAGE_ORDER_VALUE', 'CC_RECENCY', 'CC_PREFERRED_PAYMENT_METHOD', 'CC_PREFERRED_PRODUCT_CATEGORY', 'CC_PREFERRED_PRODUCT_TYPE', 'CC_AVG_ORDER_PROCESSING_TIME', 'CC_AVG_INVOICE_PROCESSING_TIME', 'CC_AVG_DELIVERY_DELAY', 'CC_AVG_PAYMENT_DELAY', 'CC_TOTAL_ORDERS', 'CC_TOTAL_DELAYS', 'CC_ORDER_CONSISTENCY', 'CC_INVOICE_CONSISTENCY',

In [96]:
cols = ['ORDERQUANTITY','ORDERAMOUNT','ORDERCOUNT',
'CC_CUSTOMER_LIFETIME_VALUE', 'CC_ORDER_FREQUENCY', 'CC_AVERAGE_ORDER_VALUE', 'CC_RECENCY', 'CC_AVG_ORDER_PROCESSING_TIME', 'CC_AVG_INVOICE_PROCESSING_TIME', 
'CC_AVG_DELIVERY_DELAY', 'CC_AVG_PAYMENT_DELAY', 'CC_TOTAL_ORDERS', 'CC_TOTAL_DELAYS', 'CC_ORDER_CONSISTENCY', 'CC_INVOICE_CONSISTENCY', 
'CC_DELIVERY_CONSISTENCY', 'CC_PAYMENT_CONSISTENCY', 'CAT_TOTAL_SALES_VOLUME', 'CAT_TOTAL_SALES_VALUE', 
'CAT_AVG_ORDER_QUANTITY', 'CAT_AVG_UNIT_PRICE', 'CAT_NUMBER_OF_ORDERS', 'CAT_AVG_DELIVERY_TIME', 'CAT_AVG_INVOICE_TIME', 
'SP_CUSTOMER_LIFETIME_VALUE', 'SP_ORDER_FREQUENCY', 'SP_AVERAGE_ORDER_VALUE', 'SP_AVG_ORDER_PROCESSING_TIME', 'SP_AVG_DELIVERY_DELAY', 
'SP_TOTAL_ORDERS', 'SP_ORDER_CONSISTENCY', 'SP_DELIVERY_CONSISTENCY', 'MONTH', 'YEAR', 'WEEK', 'WEEKEND', 'DAY_OF_THE_MONTH', 'PAYMENT_DELAY_FLAG']

In [79]:
cols = ['ORDERQUANTITY','ORDERAMOUNT','ORDERCOUNT',
'CC_CUSTOMER_LIFETIME_VALUE', 'CC_ORDER_FREQUENCY', 'CC_AVERAGE_ORDER_VALUE', 'CC_RECENCY', 'CC_AVG_ORDER_PROCESSING_TIME', 'CC_AVG_INVOICE_PROCESSING_TIME', 
'CC_AVG_DELIVERY_DELAY', 'CC_AVG_PAYMENT_DELAY', 'CC_TOTAL_ORDERS', 'CC_TOTAL_DELAYS', 'CC_ORDER_CONSISTENCY', 'CC_INVOICE_CONSISTENCY', 
'CC_DELIVERY_CONSISTENCY', 'CC_PAYMENT_CONSISTENCY', 'PR_TOTAL_SALES_VOLUME', 'PR_TOTAL_SALES_VALUE', 'PR_AVG_ORDER_QUANTITY', 
'PR_AVG_UNIT_PRICE', 'PR_NUMBER_OF_ORDERS', 'PR_AVG_DELIVERY_TIME', 'PR_AVG_INVOICE_TIME', 'CAT_TOTAL_SALES_VOLUME', 'CAT_TOTAL_SALES_VALUE', 
'CAT_AVG_ORDER_QUANTITY', 'CAT_AVG_UNIT_PRICE', 'CAT_NUMBER_OF_ORDERS', 'CAT_AVG_DELIVERY_TIME', 'CAT_AVG_INVOICE_TIME', 
'SP_CUSTOMER_LIFETIME_VALUE', 'SP_ORDER_FREQUENCY', 'SP_AVERAGE_ORDER_VALUE', 'SP_AVG_ORDER_PROCESSING_TIME', 'SP_AVG_DELIVERY_DELAY', 
'SP_TOTAL_ORDERS', 'SP_ORDER_CONSISTENCY', 'SP_DELIVERY_CONSISTENCY', 'MONTH', 'YEAR', 'WEEK', 'WEEKEND', 'DAY_OF_THE_MONTH', 'PAYMENT_DELAY_FLAG']

In [95]:
df.head()

Unnamed: 0,ORDERID,ORDERITEMID,PRODUCTID,PRODUCTNAME,ORDERQUANTITY,UNITPRICE,ORDERVALUE,CUSTOMERID,CUSTOMERNAME,PRODUCTCATEGORY,...,SP_DELIVERY_CONSISTENCY,MONTH,YEAR,WEEK,WEEKEND,DAY_OF_THE_MONTH,DELIVERY_DELAY,PAYMENT_DELAY,PAYMENT_DELAY_FLAG,DELIVERY_DELAY_FLAG
0,OR-49f307a7-e4ba-4701-a46c-c0904a6b31a8,OI-fd3d2163-475a-4bb5-b1a1-f6125950a069,PID-46bc21c7-2e4a-49bb-b0ac-cd1d2e8c0cb8,Industrial Robot,47,30000,1410000,CID-5382e460-8bd9-428a-a1bf-e0f039c3612a,Taylor Ltd,Industrial Equipment,...,1.415584,8,2022,32,False,10,5,13,Y,Y
1,OR-a136395d-b459-4bb3-8a91-5a55d08bb0f7,OI-86200f6b-74b3-42bb-a140-1917c12fa781,PID-df71fb0e-604f-4ded-8a16-850f47932a2a,Bottled Water,7451,5,37255,CID-7247edbb-6148-4095-973c-5afb2409a37b,Sutura,Food Beverages,...,0.49997,8,2022,33,False,15,2,-9,N,Y
2,OR-aaf33297-ee46-4f2b-8348-5dd2fdcb7e54,OI-2a9c9bea-05cd-45f9-8a5c-9ee28df7167b,PID-22bab63a-1cf7-49cb-9257-dfd144a9bb86,Herbal Tea,100,2,200,CID-c2f0a78d-6978-492f-9f80-7a45887f5ba9,OTTAVA,Food Beverages,...,0.49997,5,2022,19,False,9,2,0,N,Y
3,OR-aaf33297-ee46-4f2b-8348-5dd2fdcb7e54,OI-484c6db6-f7cd-43a0-abfd-c47b9cd76d58,PID-ccb448de-74e0-4989-b8e6-e3a427c63dba,Protein Bars,4577,5,22885,CID-c2f0a78d-6978-492f-9f80-7a45887f5ba9,OTTAVA,Food Beverages,...,0.49997,5,2022,19,False,9,2,0,N,Y
4,OR-60912ae9-44aa-490c-94f3-3770af5c0bee,OI-719744d8-f014-4e34-a74c-62b994d0ee23,PID-f9b3d46a-f6a9-4ed4-b206-10656a6cb9f0,Microwave Oven,431,325,140075,CID-19d2d36a-1d5c-4504-9f12-fb1a62ee772c,Vertex Ventures,Consumer Goods,...,0.49997,2,2024,6,False,7,2,12,Y,Y


In [97]:
df_payments = df[cols]

In [99]:
df_payments.info()

<class 'pandas.core.frame.DataFrame'>
Index: 170445 entries, 0 to 173436
Data columns (total 38 columns):
 #   Column                          Non-Null Count   Dtype  
---  ------                          --------------   -----  
 0   ORDERQUANTITY                   170445 non-null  int16  
 1   ORDERAMOUNT                     170445 non-null  int32  
 2   ORDERCOUNT                      170445 non-null  int8   
 3   CC_CUSTOMER_LIFETIME_VALUE      170445 non-null  int32  
 4   CC_ORDER_FREQUENCY              170445 non-null  int16  
 5   CC_AVERAGE_ORDER_VALUE          170445 non-null  float64
 6   CC_RECENCY                      170445 non-null  int8   
 7   CC_AVG_ORDER_PROCESSING_TIME    170445 non-null  float64
 8   CC_AVG_INVOICE_PROCESSING_TIME  170445 non-null  float64
 9   CC_AVG_DELIVERY_DELAY           170445 non-null  float64
 10  CC_AVG_PAYMENT_DELAY            170445 non-null  float64
 11  CC_TOTAL_ORDERS                 170445 non-null  int16  
 12  CC_TOTAL_DELAYS      

In [100]:
df_payments.tail()

Unnamed: 0,ORDERQUANTITY,ORDERAMOUNT,ORDERCOUNT,CC_CUSTOMER_LIFETIME_VALUE,CC_ORDER_FREQUENCY,CC_AVERAGE_ORDER_VALUE,CC_RECENCY,CC_AVG_ORDER_PROCESSING_TIME,CC_AVG_INVOICE_PROCESSING_TIME,CC_AVG_DELIVERY_DELAY,...,SP_AVG_DELIVERY_DELAY,SP_TOTAL_ORDERS,SP_ORDER_CONSISTENCY,SP_DELIVERY_CONSISTENCY,MONTH,YEAR,WEEK,WEEKEND,DAY_OF_THE_MONTH,PAYMENT_DELAY_FLAG
173432,88,6776,1,64033712,538,59180.879852,-80,10.062847,28.409427,3.050832,...,2.998765,29950,1.706115,1.41788,11,2022,47,False,21,N
173433,4012,37084,2,82160478,564,71073.077855,-89,14.938581,26.097751,3.037197,...,3.018392,32025,1.718133,1.415584,2,2022,8,False,23,N
173434,90,37084,2,82160478,564,71073.077855,-89,14.938581,26.097751,3.037197,...,3.018392,32025,1.718133,1.415584,2,2022,8,False,23,N
173435,279,3892,3,15562685,594,12958.105745,-79,4.513739,14.835137,1.502082,...,1.494201,75789,0.818478,0.49997,7,2022,29,False,19,N
173436,299,3892,3,15562685,594,12958.105745,-79,4.513739,14.835137,1.502082,...,1.494201,75789,0.818478,0.49997,7,2022,29,False,19,N


In [101]:
df_payments.shape

(170445, 38)

In [102]:
df_payments = df_payments.drop_duplicates()

In [103]:
df_payments.shape

(170272, 38)

In [87]:
from sklearn.preprocessing import StandardScaler

# Standardize the predictor variables
scaler = StandardScaler()

X_scaled = scaler.fit_transform(df_payments.drop(columns=['PAYMENT_DELAY_FLAG']))

df_scaled = pd.DataFrame(X_scaled, columns=df_payments.drop(columns=['PAYMENT_DELAY_FLAG']).columns)

In [88]:
df_scaled.shape

(170445, 44)

In [89]:
df_scaled['PAYMENT_DELAY_FLAG'] = df_payments['PAYMENT_DELAY_FLAG']

In [90]:
df_scaled.describe()

Unnamed: 0,ORDERQUANTITY,ORDERAMOUNT,ORDERCOUNT,CC_CUSTOMER_LIFETIME_VALUE,CC_ORDER_FREQUENCY,CC_AVERAGE_ORDER_VALUE,CC_RECENCY,CC_AVG_ORDER_PROCESSING_TIME,CC_AVG_INVOICE_PROCESSING_TIME,CC_AVG_DELIVERY_DELAY,...,SP_AVG_ORDER_PROCESSING_TIME,SP_AVG_DELIVERY_DELAY,SP_TOTAL_ORDERS,SP_ORDER_CONSISTENCY,SP_DELIVERY_CONSISTENCY,MONTH,YEAR,WEEK,WEEKEND,DAY_OF_THE_MONTH
count,170445.0,170445.0,170445.0,170445.0,170445.0,170445.0,170445.0,170445.0,170445.0,170445.0,...,170445.0,170445.0,170445.0,170445.0,170445.0,170445.0,170445.0,170445.0,170445.0,170445.0
mean,6.169751e-18,2.8764380000000004e-17,3.075704e-16,2.7722190000000004e-17,2.235618e-15,-7.445389000000001e-17,-2.562948e-16,7.964815e-16,7.753877000000001e-17,1.332333e-16,...,3.665999e-16,-4.834709e-16,7.653827000000001e-17,-1.284809e-16,4.861389e-16,-4.4605630000000003e-17,3.062189e-14,1.3006500000000001e-17,1.40737e-16,5.110889e-17
std,1.000003,1.000003,1.000003,1.000003,1.000003,1.000003,1.000003,1.000003,1.000003,1.000003,...,1.000003,1.000003,1.000003,1.000003,1.000003,1.000003,1.000003,1.000003,1.000003,1.000003
min,-0.6376814,-0.3535632,-1.797003,-0.3964425,-2.450812,-0.3958847,-1.063359,-0.9895909,-1.659418,-0.9781006,...,-0.9636251,-0.9160209,-1.277401,-0.8763645,-0.91597,-1.560888,-0.3725941,-1.664405,-0.633987,-1.676586
25%,-0.5593317,-0.3211155,-0.4548965,-0.3476796,-0.713455,-0.3460621,-0.8224153,-0.9604887,-0.6690918,-0.916997,...,-0.9636251,-0.9160209,-0.7177712,-0.8763645,-0.91597,-0.950337,-0.3725941,-0.8967045,-0.633987,-0.8807546
50%,-0.4008101,-0.2764749,0.88721,-0.3097376,-0.01851207,-0.3084769,-0.3405279,-0.2287718,-0.0901603,-0.8815147,...,-0.2333724,-0.9152748,-0.6357864,-0.005029582,-0.9159313,-0.03450997,-0.3725941,0.01057807,-0.633987,0.02876724
75%,-0.04550301,-0.1830724,0.88721,-0.1974615,0.6267921,-0.1999869,0.6232468,0.9418524,0.4420396,1.077911,...,0.9657561,1.086037,1.093361,0.4378922,1.093798,0.881317,-0.3725941,0.8480697,1.577319,0.8245988
max,5.434121,11.60366,0.88721,3.583095,2.364149,3.526215,3.032684,1.615001,4.21894,1.282693,...,1.584408,1.112154,1.093361,2.527,1.093878,1.797144,3.293657,1.894934,1.577319,1.734121


In [85]:
df_scaled['PAYMENT_DELAY_FLAG'].isna().sum()

2992

# Model Training for Payment Delay Prediction

In [47]:
# Separate features and target variable
X = df_scaled.drop(columns=['PAYMENT_DELAY_FLAG'])
y = df_scaled['PAYMENT_DELAY_FLAG']

In [48]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

In [55]:
y_train

106441      N
141237      Y
67668       Y
97177       Y
93093       N
         ... 
119879      Y
103694      Y
131932    NaN
146867      Y
121958      N
Name: PAYMENT_DELAY_FLAG, Length: 127833, dtype: object

In [52]:
#from sklearn.tree import DecisionTreeClassifier
dtc = DecisionTreeClassifier()
dtc.fit(X_train, y_train)

ValueError: Input contains NaN

In [138]:
# Feature Selection using Recursive Feature Elimination (RFE)
model = LinearRegression()
rfe = RFE(model, n_features_to_select=10)  # Adjust the number of features to select
rfe.fit(X_train, y_train)

In [139]:
# Get the selected features
selected_features_rfe = X.columns[rfe.support_]
print(f'Selected features by RFE: {selected_features_rfe}')

Selected features by RFE: Index(['UNITPRICE', 'PR_AVG_UNIT_PRICE', 'PR_NUMBER_OF_ORDERS',
       'CAT_TOTAL_SALES_VOLUME', 'CAT_TOTAL_SALES_VALUE',
       'CAT_AVG_ORDER_QUANTITY', 'CAT_AVG_UNIT_PRICE', 'CAT_NUMBER_OF_ORDERS',
       'CAT_AVG_DELIVERY_TIME', 'CAT_AVG_INVOICE_TIME'],
      dtype='object')


In [140]:
# Feature Selection using Random Forest feature importance
model_rf = RandomForestRegressor(n_estimators=100, random_state=42)
model_rf.fit(X_train, y_train)

In [141]:
# Get feature importances
importances = model_rf.feature_importances_
indices = np.argsort(importances)[-10:]  # Select top 10 features
selected_features_rf = X.columns[indices]
print(f'Selected features by Random Forest: {selected_features_rf}')

Selected features by Random Forest: Index(['WEEKEND', 'PR_AVG_INVOICE_TIME', 'PR_AVG_DELIVERY_TIME', 'YEAR',
       'MONTH', 'DELIVERY_DELAY', 'WEEK', 'DAY_OF_THE_MONTH', 'ORDERQUANTITY',
       'ORDERVALUE'],
      dtype='object')


In [142]:
# Combine selected features from both methods (optional)
selected_features = list(set(selected_features_rfe) | set(selected_features_rf))
print(f'Combined selected features: {selected_features}')

Combined selected features: ['ORDERVALUE', 'UNITPRICE', 'CAT_AVG_INVOICE_TIME', 'PR_AVG_DELIVERY_TIME', 'WEEKEND', 'PR_NUMBER_OF_ORDERS', 'CAT_AVG_ORDER_QUANTITY', 'MONTH', 'WEEK', 'CAT_AVG_UNIT_PRICE', 'DELIVERY_DELAY', 'YEAR', 'CAT_TOTAL_SALES_VOLUME', 'DAY_OF_THE_MONTH', 'CAT_NUMBER_OF_ORDERS', 'PR_AVG_INVOICE_TIME', 'CAT_AVG_DELIVERY_TIME', 'PR_AVG_UNIT_PRICE', 'ORDERQUANTITY', 'CAT_TOTAL_SALES_VALUE']


In [143]:
# Reduce the dataframe to selected features
X_train_selected = X_train[selected_features]
X_test_selected = X_test[selected_features]

In [144]:
# Define a list of regression models to train
models = [
    ('Linear Regression', LinearRegression()),
    ('Random Forest', RandomForestRegressor(n_estimators=100, random_state=42)),
    ('Support Vector Regressor', SVR())
]

In [None]:
for name, model in models:
    pipeline = Pipeline([
        ('regressor', model)
    ])
    
    # Train the model
    pipeline.fit(X_train_selected, y_train)
    
    # Predict on the test set
    y_pred = pipeline.predict(X_test_selected)
    
    # Evaluate the model
    mse = mean_squared_error(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    mape = mean_absolute_percentage_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    print(f'{name} - Mean Squared Error: {mse}')
    print(f'{name} - Mean Absolute Error: {mae}')
    print(f'{name} - MAPE : {mape}')
    print(f'{name} - R2 Score : {r2}')
    
    # Cross-validation score
    #cv_scores = cross_val_score(pipeline, X, y, cv=5, scoring='neg_mean_squared_error')
    #print(f'{name} - Cross-Validation MSE: {-cv_scores.mean()}')

Linear Regression - Mean Squared Error: 0.07496596300002328
Linear Regression - Mean Absolute Error: 0.234970147449601
Linear Regression - MAPE : 50505571160638.22
Linear Regression - R2 Score : 0.0003977951881123376
Random Forest - Mean Squared Error: 0.08010892032955419
Random Forest - Mean Absolute Error: 0.2408518982954685
Random Forest - MAPE : 50410238487456.03
Random Forest - R2 Score : -0.06817881318348884
