In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, roc_auc_score, roc_curve, classification_report
from sklearn import metrics
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import LinearSVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
from tqdm import tqdm
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt

In [2]:
!pip install --upgrade pymysql sqlalchemy

Collecting pymysql
  Downloading PyMySQL-1.1.0-py3-none-any.whl (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.8/44.8 kB[0m [31m364.2 kB/s[0m eta [36m0:00:00[0m
Installing collected packages: pymysql
Successfully installed pymysql-1.1.0


In [3]:
import pandas as pd
from sqlalchemy import create_engine
import pymysql
pymysql.install_as_MySQLdb()
username = 'r****'
password = '*******'
host = '*****.mysql.database.azure.com'
port = 3306
dbname = 'onlineshoppingdb'

conn_string = f'mysql+pymysql://{username}:{password}@{host}:{port}/{dbname}'
engine = create_engine(conn_string, connect_args={'charset': 'utf8'})



##Data Collection and Preprocessing

In [None]:
sql_queries = 'Select o.*, s.Shipment_id, s.Shipped_date, s.Estimated_arrival, s.Warehouse, s.Mode_of_shipment, s.Weight, c.Membership, c.Purchase_frequency, s.Reach_on_time from `ORDER` as o LEFT JOIN SHIPMENT as s on s.Order_id = o.Order_id LEFT JOIN CUSTOMER as c on c.Customer_id = o.Customer_id;'
df = pd.read_sql(sql_queries, con = engine)

There are not missing values in the dataset, so we don't have to impute or drop missing values.

In [None]:
#Drop irrelavent feature ID
df = df.drop(['Order_id', 'Customer_id', 'Date', 'Status', 'Shipped_date', 'Estimated_arrival'], axis=1)
#One-hot encoding
categorical_variables = ['Warehouse', 'Mode_of_Shipment', 'Membership']
numerical_variables = ['Order_value', 'Discount', 'Number_of_itmes', 'Weight',	'Purchase_frequency']
data = pd.get_dummies(df, columns=categorical_variables)


data

In [None]:
#Split data to training and test sets
target = data['Reach_on_time'].to_numpy()
data = data.drop(['Reach_on_time'], axis=1)
columns = data.columns
drop_index = [columns.get_loc(c) for c in['Warehouse_A', 'Mode_of_Shipment_Flight', 'Membership_Bronze']]
data = data.to_numpy()
X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2, random_state=42)

In [None]:
#Normalize data with z-score standardization
scaler = StandardScaler()

# Fit the scaler on the training data and standardize the first 5 numerical columns
scaler.fit(X_train[:,:6])

X_train_num = scaler.transform(X_train[:,:6])
X_test_num = scaler.transform(X_test[:,:6])

X_train = np.concatenate([X_train_num, X_train[:, 6:]], axis=1)
X_test = np.concatenate([X_test_num, X_test[:, 6:]], axis=1)


In [None]:
X_train[0,:]

array([-0.9256049813911437, 1.4220870453134193, -0.6701198555855346,
       -0.37868020246479045, -0.39291127638365464, 0.31190126626315484,
       False, False, False, False, True, False, False, True, False, True,
       False, False], dtype=object)

In [None]:
X_train_num

array([[-0.92560498,  1.42208705, -0.67011986, -0.3786802 , -0.39291128,
         0.31190127],
       [ 1.69710806,  0.00394867, -0.17324572,  0.27363129, -0.63896845,
         0.53260565],
       [-1.79984266,  1.42208705,  1.37948593, -0.3786802 , -0.45442557,
        -1.58517822],
       ...,
       [-0.0513673 , -0.70512051, -1.5189465 , -1.03099169, -0.70048274,
         1.33227692],
       [ 0.82287038,  0.00394867, -1.37402487,  1.57825427,  1.32948894,
        -1.59923972],
       [-0.0513673 ,  0.00394867, -1.1462909 , -0.3786802 , -0.70048274,
         0.55583769]])

##Modeling

In [None]:
models = []
results = pd.DataFrame({
    'Accuracy': [],
    'AUC': [],
    'F1_Score': [],
    'Precision': [],
    'Recall': []
})
results

Unnamed: 0,Accuracy,AUC,F1_Score,Precision,Recall


In [None]:
def metrics_evaluation(y_test, y_pred, model):
    metrics_list = []
    accuracy = metrics.accuracy_score(y_test, y_pred)
    metrics_list.append(accuracy)

    fpr, tpr, _ = metrics.roc_curve(y_true=y_test, y_score=y_pred)

    auc_pred = metrics.auc(x=fpr, y=tpr)
    metrics_list.append(auc_pred)

    f1_pred = metrics.f1_score(y_test, y_pred)
    metrics_list.append(f1_pred)

    precision_pred = metrics.precision_score(y_test, y_pred)
    metrics_list.append(precision_pred)

    recall_pred = metrics.recall_score(y_test, y_pred)
    metrics_list.append(recall_pred)

    results.loc[model] = metrics_list
    print(f'Accuracy is {np.round(accuracy,4)}')
    print(f'AUC is {np.round(auc_pred,4)}')
    print(f'F1 score is {np.round(f1_pred,4)}')
    print(f'Precision is {np.round(precision_pred,4)}')
    print(f'Recall is {np.round(recall_pred,4)}')

**KNN**

In [None]:
knn = KNeighborsClassifier(4).fit(X_train, y_train)
y_pred = knn.predict(X_test)

In [None]:
y_pred

array([1, 0, 1, ..., 0, 0, 0], dtype=int64)

In [None]:
metrics_evaluation(y_test, y_pred, 'KNN')

Accuracy is 0.6445
AUC is 0.6635
F1 score is 0.6521
Precision is 0.7773
Recall is 0.5617


**Decision Tree**

In [None]:
decision_tree_model = DecisionTreeClassifier().fit(X_train, y_train)
y_pred = decision_tree_model.predict(X_test)

In [None]:
dt_importance = decision_tree_model.feature_importances_

In [None]:
metrics_evaluation(y_test, y_pred, 'Decision Tree')

Accuracy is 0.6459
AUC is 0.6322
F1 score is 0.7028
Precision is 0.6998
Recall is 0.7057


**Random Forest**

In [None]:
random_forest_model = RandomForestClassifier().fit(X_train, y_train)
y_pred = random_forest_model.predict(X_test)

In [None]:
rf_importance = random_forest_model.feature_importances_

In [None]:
metrics_evaluation(y_test, y_pred, 'Random Forest')

Accuracy is 0.6595
AUC is 0.6612
F1 score is 0.6944
Precision is 0.7426
Recall is 0.6521


**Gradient Boost**

In [None]:
gradient_boost_model = GradientBoostingClassifier().fit(X_train, y_train)
y_pred = gradient_boost_model.predict(X_test)

In [None]:
gb_importance = gradient_boost_model.feature_importances_

In [None]:
metrics_evaluation(y_test, y_pred, 'Gradient Boost')

Accuracy is 0.6882
AUC is 0.7159
F1 score is 0.6833
Precision is 0.8595
Recall is 0.567


##Hyperparameter Tuning

In [None]:
from sklearn.model_selection import GridSearchCV

Decision Tree

In [None]:
# Define the hyperparameters values
max_depth_values = [3, 5, 10, None]
min_samples_split_values = [2, 5, 10]

best_accuracy_dt = 0
best_params_dt = {}

for max_depth in max_depth_values:
    for min_samples_split in min_samples_split_values:
        dt = DecisionTreeClassifier(max_depth=max_depth, min_samples_split=min_samples_split, random_state=42).fit(X_train, y_train)
        y_pred = dt.predict(X_test)
        accuracy = metrics.accuracy_score(y_test, y_pred)
        fpr, tpr, _ = metrics.roc_curve(y_true=y_test, y_score=y_pred)
        auc_pred = metrics.auc(x=fpr, y=tpr)
        precision_pred = metrics.precision_score(y_test, y_pred)
        recall_pred = metrics.recall_score(y_test, y_pred)
        f1_pred = metrics.f1_score(y_test, y_pred)

        if accuracy > best_accuracy_dt:
            best_accuracy_dt = accuracy
            best_pred = y_pred
            best_params_dt = {'max_depth': max_depth, 'min_samples_split': min_samples_split}

print("Best Parameters for Decision Tree:", best_params_dt)
print("Best Accuracy for Decision Tree:", best_accuracy_dt)


Best Parameters for Decision Tree: {'max_depth': 5, 'min_samples_split': 2}
Best Accuracy for Decision Tree: 0.685


In [None]:
metrics_evaluation(y_test, best_pred, 'Decision Tree (Hyperparameter Tuning)')

Accuracy is 0.685
AUC is 0.7225
F1 score is 0.6624
Precision is 0.9091
Recall is 0.5211


Gradient Boost

In [None]:
# Define the hyperparameters values
n_estimators_values = [100, 200, 300]
learning_rate_values = [0.01, 0.1, 0.2]

best_accuracy_gb = 0
best_params_gb = {}

for n_estimators in n_estimators_values:
    for learning_rate in learning_rate_values:
        gb = GradientBoostingClassifier(n_estimators=n_estimators, learning_rate=learning_rate, random_state=42).fit(X_train, y_train)
        y_pred = gb.predict(X_test)
        accuracy = metrics.accuracy_score(y_test, y_pred)
        fpr, tpr, _ = metrics.roc_curve(y_true=y_test, y_score=y_pred)
        auc_pred = metrics.auc(x=fpr, y=tpr)
        precision_pred = metrics.precision_score(y_test, y_pred)
        recall_pred = metrics.recall_score(y_test, y_pred)
        f1_pred = metrics.f1_score(y_test, y_pred)

        if accuracy > best_accuracy_gb:
            best_accuracy_gb = accuracy
            best_pred = y_pred
            best_params_gb = {'n_estimators': n_estimators, 'learning_rate': learning_rate}

print("Best Parameters for Gradient Boost:", best_params_gb)
print("Best Accuracy for Gradient Boost:", best_accuracy_gb)

Best Parameters for Gradient Boost: {'n_estimators': 300, 'learning_rate': 0.01}
Best Accuracy for Gradient Boost: 0.6913636363636364


In [None]:
metrics_evaluation(y_test, best_pred, 'Gradient Boost (Hyperparameter Tuning)')

Accuracy is 0.6914
AUC is 0.7316
F1 score is 0.6647
Precision is 0.9347
Recall is 0.5157


SVM

In [None]:
from sklearn.svm import SVC
svm = SVC(kernel='linear', random_state=42).fit(X_train, y_train)  # You can change the kernel as needed
y_pred_svm = svm.predict(X_test)
accuracy_svm = metrics.accuracy_score(y_test, y_pred_svm)
fpr_svm, tpr_svm, _ = metrics.roc_curve(y_true=y_test, y_score=y_pred_svm)
auc_svm = metrics.auc(x=fpr_svm, y=tpr_svm)
precision_svm = metrics.precision_score(y_test, y_pred_svm)
recall_svm = metrics.recall_score(y_test, y_pred_svm)
f1_svm = metrics.f1_score(y_test, y_pred_svm)

print("Accuracy for SVM:", accuracy_svm)
print("AUC for SVM:", auc_svm)
print("Precision for SVM:", precision_svm)
print("Recall for SVM:", recall_svm)
print("F1 score for SVM:", f1_svm)
metrics_evaluation(y_test, y_pred_svm, 'SVM')

Accuracy for SVM: 0.6704545454545454
AUC for SVM: 0.6827307947515999
Precision for SVM: 0.7815533980582524
Recall for SVM: 0.6168582375478927
F1 score for SVM: 0.6895074946466809
Accuracy is 0.6705
AUC is 0.6827
F1 score is 0.6895
Precision is 0.7816
Recall is 0.6169


Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression(random_state=42).fit(X_train, y_train)
y_pred_lr = lr.predict(X_test)
accuracy_lr = metrics.accuracy_score(y_test, y_pred_lr)
fpr_lr, tpr_lr, _ = metrics.roc_curve(y_true=y_test, y_score=y_pred_lr)
auc_lr = metrics.auc(x=fpr_lr, y=tpr_lr)
precision_lr = metrics.precision_score(y_test, y_pred_lr)
recall_lr = metrics.recall_score(y_test, y_pred_lr)
f1_lr = metrics.f1_score(y_test, y_pred_lr)

print("Accuracy for Logistic Regression:", accuracy_lr)
print("AUC for Logistic Regression:", auc_lr)
print("Precision for Logistic Regression:", precision_lr)
print("Recall for Logistic Regression:", recall_lr)
print("F1 score for Logistic Regression:", f1_lr)
metrics_evaluation(y_test, y_pred_lr, 'Logistic Regression')

Accuracy for Logistic Regression: 0.644090909090909
AUC for Logistic Regression: 0.6306706051071299
Precision for Logistic Regression: 0.6989329268292683
Recall for Logistic Regression: 0.7026819923371648
F1 score for Logistic Regression: 0.7008024455483378
Accuracy is 0.6441
AUC is 0.6307
F1 score is 0.7008
Precision is 0.6989
Recall is 0.7027


Naive Bayes

In [None]:
from sklearn.naive_bayes import GaussianNB
nb = GaussianNB().fit(X_train, y_train)
y_pred_nb = nb.predict(X_test)
accuracy_nb = metrics.accuracy_score(y_test, y_pred_nb)
fpr_nb, tpr_nb, _ = metrics.roc_curve(y_true=y_test, y_score=y_pred_nb)
auc_nb = metrics.auc(x=fpr_nb, y=tpr_nb)
precision_nb = metrics.precision_score(y_test, y_pred_nb)
recall_nb = metrics.recall_score(y_test, y_pred_nb)
f1_nb = metrics.f1_score(y_test, y_pred_nb)

print("Accuracy for Naive Bayes:", accuracy_nb)
print("AUC for Naive Bayes:", auc_nb)
print("Precision for Naive Bayes:", precision_nb)
print("Recall for Naive Bayes:", recall_nb)
print("F1 score for Naive Bayes:", f1_nb)
metrics_evaluation(y_test, y_pred_nb, 'Naive Bayes')

Accuracy for Naive Bayes: 0.6531818181818182
AUC for Naive Bayes: 0.7022217941308675
Precision for Naive Bayes: 0.9486754966887417
Recall for Naive Bayes: 0.43908045977011495
F1 score for Naive Bayes: 0.6003143006809849
Accuracy is 0.6532
AUC is 0.7022
F1 score is 0.6003
Precision is 0.9487
Recall is 0.4391


AdaBoost

In [None]:
from sklearn.ensemble import AdaBoostClassifier
best_accuracy_adaboost = 0
best_params_adaboost = {}

for n_estimators in [50, 100, 200]:
    adaboost = AdaBoostClassifier(n_estimators=n_estimators, random_state=42).fit(X_train, y_train)
    y_pred = adaboost.predict(X_test)
    accuracy = metrics.accuracy_score(y_test, y_pred)
    fpr, tpr, _ = metrics.roc_curve(y_true=y_test, y_score=y_pred)
    auc_pred = metrics.auc(x=fpr, y=tpr)
    precision_pred = metrics.precision_score(y_test, y_pred)
    recall_pred = metrics.recall_score(y_test, y_pred)
    f1_pred = metrics.f1_score(y_test, y_pred)

    if accuracy > best_accuracy_adaboost:
        best_accuracy_adaboost = accuracy
        best_pred = y_pred
        best_params_adaboost = {'n_estimators': n_estimators}

print("Best Parameters for AdaBoost:", best_params_adaboost)
print("Best Accuracy for AdaBoost:", best_accuracy_adaboost)
metrics_evaluation(y_test, best_pred, 'AdaBoost (Hyperparameter Tuning)')


Best Parameters for AdaBoost: {'n_estimators': 100}
Best Accuracy for AdaBoost: 0.6859090909090909
Accuracy is 0.6859
AUC is 0.701
F1 score is 0.7007
Precision is 0.8058
Recall is 0.6199


Extra Trees

In [None]:
from sklearn.ensemble import ExtraTreesClassifier
best_accuracy_extra_trees = 0
best_params_extra_trees = {}

for max_depth in max_depth_values:
    for min_samples_split in min_samples_split_values:
        extra_trees = ExtraTreesClassifier(max_depth=max_depth, min_samples_split=min_samples_split, random_state=42).fit(X_train, y_train)
        y_pred = extra_trees.predict(X_test)
        accuracy = metrics.accuracy_score(y_test, y_pred)
        fpr, tpr, _ = metrics.roc_curve(y_true=y_test, y_score=y_pred)
        auc_pred = metrics.auc(x=fpr, y=tpr)
        precision_pred = metrics.precision_score(y_test, y_pred)
        recall_pred = metrics.recall_score(y_test, y_pred)
        f1_pred = metrics.f1_score(y_test, y_pred)

        if accuracy > best_accuracy_extra_trees:
            best_accuracy_extra_trees = accuracy
            best_pred = y_pred
            best_params_extra_trees = {'max_depth': max_depth, 'min_samples_split': min_samples_split}

print("Best Parameters for Extra Trees:", best_params_extra_trees)
print("Best Accuracy for Extra Trees:", best_accuracy_extra_trees)
metrics_evaluation(y_test, best_pred, 'Extra Trees (Hyperparameter Tuning)')

Best Parameters for Extra Trees: {'max_depth': 10, 'min_samples_split': 5}
Best Accuracy for Extra Trees: 0.6663636363636364
Accuracy is 0.6664
AUC is 0.6654
F1 score is 0.7045
Precision is 0.7422
Recall is 0.6705


BaggingClassifier

In [None]:
from sklearn.ensemble import BaggingClassifier
bagging = BaggingClassifier(base_estimator=DecisionTreeClassifier(), n_estimators=50, random_state=42).fit(X_train, y_train)
y_pred_bagging = bagging.predict(X_test)
accuracy_bagging = metrics.accuracy_score(y_test, y_pred_bagging)
fpr_bagging, tpr_bagging, _ = metrics.roc_curve(y_true=y_test, y_score=y_pred_bagging)
auc_bagging = metrics.auc(x=fpr_bagging, y=tpr_bagging)
precision_bagging = metrics.precision_score(y_test, y_pred_bagging)
recall_bagging = metrics.recall_score(y_test, y_pred_bagging)
f1_bagging = metrics.f1_score(y_test, y_pred_bagging)

print("Accuracy for Bagging Classifier:", accuracy_bagging)
print("AUC for Bagging Classifier:", auc_bagging)
print("Precision for Bagging Classifier:", precision_bagging)
print("Recall for Bagging Classifier:", recall_bagging)
print("F1 score for Bagging Classifier:", f1_bagging)
metrics_evaluation(y_test, y_pred_bagging, 'Bagging Classifier')



Accuracy for Bagging Classifier: 0.6722727272727272
AUC for Bagging Classifier: 0.6775936984952589
Precision for Bagging Classifier: 0.763063063063063
Recall for Bagging Classifier: 0.6490421455938697
F1 score for Bagging Classifier: 0.7014492753623188
Accuracy is 0.6723
AUC is 0.6776
F1 score is 0.7014
Precision is 0.7631
Recall is 0.649


XGBoost

In [None]:
from xgboost import XGBClassifier
best_accuracy_xgb = 0
best_params_xgb = {}

for n_estimators in [50, 100, 200]:
    xgb = XGBClassifier(n_estimators=n_estimators, random_state=42).fit(X_train, y_train)
    y_pred = xgb.predict(X_test)
    accuracy = metrics.accuracy_score(y_test, y_pred)
    fpr, tpr, _ = metrics.roc_curve(y_true=y_test, y_score=y_pred)
    auc_pred = metrics.auc(x=fpr, y=tpr)
    precision_pred = metrics.precision_score(y_test, y_pred)
    recall_pred = metrics.recall_score(y_test, y_pred)
    f1_pred = metrics.f1_score(y_test, y_pred)

    if accuracy > best_accuracy_xgb:
        best_accuracy_xgb = accuracy
        best_pred = y_pred
        best_params_xgb = {'n_estimators': n_estimators}

print("Best Parameters for XGBoost:", best_params_xgb)
print("Best Accuracy for XGBoost:", best_accuracy_xgb)
metrics_evaluation(y_test, best_pred, 'XGBoost (Hyperparameter Tuning)')

Best Parameters for XGBoost: {'n_estimators': 50}
Best Accuracy for XGBoost: 0.6613636363636364
Accuracy is 0.6614
AUC is 0.6644
F1 score is 0.6943
Precision is 0.7473
Recall is 0.6483


LightGBM

In [None]:
from lightgbm import LGBMClassifier
best_accuracy_lgbm = 0
best_params_lgbm = {}

for num_leaves in [31, 63, 127]:
    lgbm = LGBMClassifier(num_leaves=num_leaves, random_state=42).fit(X_train, y_train)
    y_pred = lgbm.predict(X_test)
    accuracy = metrics.accuracy_score(y_test, y_pred)
    fpr, tpr, _ = metrics.roc_curve(y_true=y_test, y_score=y_pred)
    auc_pred = metrics.auc(x=fpr, y=tpr)
    precision_pred = metrics.precision_score(y_test, y_pred)
    recall_pred = metrics.recall_score(y_test, y_pred)
    f1_pred = metrics.f1_score(y_test, y_pred)

    if accuracy > best_accuracy_lgbm:
        best_accuracy_lgbm = accuracy
        best_pred = y_pred
        best_params_lgbm = {'num_leaves': num_leaves}

print("Best Parameters for LightGBM:", best_params_lgbm)
print("Best Accuracy for LightGBM:", best_accuracy_lgbm)
metrics_evaluation(y_test, best_pred, 'LightGBM (Hyperparameter Tuning)')

[LightGBM] [Info] Number of positive: 5258, number of negative: 3541
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000133 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 582
[LightGBM] [Info] Number of data points in the train set: 8799, number of used features: 18
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.597568 -> initscore=0.395342
[LightGBM] [Info] Start training from score 0.395342
[LightGBM] [Info] Number of positive: 5258, number of negative: 3541
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000306 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 582
[LightGBM] [Info] Number of data points in the train set: 8799, number of used features: 18
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.597568 -> initscore=0.395342
[LightGBM] [I

##Results

In [None]:
results

Unnamed: 0,Accuracy,AUC,F1_Score,Precision,Recall
KNN,0.644545,0.663524,0.652135,0.777306,0.561686
Decision Tree,0.645909,0.632203,0.702785,0.699848,0.705747
Random Forest,0.659545,0.661249,0.69441,0.742583,0.652107
Gradient Boost,0.688182,0.715927,0.683287,0.859466,0.56705
Decision Tree (Hyperparameter Tuning),0.685,0.722548,0.662445,0.909091,0.521073
Gradient Boost (Hyperparameter Tuning),0.691364,0.731597,0.664691,0.934722,0.515709
SVM,0.670455,0.682731,0.689507,0.781553,0.616858
Logistic Regression,0.644091,0.630671,0.700802,0.698933,0.702682
Naive Bayes,0.653182,0.702222,0.600314,0.948675,0.43908
AdaBoost (Hyperparameter Tuning),0.685909,0.701023,0.700736,0.805777,0.619923


In [None]:
importances_features = pd.DataFrame({
    'Feature': columns,
    'Decision Tree': dt_importance,
    'Random Forest': rf_importance,
    'Gradient Boosting': gb_importance
})
importances_features

Unnamed: 0,Feature,Decision Tree,Random Forest,Gradient Boosting
0,Customer_care_calls,0.044594,0.057341,0.008675
1,Customer_rating,0.045428,0.060723,0.004325
2,Cost_of_the_Product,0.169064,0.168789,0.044713
3,Prior_purchases,0.053691,0.063083,0.046151
4,Discount_offered,0.295698,0.209775,0.740991
5,Weight_in_gms,0.223145,0.278897,0.143182
6,Warehouse_block_A,0.019837,0.012047,0.000956
7,Warehouse_block_B,0.013398,0.012968,0.001354
8,Warehouse_block_C,0.012139,0.013181,0.000808
9,Warehouse_block_D,0.016192,0.012938,0.002545
