### Walmart works with thousands of manufacturers. Many of them supply the same product, but with varying quality, delivery reliability, and operational consistency. This causes supply chain inefficiencies, customer complaints, and inventory imbalances. Currently, Walmart’s sourcing decisions are partly manual or based on limited metrics, leading to poor manufacturer retention choices.

In [8]:
## importing important libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import warnings

warnings.filterwarnings("ignore")

%matplotlib inline

In [9]:
df=pd.read_csv('Walmart_gst.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,Supplier_ID,GST_Number,Product,Warehouse_Region,Warehouse_Name,On_Time_Delivery_Rate,Order_Accuracy_Percentage,Packaging_Quality_Rating,Return_Replacement_Frequency,Avg_Fulfillment_Time_Days,Compliance_Score,Cost_Effectiveness,Sustainability_Score
0,0,SUP049,HK9OEKZQOLFON7T,Maggi Noodles,East,Mumbai FC,63.78,98.9,4.65,8,3.96,51.4,49.25,54.46
1,1,GROC016,PABP4XNAZ4XQOXX,Cadbury Dairy Milk,South,Bangalore FC,66.16,95.91,4.7,4,2.6,66.84,74.1,55.57
2,2,GROC106,8757UASZQTFJKI5,Amul Butter,East,Kolkata FC,62.77,98.44,4.18,3,3.55,58.03,86.33,71.34
3,3,SUP096,1377FTB4KV3P7N2,Pepsi Cola,West,Bangalore FC,87.86,98.93,4.36,6,4.26,86.03,88.7,90.5
4,4,SUP026,52ZMFX6TK78BV5V,Lays Potato Chips,East,Chennai FC,85.8,92.42,2.47,8,5.71,81.28,44.59,45.55


In [10]:
df['GST_Number'].unique()

array(['HK9OEKZQOLFON7T', 'PABP4XNAZ4XQOXX', '8757UASZQTFJKI5',
       '1377FTB4KV3P7N2', '52ZMFX6TK78BV5V', 'S1XD8W8ZJOF5W9D',
       '7ODRP8H8OKMHNOA', 'ORVDKV1P6Q4XQLE', 'JNWQCYLI7N26JNJ',
       'OQO1QL0ATU1EQA0', '18WINTU5ZCTPVIR', '3R43L2WRDKBV8Y6',
       '9VCWJTUE355M2Q0', 'GRUI8PEHL1FWW72', '1ZXDYP0ACJ6KMAC',
       'S4P4WJ2DEIE7YXG', '9U1PGBMT4FLEWP9', 'QRJ7GNMM21T1RBA'],
      dtype=object)

In [11]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 360 entries, 0 to 359
Data columns (total 14 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   Unnamed: 0                    360 non-null    int64  
 1   Supplier_ID                   360 non-null    object 
 2   GST_Number                    360 non-null    object 
 3   Product                       360 non-null    object 
 4   Warehouse_Region              360 non-null    object 
 5   Warehouse_Name                360 non-null    object 
 6   On_Time_Delivery_Rate         360 non-null    float64
 7   Order_Accuracy_Percentage     360 non-null    float64
 8   Packaging_Quality_Rating      360 non-null    float64
 9   Return_Replacement_Frequency  360 non-null    int64  
 10  Avg_Fulfillment_Time_Days     360 non-null    float64
 11  Compliance_Score              360 non-null    float64
 12  Cost_Effectiveness            360 non-null    float64
 13  Susta

In [12]:
# Numerical and Categorical Features
cat_features= [feature for feature in df.columns if df[feature].dtype =='O']
print("categorical features:",len(cat_features))
num_features= [feature for feature in df.columns if df[feature].dtype !='O']
print("numerical features:",len(num_features))

categorical features: 5
numerical features: 9


In [13]:
cat_features

['Supplier_ID', 'GST_Number', 'Product', 'Warehouse_Region', 'Warehouse_Name']

In [14]:
num_features

['Unnamed: 0',
 'On_Time_Delivery_Rate',
 'Order_Accuracy_Percentage',
 'Packaging_Quality_Rating',
 'Return_Replacement_Frequency',
 'Avg_Fulfillment_Time_Days',
 'Compliance_Score',
 'Cost_Effectiveness',
 'Sustainability_Score']

In [None]:
df = df.drop(columns=['Unnamed: 0'])

In [None]:
df.head()

Unnamed: 0,Supplier_ID,GST_Number,Product,Warehouse_Region,Warehouse_Name,On_Time_Delivery_Rate,Order_Accuracy_Percentage,Packaging_Quality_Rating,Return_Replacement_Frequency,Avg_Fulfillment_Time_Days,Compliance_Score,Cost_Effectiveness,Sustainability_Score
0,SUP049,HK9OEKZQOLFON7T,Maggi Noodles,East,Mumbai FC,63.78,98.9,4.65,8,3.96,51.4,49.25,54.46
1,GROC016,PABP4XNAZ4XQOXX,Cadbury Dairy Milk,South,Bangalore FC,66.16,95.91,4.7,4,2.6,66.84,74.1,55.57
2,GROC106,8757UASZQTFJKI5,Amul Butter,East,Kolkata FC,62.77,98.44,4.18,3,3.55,58.03,86.33,71.34
3,SUP096,1377FTB4KV3P7N2,Pepsi Cola,West,Bangalore FC,87.86,98.93,4.36,6,4.26,86.03,88.7,90.5
4,SUP026,52ZMFX6TK78BV5V,Lays Potato Chips,East,Chennai FC,85.8,92.42,2.47,8,5.71,81.28,44.59,45.55


In [None]:
X=df.drop(columns=["Supplier_ID","GST_Number","Product","Warehouse_Region","Warehouse_Name"])

In [None]:
X

Unnamed: 0,On_Time_Delivery_Rate,Order_Accuracy_Percentage,Packaging_Quality_Rating,Return_Replacement_Frequency,Avg_Fulfillment_Time_Days,Compliance_Score,Cost_Effectiveness,Sustainability_Score
0,63.78,98.90,4.65,8,3.96,51.40,49.25,54.46
1,66.16,95.91,4.70,4,2.60,66.84,74.10,55.57
2,62.77,98.44,4.18,3,3.55,58.03,86.33,71.34
3,87.86,98.93,4.36,6,4.26,86.03,88.70,90.50
4,85.80,92.42,2.47,8,5.71,81.28,44.59,45.55
...,...,...,...,...,...,...,...,...
355,81.15,95.20,2.16,3,5.33,75.45,47.90,72.31
356,93.64,90.57,2.96,1,5.48,84.45,75.14,53.14
357,86.36,86.95,3.10,1,5.64,71.04,75.09,84.00
358,80.38,99.99,2.40,8,3.59,58.51,45.82,37.70


In [None]:
# df['Retention_Score'] = (
#     0.20 * df['Compliance_Score'] +
#     0.15 * df['On_Time_Delivery_Rate'] +
#     0.15 * df['Order_Accuracy_Percentage'] +
#     0.10 * df['Packaging_Quality_Rating'] -
#     0.10 * df['Return_Replacement_Frequency'] -
#     0.10 * df['Avg_Fulfillment_Time_Days'] +
#     0.10 * df['Cost_Effectiveness'] +
#     0.10 * df['Sustainability_Score']
# )

In [None]:
from sklearn.preprocessing import MinMaxScaler
scaler=MinMaxScaler()


In [None]:
X_scaled=scaler.fit_transform(X)

In [None]:
scaled_df=pd.DataFrame(X_scaled,columns=X.columns)

In [None]:
df['Retention_Score'] = (
    0.20 * scaled_df['Compliance_Score'] +
    0.15 * scaled_df['On_Time_Delivery_Rate'] +
    0.15 * scaled_df['Order_Accuracy_Percentage'] +
    0.10 * scaled_df['Packaging_Quality_Rating'] -
    0.10 * scaled_df['Return_Replacement_Frequency'] -
    0.10 * scaled_df['Avg_Fulfillment_Time_Days'] +
    0.10 * scaled_df['Cost_Effectiveness'] +
    0.10 * scaled_df['Sustainability_Score']
)*100

In [None]:
y=df['Retention_Score']

In [None]:
y.head()

0    16.555138
1    33.985435
2    32.605324
3    52.476209
4    21.331788
Name: Retention_Score, dtype: float64

In [None]:
y.min()

np.float64(-1.2840882838830674)