### Importing libraries

In [1]:
import pandas as pd

In [42]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

### Importing dataset

In [5]:
# Load the raw dataset from the CSV file next to the notebook.
df = pd.read_csv(filepath_or_buffer='train.csv')

###  One Hot Encoding for categorical variables
#### All our categorical features are nominal, so one-hot encoding is the better choice

In [40]:
# Drop the "ID" column, then one-hot encode the remaining frame.
# drop_first=True omits the first level of each encoded column.
data = df.drop(columns=["ID"]).pipe(pd.get_dummies, drop_first=True)

# Target column first, then every other column as the feature matrix.
y = data["Reached.on.Time_Y.N"]
X = data.drop(columns=["Reached.on.Time_Y.N"])

### Splitting the data into training and testing dataset

In [23]:
# Hold out 20% of the rows as the test set. The split is stratified on y and
# uses a fixed seed so it is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

### Standardization

In [24]:
# Display the dtype of every feature column.
X.dtypes

Customer_care_calls          int64
Customer_rating              int64
Cost_of_the_Product          int64
Prior_purchases              int64
Discount_offered             int64
Weight_in_gms                int64
Warehouse_block_B             bool
Warehouse_block_C             bool
Warehouse_block_D             bool
Warehouse_block_F             bool
Mode_of_Shipment_Road         bool
Mode_of_Shipment_Ship         bool
Product_importance_low        bool
Product_importance_medium     bool
Gender_M                      bool
dtype: object

In [25]:
# SVM works better on scaled data.
# Fit the scaler on the training split only, then apply that same fitted
# transformation to both the training and the test split.
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

### Train the SVM classifier
#### RBF (Radial Basis Function, also called the Gaussian kernel) is a safe default because it can handle both linear and non-linear patterns
#### A kernel decides how the algorithm separates the data
#### RBF creates non-linear decision boundaries by mapping data into a higher-dimensional space.

In [39]:
# Support vector classifier with an RBF kernel and a fixed seed.
svm_clf = SVC(
    kernel='rbf',
    random_state=42,
)

In [28]:
# Fit the classifier on the scaled training features and their labels.
svm_clf.fit(X=X_train, y=y_train)

### Predicting

In [31]:
# Predict class labels for the scaled test features.
y_pred = svm_clf.predict(X=X_test)

### Evaluation metrics

In [32]:
# Fraction of test rows whose predicted label matches the true label.
accuracy_score(y_true=y_test, y_pred=y_pred)

0.649090909090909

In [35]:
# Precision on the test split (default settings of precision_score).
precision_score(y_true=y_test, y_pred=y_pred)

0.7774358974358975

In [36]:
# Recall on the test split (default settings of recall_score).
recall_score(y_true=y_test, y_pred=y_pred)

0.5773038842345773

In [43]:
# F1 score on the test split (default settings of f1_score).
f1_score(y_true=y_test, y_pred=y_pred)

0.6625874125874126

In [37]:
# classification_report returns a pre-formatted, multi-line string. Leaving it
# as the cell's last expression shows its repr (one long line with literal
# "\n" escapes), so print it to render the table properly.
print(classification_report(y_test, y_pred))

'              precision    recall  f1-score   support\n\n           0       0.55      0.76      0.63       887\n           1       0.78      0.58      0.66      1313\n\n    accuracy                           0.65      2200\n   macro avg       0.66      0.67      0.65      2200\nweighted avg       0.68      0.65      0.65      2200\n'

In [38]:
# Confusion matrix for the test split: rows are true classes, columns are
# predicted classes.
confusion_matrix(y_true=y_test, y_pred=y_pred)

array([[670, 217],
       [555, 758]])