In [16]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report,confusion_matrix
from sklearn.svm import SVC
import numpy as np
import pandas as pd

**Loading Data**

We are using the Iris dataset from `sklearn.datasets` for this classification task.

In [9]:
# Load the Iris bunch and assemble one frame holding the four features plus the label.
iris = datasets.load_iris()
# Stacking the 1-D target next to the float feature matrix yields a single float array,
# which is why the label column is displayed as 0.0 / 1.0 / 2.0.
features_with_target = np.column_stack([iris['data'], iris['target']])
column_names = iris['feature_names'] + ['target']
df = pd.DataFrame(data=features_with_target, columns=column_names)
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0.0
1,4.9,3.0,1.4,0.2,0.0
2,4.7,3.2,1.3,0.2,0.0
3,4.6,3.1,1.5,0.2,0.0
4,5.0,3.6,1.4,0.2,0.0


**Data Understanding**

In [10]:
# Count how many rows belong to each class label.
df['target'].value_counts()

target
0.0    50
1.0    50
2.0    50
Name: count, dtype: int64

This data set contains 3 classes with 50 data points each.

In [11]:
# Summary statistics (count, mean, std, quartiles, min/max) for every numeric column.
df.describe()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
count,150.0,150.0,150.0,150.0,150.0
mean,5.843333,3.057333,3.758,1.199333,1.0
std,0.828066,0.435866,1.765298,0.762238,0.819232
min,4.3,2.0,1.0,0.1,0.0
25%,5.1,2.8,1.6,0.3,0.0
50%,5.8,3.0,4.35,1.3,1.0
75%,6.4,3.3,5.1,1.8,2.0
max,7.9,4.4,6.9,2.5,2.0


The value ranges of the 4 features are of a similar order of magnitude (all between 0.1 cm and 7.9 cm).

In [12]:
# Column dtypes and non-null counts, used here to check for missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  150 non-null    float64
 1   sepal width (cm)   150 non-null    float64
 2   petal length (cm)  150 non-null    float64
 3   petal width (cm)   150 non-null    float64
 4   target             150 non-null    float64
dtypes: float64(5)
memory usage: 6.0 KB


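The data does not contain any null values, and every column has a numeric dtype (`float64`). Note that `target` is also stored as `float64`, because it was stacked together with the float features when the DataFrame was built.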

**Creating train-test split**

In [13]:
# Split the raw feature matrix and labels into train and test partitions.
# test_size=0.25 spells out the default used when no size is given; the fixed
# random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    iris.data,
    iris.target,
    test_size=0.25,
    random_state=1,
)
print(X_train.shape)

(112, 4)


**Modeling SVM using Linear Kernel**

In [15]:
# Support vector classifier with a linear kernel; all other hyperparameters are left at their defaults.
svc = SVC(kernel='linear')
# Train on the training split only; the test split is kept for evaluation.
svc.fit(X_train,y_train)

**Evaluation**

In [21]:
# Predict labels for both partitions (train first, then test) with the fitted model.
y_train_pred, y_test_pred = (svc.predict(split) for split in (X_train, X_test))

In [22]:
# Confusion matrix on the training split (rows: true class, columns: predicted class).
confusion_matrix(y_true=y_train, y_pred=y_train_pred)

array([[37,  0,  0],
       [ 0, 32,  2],
       [ 0,  0, 41]], dtype=int64)

In [23]:
# Confusion matrix on the held-out test split (rows: true class, columns: predicted class).
confusion_matrix(y_true=y_test, y_pred=y_test_pred)

array([[13,  0,  0],
       [ 0, 16,  0],
       [ 0,  0,  9]], dtype=int64)

In [24]:
# Per-class precision / recall / F1 on the held-out test split.
class_labels = ['0','1','2']
test_report = classification_report(y_test, y_test_pred, target_names=class_labels)
print(test_report)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        13
           1       1.00      1.00      1.00        16
           2       1.00      1.00      1.00         9

    accuracy                           1.00        38
   macro avg       1.00      1.00      1.00        38
weighted avg       1.00      1.00      1.00        38



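We can see that the model has generalised well on the testing data, with an f1-score of 100% (1.00) for every class. Keep in mind that the test set holds only 38 samples, so a perfect score here is encouraging but not conclusive.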

**Modeling SVM using RBF kernel**

In [25]:
# Support vector classifier with an RBF kernel; all other hyperparameters are left at their defaults.
# NOTE: this rebinds `svc`, so the linear-kernel model fitted earlier is no longer reachable under this name.
svc = SVC(kernel='rbf')
# Train on the same training split that was used for the linear model.
svc.fit(X_train,y_train)

In [26]:
# Predict labels for both partitions (train first, then test) with the current `svc`.
# These names are reused, so any earlier predictions stored under them are overwritten.
y_train_pred, y_test_pred = (svc.predict(split) for split in (X_train, X_test))

In [27]:
# Confusion matrix on the training split (rows: true class, columns: predicted class).
confusion_matrix(y_true=y_train, y_pred=y_train_pred)

array([[37,  0,  0],
       [ 0, 31,  3],
       [ 0,  0, 41]], dtype=int64)

In [28]:
# Confusion matrix on the held-out test split (rows: true class, columns: predicted class).
confusion_matrix(y_true=y_test, y_pred=y_test_pred)

array([[13,  0,  0],
       [ 0, 15,  1],
       [ 0,  0,  9]], dtype=int64)

In [29]:
# Per-class precision / recall / F1 on the held-out test split.
class_labels = ['0','1','2']
test_report = classification_report(y_test, y_test_pred, target_names=class_labels)
print(test_report)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        13
           1       1.00      0.94      0.97        16
           2       0.90      1.00      0.95         9

    accuracy                           0.97        38
   macro avg       0.97      0.98      0.97        38
weighted avg       0.98      0.97      0.97        38



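We can see that the model has generalised well on the testing data, with an f1-score of 95% or higher for every class. However, the linear kernel performs slightly better on this split: it makes no test errors, whereas the RBF kernel misclassifies one test sample (and three training samples versus two for the linear kernel).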