In [1]:
# Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report,confusion_matrix, roc_auc_score
from sklearn.model_selection import train_test_split, StratifiedKFold, GridSearchCV
from sklearn import metrics
from graphviz import Digraph
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Ans a. Load the dataset 
df = pd.read_csv('liver_disease_.csv')
df.head()

Unnamed: 0,Age,Gender,Total_Bilirubin,Direct_Bilirubin,Alkaline_Phosphotase,Alamine_Aminotransferase,Aspartate_Aminotransferase,Total_Protiens,Albumin,Albumin_and_Globulin_Ratio,Dataset
0,65,Female,0.7,0.1,187,16,18,6.8,3.3,0.9,Yes
1,62,Male,10.9,5.5,699,64,100,7.5,3.2,0.74,Yes
2,62,Male,7.3,4.1,490,60,68,7.0,3.3,0.89,Yes
3,58,Male,1.0,0.4,182,14,20,6.8,3.4,1.0,Yes
4,72,Male,3.9,2.0,195,27,59,7.3,2.4,0.4,Yes


In [3]:
#Let's observe the shape of the dataframe.
df.shape

(583, 11)

In [4]:
#Remove/replace missing values (if any)
df = df.dropna()
df.shape

(579, 11)

In [5]:
from sklearn import preprocessing
le = preprocessing.LabelEncoder()
le.fit(df.Gender.unique())
df['Gender_Encoded'] = le.transform(df.Gender)
df.drop(['Gender'], axis = 1, inplace = True)
le.fit(df.Dataset.unique())
df['Dataset_Encoded'] = le.transform(df.Dataset)
df.drop(['Dataset'], axis = 1, inplace = True)

In [6]:
#Feature Selection
feature_cols = ['Age','Gender_Encoded','Total_Bilirubin',
                'Direct_Bilirubin','Alkaline_Phosphotase','Alamine_Aminotransferase',
                'Aspartate_Aminotransferase','Total_Protiens','Albumin','Albumin_and_Globulin_Ratio']

X = df[feature_cols]
y = df.Dataset_Encoded

#importing train_test_split
from sklearn.model_selection import train_test_split

#Split the dataset into train and test with stratification
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=1)
sc_X=StandardScaler()
X_train=sc_X.fit_transform(X_train)
X_test=sc_X.fit_transform(X_test)

In [7]:
#kernel rbf
classifier = SVC(random_state=0,kernel='rbf')
classifier = classifier.fit(X_train,y_train)

In [8]:
#predict
y_pred = classifier.predict(X_test)
print(y_pred)

[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1]


In [9]:
#confusion matrix
confusion_matrix(y_test,y_pred)
print(confusion_matrix(y_test,y_pred))

[[ 0 34]
 [ 0 82]]


In [10]:
#Accuracy
print("Accuracy:" , metrics.accuracy_score(y_test,y_pred))

Accuracy: 0.7068965517241379


In [11]:
#Precision/Recall/F1
from sklearn.metrics import classification_report
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00        34
           1       0.71      1.00      0.83        82

   micro avg       0.71      0.71      0.71       116
   macro avg       0.35      0.50      0.41       116
weighted avg       0.50      0.71      0.59       116



In [12]:
#Kernel as Linear
classifierlinear = SVC(random_state=0,kernel='linear')
classifierlinear = classifierlinear.fit(X_train,y_train)

In [13]:
#predict
y_pred = classifierlinear.predict(X_test)
print(y_pred)

[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1]


In [17]:
#confusion matrix
confusion_matrix(y_test,y_pred)
print(confusion_matrix(y_test,y_pred))

[[ 0 34]
 [ 0 82]]


In [18]:
#Accuracy
print("Accuracy:" , metrics.accuracy_score(y_test,y_pred))

Accuracy: 0.7068965517241379


In [19]:
from sklearn.metrics import classification_report
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00        34
           1       0.71      1.00      0.83        82

   micro avg       0.71      0.71      0.71       116
   macro avg       0.35      0.50      0.41       116
weighted avg       0.50      0.71      0.59       116



In [20]:
#Kernel as polynomial
classifierpoly = SVC(random_state=0,kernel='poly')
classifierpoly = classifierpoly.fit(X_train,y_train)
#predict
y_pred = classifierpoly.predict(X_test)
print(y_pred)
#confusion matrix
confusion_matrix(y_test,y_pred)
print(confusion_matrix(y_test,y_pred))
#Accuracy
print("Accuracy:" , metrics.accuracy_score(y_test,y_pred))
from sklearn.metrics import classification_report
print(classification_report(y_test,y_pred))

[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1
 1 1 1 1 1]
[[ 0 34]
 [ 2 80]]
Accuracy: 0.6896551724137931
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        34
           1       0.70      0.98      0.82        82

   micro avg       0.69      0.69      0.69       116
   macro avg       0.35      0.49      0.41       116
weighted avg       0.50      0.69      0.58       116



In [21]:
#Gaussian Kernel
classifiergaus = SVC(random_state=0,kernel='rbf',gamma="auto")
classifiergaus = classifiergaus.fit(X_train,y_train)
#predict
y_pred = classifiergaus.predict(X_test)
print(y_pred)
#confusion matrix
confusion_matrix(y_test,y_pred)
print(confusion_matrix(y_test,y_pred))
#Accuracy
print("Accuracy:" , metrics.accuracy_score(y_test,y_pred))
from sklearn.metrics import classification_report
print(classification_report(y_test,y_pred))

[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1]
[[ 0 34]
 [ 0 82]]
Accuracy: 0.7068965517241379
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        34
           1       0.71      1.00      0.83        82

   micro avg       0.71      0.71      0.71       116
   macro avg       0.35      0.50      0.41       116
weighted avg       0.50      0.71      0.59       116

