# Importing Modules

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Dataset

In [3]:

df = pd.read_csv('cirrhosis.csv')
df.head()

Unnamed: 0,ID,N_Days,Status,Drug,Age,Sex,Ascites,Hepatomegaly,Spiders,Edema,Bilirubin,Cholesterol,Albumin,Copper,Alk_Phos,SGOT,Tryglicerides,Platelets,Prothrombin,Stage
0,1,400,D,D-penicillamine,21464,F,Y,Y,Y,Y,14.5,261.0,2.6,156.0,1718.0,137.95,172.0,190.0,12.2,4.0
1,2,4500,C,D-penicillamine,20617,F,N,Y,Y,N,1.1,302.0,4.14,54.0,7394.8,113.52,88.0,221.0,10.6,3.0
2,3,1012,D,D-penicillamine,25594,M,N,N,N,S,1.4,176.0,3.48,210.0,516.0,96.1,55.0,151.0,12.0,4.0
3,4,1925,D,D-penicillamine,19994,F,N,Y,Y,S,1.8,244.0,2.54,64.0,6121.8,60.63,92.0,183.0,10.3,4.0
4,5,1504,CL,Placebo,13918,F,N,Y,Y,N,3.4,279.0,3.53,143.0,671.0,113.15,72.0,136.0,10.9,3.0


# Preprocessing

In [4]:
df=df.replace({"F":1,"M":0})
df=df.replace({"Y":1,"N":0, "S":2})

df.loc[df["Stage"] == 1, "Stage"] = 0
df.loc[df["Stage"] == 2, "Stage"] = 0
df.loc[df["Stage"] == 3, "Stage"] = 1
df.loc[df["Stage"] == 4, "Stage"] = 1
df['Stage'] = df['Stage'].replace(np.nan, 0)

df.loc[df["Status"] == 'D', "Status"] = 0
df.loc[df["Status"] == 'C', "Status"] = 1
df.loc[df["Status"] == 'CL', "Status"] = 2

df.loc[df["Drug"] == 'D-penicillamine', "Drug"] = 0
df.loc[df["Drug"] == 'Placebo', "Drug"] = 1


cols = df.columns[df.dtypes.eq('object')]
df[cols] = df[cols].apply(pd.to_numeric, errors='coerce')

df['Drug'].mean()
df['Drug'].fillna(df['Drug'].median(), inplace=True)
df['N_Days'].fillna(df['N_Days'].mean(), inplace=True)
df['Status'].fillna(df['Status'].median(), inplace=True)
df['Sex'].fillna(df['Sex'].median(), inplace=True)
df['Age'].fillna(df['Age'].median(), inplace=True)
df['Ascites'].fillna(df['Ascites'].median(), inplace=True)
df['Hepatomegaly'].fillna(df['Hepatomegaly'].median(), inplace=True)
df['Spiders'].fillna(df['Spiders'].median(), inplace=True)
df['Edema'].fillna(df['Edema'].median(), inplace=True)

from sklearn.impute import SimpleImputer
imputer = SimpleImputer(strategy='mean')

df['Bilirubin'] = imputer.fit_transform(df[['Bilirubin']])
df['Cholesterol'] = imputer.fit_transform(df[['Cholesterol']])
df['Albumin'] = imputer.fit_transform(df[['Albumin']])
df['Copper'] = imputer.fit_transform(df[['Copper']])
df['Alk_Phos'] = imputer.fit_transform(df[['Alk_Phos']])
df['SGOT'] = imputer.fit_transform(df[['SGOT']])
df['Tryglicerides'] = imputer.fit_transform(df[['Tryglicerides']])
df['Platelets'] = imputer.fit_transform(df[['Platelets']])
df['Prothrombin'] = imputer.fit_transform(df[['Prothrombin']])

df.head()

Unnamed: 0,ID,N_Days,Status,Drug,Age,Sex,Ascites,Hepatomegaly,Spiders,Edema,Bilirubin,Cholesterol,Albumin,Copper,Alk_Phos,SGOT,Tryglicerides,Platelets,Prothrombin,Stage
0,1,400,0,0.0,21464,1,1.0,1.0,1.0,1,14.5,261.0,2.6,156.0,1718.0,137.95,172.0,190.0,12.2,1.0
1,2,4500,1,0.0,20617,1,0.0,1.0,1.0,0,1.1,302.0,4.14,54.0,7394.8,113.52,88.0,221.0,10.6,1.0
2,3,1012,0,0.0,25594,0,0.0,0.0,0.0,2,1.4,176.0,3.48,210.0,516.0,96.1,55.0,151.0,12.0,1.0
3,4,1925,0,0.0,19994,1,0.0,1.0,1.0,2,1.8,244.0,2.54,64.0,6121.8,60.63,92.0,183.0,10.3,1.0
4,5,1504,2,1.0,13918,1,0.0,1.0,1.0,0,3.4,279.0,3.53,143.0,671.0,113.15,72.0,136.0,10.9,1.0


# Setting up Features and Target

In [5]:
X = df.iloc[:, :-1].values
Y = df.iloc[:, -1].values

# Splitting Data into Testing and Training data


In [6]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.20)

# Decision Tree Classifier

In [7]:
from sklearn.tree import DecisionTreeClassifier
classifier = DecisionTreeClassifier()
classifier.fit(X_train, y_train)

y_pred = classifier.predict(X_test)

# Classification Report

In [9]:
from sklearn.metrics import classification_report, confusion_matrix
print("Confusion Matrix: ")
print(confusion_matrix(y_test, y_pred))
print("Classification Report: ")
print(classification_report(y_test, y_pred))

Confusion Matrix: 
[[ 7 11]
 [15 51]]
Classification Report: 
              precision    recall  f1-score   support

         0.0       0.32      0.39      0.35        18
         1.0       0.82      0.77      0.80        66

    accuracy                           0.69        84
   macro avg       0.57      0.58      0.57        84
weighted avg       0.71      0.69      0.70        84



# Accuracy

In [12]:
from sklearn import metrics
accuracy = metrics.accuracy_score(y_test, y_pred)
print("Accuracy - ",accuracy)

Accuracy -  0.6904761904761905
