# RandomForestClassifier

In [134]:
import numpy as np
import pandas as pd
from sklearn import ensemble
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import joblib
from imblearn.over_sampling import SMOTE

In [135]:
patients=pd.read_csv('indian_liver_patient.csv')

In [136]:
patients

Unnamed: 0,Age,Gender,Total_Bilirubin,Direct_Bilirubin,Alkaline_Phosphotase,Alamine_Aminotransferase,Aspartate_Aminotransferase,Total_Protiens,Albumin,Albumin_and_Globulin_Ratio,Dataset
0,65,Female,0.7,0.1,187,16,18,6.8,3.3,0.90,1
1,62,Male,10.9,5.5,699,64,100,7.5,3.2,0.74,1
2,62,Male,7.3,4.1,490,60,68,7.0,3.3,0.89,1
3,58,Male,1.0,0.4,182,14,20,6.8,3.4,1.00,1
4,72,Male,3.9,2.0,195,27,59,7.3,2.4,0.40,1
...,...,...,...,...,...,...,...,...,...,...,...
578,60,Male,0.5,0.1,500,20,34,5.9,1.6,0.37,2
579,40,Male,0.6,0.1,98,35,31,6.0,3.2,1.10,1
580,52,Male,0.8,0.2,245,48,49,6.4,3.2,1.00,1
581,31,Male,1.3,0.5,184,29,32,6.8,3.4,1.00,1


In [137]:
patients['Gender']=patients['Gender'].apply(lambda x:1 if x=='Male' else 0)

In [138]:
patients['Dataset']=patients['Dataset'].apply(lambda x:1 if x==2 else 0)

In [139]:
patients

Unnamed: 0,Age,Gender,Total_Bilirubin,Direct_Bilirubin,Alkaline_Phosphotase,Alamine_Aminotransferase,Aspartate_Aminotransferase,Total_Protiens,Albumin,Albumin_and_Globulin_Ratio,Dataset
0,65,0,0.7,0.1,187,16,18,6.8,3.3,0.90,0
1,62,1,10.9,5.5,699,64,100,7.5,3.2,0.74,0
2,62,1,7.3,4.1,490,60,68,7.0,3.3,0.89,0
3,58,1,1.0,0.4,182,14,20,6.8,3.4,1.00,0
4,72,1,3.9,2.0,195,27,59,7.3,2.4,0.40,0
...,...,...,...,...,...,...,...,...,...,...,...
578,60,1,0.5,0.1,500,20,34,5.9,1.6,0.37,1
579,40,1,0.6,0.1,98,35,31,6.0,3.2,1.10,0
580,52,1,0.8,0.2,245,48,49,6.4,3.2,1.00,0
581,31,1,1.3,0.5,184,29,32,6.8,3.4,1.00,0


In [140]:
patients.isnull().sum()

Age                           0
Gender                        0
Total_Bilirubin               0
Direct_Bilirubin              0
Alkaline_Phosphotase          0
Alamine_Aminotransferase      0
Aspartate_Aminotransferase    0
Total_Protiens                0
Albumin                       0
Albumin_and_Globulin_Ratio    4
Dataset                       0
dtype: int64

In [141]:
patients=patients.fillna(0.94)

In [142]:
patients.isnull().sum()

Age                           0
Gender                        0
Total_Bilirubin               0
Direct_Bilirubin              0
Alkaline_Phosphotase          0
Alamine_Aminotransferase      0
Aspartate_Aminotransferase    0
Total_Protiens                0
Albumin                       0
Albumin_and_Globulin_Ratio    0
Dataset                       0
dtype: int64

In [143]:
X=patients.loc[:,['Total_Bilirubin', 'Direct_Bilirubin',
       'Alkaline_Phosphotase', 'Alamine_Aminotransferase',
       'Total_Protiens', 'Albumin', 'Albumin_and_Globulin_Ratio']]

In [144]:
y=patients.loc[:,'Dataset']

In [145]:
y.value_counts()

0    416
1    167
Name: Dataset, dtype: int64

In [146]:
from imblearn.over_sampling import SMOTE
smote=SMOTE(sampling_strategy='auto')
X_sm,y_sm=smote.fit_resample(X,y)
y_sm.value_counts()

1    416
0    416
Name: Dataset, dtype: int64

In [147]:
X_train,X_test,y_train,y_test=train_test_split(X_sm,y_sm,test_size=0.2,random_state=123,stratify=y_sm)
#X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=123,stratify=y)


In [148]:

print('Shape training set: X:{}, y:{}'.format(X_train.shape, y_train.shape))
print('Shape test set: X:{}, y:{}'.format(X_test.shape, y_test.shape))


Shape training set: X:(665, 7), y:(665,)
Shape test set: X:(167, 7), y:(167,)


In [149]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
sc.fit(X_train)
X_train_sc = sc.transform(X_train)
X_test_sc = sc.transform(X_test)

In [150]:
from sklearn.ensemble import RandomForestClassifier 
model=RandomForestClassifier()

model.fit(X_train_sc,y_train)
y_pred = model.predict(X_test_sc)
accuracy_score(y_pred,y_test)


0.8083832335329342

In [151]:
clf_report = classification_report(y_pred, y_test)
print('Classification report')
print("---------------------")
print(clf_report)
print("_____________________")

Classification report
---------------------
              precision    recall  f1-score   support

           0       0.75      0.85      0.80        74
           1       0.87      0.77      0.82        93

    accuracy                           0.81       167
   macro avg       0.81      0.81      0.81       167
weighted avg       0.82      0.81      0.81       167

_____________________


In [152]:
model.score(X_test_sc,y_test)

0.8083832335329342

In [153]:
from sklearn.metrics import mean_squared_error
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)

mse, rmse

(0.19161676646706588, 0.43774052413166625)

In [154]:
confusion_matrix(y_test,y_pred)

array([[63, 21],
       [11, 72]], dtype=int64)

In [22]:

joblib.dump(model,r"Liver_Model.pkl")

['Liver_Model.pkl']

In [133]:
#a=sc.transform([[0.9,0.3,202,14,6.7,3.6,1.1]]) #=0
#a=sc.transform([[2.6,1.2,410,59,5.6,3,0.8]]) #1
a=sc.transform([[0.9,0.2,279,40,7.3,4,1.2]])
y_pre = model.predict(a)
y_pre[0]

1

In [321]:
np.array([y_test])

array([[0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1,
        0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0,
        1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1,
        0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0,
        1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0,
        1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1,
        0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0,
        1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1]], dtype=int64)