In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(style="darkgrid")
import warnings
warnings.simplefilter("ignore")
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report,confusion_matrix,accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
import plotly.express  as px
import plotly.graph_objs as go
from plotly.offline import iplot

In [None]:
# Load the raw liver-patient records from the Kaggle input mount.
DATA_PATH = "/kaggle/input/indian-liver-patient-records/indian_liver_patient.csv"
data = pd.read_csv(DATA_PATH)

In [None]:
# Preview the first rows of the raw frame to check column names and value formats.
data.head()

In [None]:
# (rows, columns) of the loaded frame.
data.shape

In [None]:
# Column dtypes and non-null counts; the quickest way to spot missing values.
data.info()

In [None]:
# Summary statistics of the numeric columns.
data.describe()

In [None]:
# Count missing values per column.
data.isna().sum()

In [None]:
# Pie chart of the target label distribution.
# NOTE(review): presumably Dataset == 1 marks liver patients and 2 marks
# non-patients (per the dataset description) — confirm before relabelling.
counts=data["Dataset"].value_counts()  # computed once, reused for labels and values
label=counts.index
value=counts.values
# marker.colors expects ONE colour per slice; the original passed the whole
# 583-row "Dataset" column, which are not colours at all.
color=px.colors.qualitative.Plotly[:len(counts)]
fig=go.Figure(go.Pie(labels=label,values=value,textinfo="label+percent",marker=dict(colors=color)))
fig.update_layout(title_text="Distribution of the Dataset label")
fig.show()

In [None]:
# Pie chart of the gender distribution.
counts=data["Gender"].value_counts()  # computed once, reused for labels and values
label=counts.index
value=counts.values
# marker.colors expects ONE valid colour per slice; the original passed the raw
# "Gender" column ("Male"/"Female" strings), which are not colour values.
color=px.colors.qualitative.Plotly[:len(counts)]
fig=go.Figure(go.Pie(labels=label,values=value,textinfo="label+percent",marker=dict(colors=color)))
fig.update_layout(title_text="Gender distribution")
fig.show()

In [None]:
# Age spread per gender, coloured by the target label.
sns.catplot(
    data=data,
    x="Age",
    y="Gender",
    hue="Dataset",
)

In [None]:
# Mean age for every (Dataset, Gender) combination, highest Dataset label first.
(
    data[["Gender", "Dataset", "Age"]]
    .groupby(["Dataset", "Gender"], as_index=False)
    .mean()
    .sort_values(by="Dataset", ascending=False)
)

In [None]:
# Age histograms, one panel per (Gender row, Dataset column) combination.
h = sns.FacetGrid(
    data,
    row="Gender",
    col="Dataset",
    margin_titles=True,
)
h.map(plt.hist, "Age", color="teal")
# Leave head-room above the panels for the figure-level title.
plt.subplots_adjust(top=0.9)
h.fig.suptitle("Disease by Gender and Age")

In [None]:
# Direct vs. total bilirubin, one scatter panel per (Dataset row, Gender column).
h = sns.FacetGrid(
    data,
    row="Dataset",
    col="Gender",
    margin_titles=True,
)
h.map(plt.scatter, "Direct_Bilirubin", "Total_Bilirubin", edgecolor="w")
plt.subplots_adjust(top=0.9)

In [None]:
# Joint distribution with a fitted regression line.
# x/y are passed by keyword: positional x/y are rejected by seaborn >= 0.12.
sns.jointplot(x="Total_Bilirubin",y="Direct_Bilirubin",data=data,kind="reg")

In [None]:
# Aspartate vs. alamine aminotransferase, one scatter panel per (Dataset row, Gender column).
h = sns.FacetGrid(
    data,
    row="Dataset",
    col="Gender",
    margin_titles=True,
)
h.map(plt.scatter, "Aspartate_Aminotransferase", "Alamine_Aminotransferase", edgecolor="w")
plt.subplots_adjust(top=0.9)

In [None]:
# Joint distribution with a fitted regression line.
# x/y are passed by keyword: positional x/y are rejected by seaborn >= 0.12.
sns.jointplot(x="Aspartate_Aminotransferase",y="Alamine_Aminotransferase",data=data,kind="reg")

In [None]:
# Alkaline phosphotase vs. alamine aminotransferase, one scatter panel per (Dataset row, Gender column).
h = sns.FacetGrid(
    data,
    row="Dataset",
    col="Gender",
    margin_titles=True,
)
h.map(plt.scatter, "Alkaline_Phosphotase", "Alamine_Aminotransferase", edgecolor="w")
plt.subplots_adjust(top=0.9)

In [None]:
# Joint distribution with a fitted regression line.
# x/y are passed by keyword: positional x/y are rejected by seaborn >= 0.12.
sns.jointplot(x="Alkaline_Phosphotase",y="Alamine_Aminotransferase",data=data,kind="reg")

In [None]:
# Total proteins vs. albumin, one scatter panel per (Dataset row, Gender column).
h = sns.FacetGrid(
    data,
    row="Dataset",
    col="Gender",
    margin_titles=True,
)
h.map(plt.scatter, "Total_Protiens", "Albumin", edgecolor="w")
plt.subplots_adjust(top=0.9)

In [None]:
# Joint distribution with a fitted regression line.
# x/y are passed by keyword: positional x/y are rejected by seaborn >= 0.12.
sns.jointplot(x="Total_Protiens",y="Albumin",data=data,kind="reg")

In [None]:
# Albumin vs. albumin/globulin ratio, one scatter panel per (Dataset row, Gender column).
h = sns.FacetGrid(
    data,
    row="Dataset",
    col="Gender",
    margin_titles=True,
)
h.map(plt.scatter, "Albumin", "Albumin_and_Globulin_Ratio", edgecolor="w")
plt.subplots_adjust(top=0.9)

In [None]:
# Joint distribution with a fitted regression line.
# x/y are passed by keyword: positional x/y are rejected by seaborn >= 0.12.
sns.jointplot(x="Albumin",y="Albumin_and_Globulin_Ratio",data=data,kind="reg")

In [None]:
# Albumin/globulin ratio vs. total proteins, one scatter panel per (Dataset row, Gender column).
h = sns.FacetGrid(
    data,
    row="Dataset",
    col="Gender",
    margin_titles=True,
)
h.map(plt.scatter, "Albumin_and_Globulin_Ratio", "Total_Protiens", edgecolor="w")
plt.subplots_adjust(top=0.9)

In [None]:
# Joint distribution with a fitted regression line.
# x/y are passed by keyword: positional x/y are rejected by seaborn >= 0.12.
sns.jointplot(x="Albumin_and_Globulin_Ratio",y="Total_Protiens",data=data,kind="reg")

In [None]:
# Re-inspect the raw frame before feature engineering starts.
data.head()

In [None]:
# Preview the one-hot encoding of Gender (columns get the "Gender_" prefix).
pd.get_dummies(data.loc[:, "Gender"], prefix="Gender").head()

In [None]:
# Append the one-hot Gender columns to the raw frame; the original Gender
# column is kept here and dropped later when the feature matrix is built.
gender_onehot = pd.get_dummies(data["Gender"], prefix="Gender")
final_data = pd.concat([data, gender_onehot], axis="columns")

In [None]:
# Check that the one-hot Gender columns were appended.
final_data.head()

In [None]:
# (rows, columns) after adding the one-hot Gender columns.
final_data.shape

In [None]:
# Summary statistics of the engineered frame.
final_data.describe()

In [None]:
# Show the rows whose albumin/globulin ratio is missing.
missing_ratio_mask = final_data["Albumin_and_Globulin_Ratio"].isna()
final_data.loc[missing_ratio_mask]

In [None]:
# Replace missing albumin/globulin ratios with the column mean.
# NOTE(review): the mean is taken over all rows, before the train/test split
# performed later in the notebook — confirm this leakage is acceptable.
ratio_col = "Albumin_and_Globulin_Ratio"
ratio_mean = final_data[ratio_col].mean()
final_data[ratio_col] = final_data[ratio_col].fillna(ratio_mean)

In [None]:
# Confirm that no missing values remain after imputation.
final_data.isna().sum()

In [None]:
# Feature matrix: everything except the raw Gender string and the target.
x = final_data.drop(columns=["Gender", "Dataset"])
# Target vector: the Dataset label.
y = final_data["Dataset"]

In [None]:
# Preview the feature matrix.
x.head()

In [None]:
# Preview the target vector.
y.head()

In [None]:
# Annotated heatmap of pairwise feature correlations.
plt.figure(figsize=(12, 12))
feature_corr = x.corr()
sns.heatmap(
    feature_corr,
    annot=True,
    fmt=".2f",
    annot_kws={"size": 12},
    cmap="rainbow",
    square=True,
    cbar=True,
)
plt.title("correlation between features")

In [None]:
# Hold out 20% of the rows for testing; the seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Shapes of the four split parts, in the order: x_train, y_train, x_test, y_test.
tuple(part.shape for part in (x_train, y_train, x_test, y_test))

In [None]:
# Gaussian Naive Bayes baseline: fit, predict, then report train/test accuracy (%).
clf_ga = GaussianNB()
clf_ga.fit(x_train, y_train)
pred_ga = clf_ga.predict(x_test)

ga_score = round(clf_ga.score(x_train, y_train) * 100, 2)
ga_test_score = round(clf_ga.score(x_test, y_test) * 100, 2)
print("Gaussian NB training score : " ,ga_score)
print("Gaussian NB testing score : " ,ga_test_score)

# Detailed test-set metrics; the confusion matrix is computed once and reused.
cm_ga = confusion_matrix(y_test, pred_ga)
print(accuracy_score(y_test, pred_ga))
print(cm_ga)
print(classification_report(y_test, pred_ga))
sns.heatmap(cm_ga, annot=True, fmt="d")

In [None]:
# Random forest baseline: fit, predict, then report train/test accuracy (%).
# random_state is fixed (same seed as the train/test split) because the forest
# is stochastic; without it every Restart & Run All yields different scores.
clf_rf=RandomForestClassifier(random_state=42).fit(x_train,y_train)
pred_rf=clf_rf.predict(x_test)
rf_score=round(clf_rf.score(x_train,y_train)*100,2)
rf_test_score=round(clf_rf.score(x_test,y_test)*100,2)
print("Random Forest Classifier training score : " ,rf_score)
print("Random Forest Classifier testing score : " ,rf_test_score)

# Detailed test-set metrics; the confusion matrix is computed once and reused.
cm_rf=confusion_matrix(y_test,pred_rf)
print(accuracy_score(y_test,pred_rf))
print(cm_rf)
print(classification_report(y_test,pred_rf))
sns.heatmap(cm_rf,annot=True,fmt="d")

In [None]:
# Linear SVM baseline: fit, predict, then report train/test accuracy (%).
# dual=False selects the primal solver, which scikit-learn recommends when
# n_samples > n_features. The default dual coordinate-descent solver is
# randomised and prone to hitting max_iter on these unscaled features; that
# ConvergenceWarning would be invisible because warnings are globally ignored.
# NOTE(review): standardising the features first would likely help further.
clf_svm=LinearSVC(dual=False).fit(x_train,y_train)
pred_svm=clf_svm.predict(x_test)
svm_score=round(clf_svm.score(x_train,y_train)*100,2)
svm_test_score=round(clf_svm.score(x_test,y_test)*100,2)
print("Support Vector Classifier training score : " ,svm_score)
print("Support Vector Classifier testing score : " ,svm_test_score)

# Detailed test-set metrics; the confusion matrix is computed once and reused.
cm_svm=confusion_matrix(y_test,pred_svm)
print(accuracy_score(y_test,pred_svm))
print(cm_svm)
print(classification_report(y_test,pred_svm))
sns.heatmap(cm_svm,annot=True,fmt="d")

In [None]:
# Side-by-side comparison of the three classifiers.
# "Score" is the training accuracy (%), "Test_Score" the held-out accuracy (%).
summary_rows = [
    ("Support vector machines ", svm_score, svm_test_score),
    ("Gaussian Naive Bayes", ga_score, ga_test_score),
    ("Random Forest", rf_score, rf_test_score),
]
models = pd.DataFrame(summary_rows, columns=["Model", "Score", "Test_Score"])
# Best test accuracy first.
models.sort_values(by="Test_Score", ascending=False)