### Importing Libraries

In [1]:
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.ensemble import IsolationForest
from pyod.models.hbos import HBOS
from sklearn import cluster
import sqlite3

### Reading Data from Database

In [2]:
# Load the whole `healthcare` table into a DataFrame in a single step.
# The connection is closed in `finally`, so it is released even when the
# query raises (previously a failed read left the connection open).
# NOTE: sqlite3.connect() creates an empty database file when
# 'healthcare.db' does not exist; the query below then fails because the
# table is missing.
con = sqlite3.connect('healthcare.db')
try:
    df = pd.read_sql_query("Select * from healthcare", con)
finally:
    con.close()

In [5]:
# Preview the first five rows of the loaded table (five is head()'s default).
df.head(n=5)

Unnamed: 0,Patient_Nbr,Age,Gender,Time_In_Hospital,Num_Medications,Num_Lab_Procedures,State,Hospital_Id,Disease,Smoker,...,Cholestrol,SysBP,DiaBP,BMI,Heart_Rate,Glucose,Diet_Followed,Med_Followed,Steps,Calories_Burnt
0,8222157,1,1.0,1,1,41,California,Ascension,Urinary Tract Infection,0,...,207.97998,132.106646,80.790918,55.433178,94,77.330587,0,0,263,6090
1,55629189,11,1.0,3,18,59,Texas,HCA Healthcare,Knee Arthroplasty,0,...,220.764562,134.827888,70.50233,39.361805,90,119.277996,0,0,443,8340
2,86047875,25,1.0,2,13,11,California,Common Spirit Health,COPD (Chronic Obstructive Pulmonary Disease),0,...,231.726082,121.056315,74.698965,29.317731,51,135.453871,0,0,533,13885
3,82442376,34,0.0,2,16,44,California,Ascension,Kidney Stones,0,...,249.923551,115.229244,108.61389,43.007455,64,155.511067,0,0,495,7901
4,42519267,42,0.0,1,8,51,California,Veteran's Health Administration,Coronary Artery Bypass Grafting (CABG),0,...,193.928251,157.714229,90.923279,53.554976,51,163.077023,1,1,315,7847


### Model Building

In [10]:
class Model:
    """Unsupervised models that flag patients for readmission.

    Every method fits its estimator on the columns listed in ``FEATURES``,
    writes the resulting 0/1 flag into a ``'Readmission'`` column of the
    frame it was given (the frame is modified in place), and returns a
    ``(message, count)`` tuple where ``count`` is the number of rows whose
    flag equals 1.

    The methods are static, so ``Model.hbos()``, ``Model().hbos()`` and
    ``Model.hbos(frame)`` are all valid. When ``data`` is omitted the
    notebook-level ``df`` is looked up at call time and used.
    """

    # Columns fed to every estimator. Spellings (e.g. 'Cholestrol') match the
    # column names of the source table and must not be "corrected" here.
    FEATURES = ['Age', 'Gender', 'Smoker', 'Prevalent_Stroke', 'Prevalent_Hyp', 'Diabetes',
                'Cholestrol', 'SysBP', 'DiaBP', 'BMI', 'Heart_Rate', 'Glucose', 'Diet_Followed',
                'Med_Followed', 'Calories_Burnt', 'Steps']

    @staticmethod
    def _frame(data):
        """Return ``data``, or the notebook-level ``df`` when ``data`` is None."""
        return df if data is None else data

    @staticmethod
    def _flagged(frame):
        """Count the rows of ``frame`` whose 'Readmission' flag equals 1."""
        return int((frame['Readmission'] == 1).sum())

    # KMeans Clustering
    @staticmethod
    def kmeans(data=None):
        """Split the patients into two KMeans clusters and count cluster 1.

        Parameters
        ----------
        data : pandas.DataFrame, optional
            Frame containing the ``FEATURES`` columns. Defaults to the
            notebook-level ``df``.

        Returns
        -------
        tuple
            ``(message, count)`` where ``count`` is the size of cluster 1.
        """
        frame = Model._frame(data)
        # Seeded so that cluster ids (and therefore the count) are stable
        # across runs.
        # NOTE(review): which cluster id means "readmit" is a convention of
        # this notebook, not something KMeans guarantees - confirm by
        # inspecting the cluster centres before relying on the count.
        clusterer = KMeans(n_clusters=2, random_state=0)
        frame['Readmission'] = clusterer.fit_predict(frame[Model.FEATURES])

        # Patient requiring readmission
        return ('No. of patients to be readmitted using KMeans:', Model._flagged(frame))

    # Isolation Forest
    @staticmethod
    def isolation(data=None):
        """Flag Isolation Forest outliers and count them.

        Parameters
        ----------
        data : pandas.DataFrame, optional
            Frame containing the ``FEATURES`` columns. Defaults to the
            notebook-level ``df``.

        Returns
        -------
        tuple
            ``(message, count)`` where ``count`` is the number of outliers.
        """
        frame = Model._frame(data)
        forest = IsolationForest(random_state=0)
        labels = np.asarray(forest.fit_predict(frame[Model.FEATURES]))

        # scikit-learn labels outliers -1 and inliers +1. Convert to the same
        # 1 = flagged / 0 = not flagged convention that hbos() produces, so
        # that counting `== 1` counts the anomalies rather than the inliers.
        frame['Readmission'] = (labels == -1).astype(int)

        return ('No. of patients to be readmitted using Isolation Forest:', Model._flagged(frame))

    # HBOS
    @staticmethod
    def hbos(data=None):
        """Flag HBOS (histogram-based outlier score) outliers and count them.

        Parameters
        ----------
        data : pandas.DataFrame, optional
            Frame containing the ``FEATURES`` columns. Defaults to the
            notebook-level ``df``.

        Returns
        -------
        tuple
            ``(message, count)`` where ``count`` is the number of rows
            labelled 1 by the detector.
        """
        frame = Model._frame(data)
        detector = HBOS()
        # pyod detectors label outliers 1 and inliers 0, so the labels are
        # stored unchanged.
        frame['Readmission'] = detector.fit_predict(frame[Model.FEATURES])

        return ('No. of patients to be readmitted using HBOS:', Model._flagged(frame))


In [11]:
# Create the model container; Model defines no constructor arguments.
model = Model()

In [14]:
# Run the HBOS detector; the cell displays the returned (message, count) tuple.
model.hbos()



('No. of patients to be readmitted using HBOS:', 10177)