# Predicting medical appointments using Python

In this notebook, I'll use Python and its libraries to predict whether someone would show up for a medical appointment or not.

In [3]:
import pandas as pd

from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

## Import dataset

In [38]:
# Read the appointments table from disk and preview its first five rows.
dataset = pd.read_csv('data/dataset_modified.csv')
dataset.head()

Unnamed: 0,PatientId,AppointmentID,Gender,ScheduledDay,AppointmentDay,Age,Neighbourhood,Scholarship,Hipertension,Diabetes,Alcoholism,Handcap,SMS_received,Showed_up
0,29872500000000.0,5642903,F,2016-04-29,2016-04-29,62,JARDIM DA PENHA,False,True,False,False,False,False,True
1,558997800000000.0,5642503,M,2016-04-29,2016-04-29,56,JARDIM DA PENHA,False,False,False,False,False,False,True
2,4262962000000.0,5642549,F,2016-04-29,2016-04-29,62,MATA DA PRAIA,False,False,False,False,False,False,True
3,867951200000.0,5642828,F,2016-04-29,2016-04-29,8,PONTAL DE CAMBURI,False,False,False,False,False,False,True
4,8841186000000.0,5642494,F,2016-04-29,2016-04-29,56,JARDIM DA PENHA,False,True,True,False,False,False,True


Since we don't need all of the columns, I'll start by omitting the ones that won't be used.

In [39]:
# Remove the identifier and date columns, then list the columns that remain.
dataset = dataset.drop(
    columns=['PatientId', 'AppointmentID', 'ScheduledDay', 'AppointmentDay']
)
dataset.columns

Index(['Gender', 'Age', 'Neighbourhood', 'Scholarship', 'Hipertension',
       'Diabetes', 'Alcoholism', 'Handcap', 'SMS_received', 'Showed_up'],
      dtype='object')

In [40]:
# Replace the categorical 'Neighbourhood' column with one indicator column per
# neighbourhood, appended to the right of the remaining columns.
dataset = pd.concat(
    [
        dataset.drop(columns='Neighbourhood'),
        pd.get_dummies(dataset['Neighbourhood']),
    ],
    axis='columns',
)

In [41]:
# Encode gender as an integer: 'M' -> 0, 'F' -> 1.
# Series.map turns any label missing from the mapping into NaN.
gender_map = dict(M=0, F=1)
dataset['Gender'] = dataset['Gender'].map(gender_map)

In [42]:
# Target: the 'Showed_up' column. Features: every other column.
y = dataset['Showed_up']
X = dataset.drop(columns='Showed_up')

In [43]:
# Hold out 33% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

In [45]:
# Learn the scaling statistics from the training split only, then apply that
# same fitted scaler to both the training and the test features.
standardScaler = StandardScaler().fit(X_train)
X_train = standardScaler.transform(X_train)
X_test = standardScaler.transform(X_test)

In [None]:
# Fit a 5-nearest-neighbours classifier on the scaled training features.
knn = KNeighborsClassifier(n_neighbors = 5)
knn.fit(X_train, y_train)

# Predict the labels of the held-out test rows.
prediction = knn.predict(X_test)

# accuracy_score expects the true and the predicted labels as two separate
# arguments; the result is multiplied by 100 to display a percentage.
accuracy_score(y_test, prediction) * 100