# Applying ML Models

In [20]:
from sklearn import model_selection, metrics, preprocessing
import pandas as pd

# Read the CSV file into the working DataFrame used by the rest of the notebook.
df = pd.read_csv(filepath_or_buffer='data.csv')

In [15]:
# Cast each of the listed columns to the generic ``object`` dtype, one
# column at a time and in the same order as before.
for column_name in ('sex', 'cp', 'fbs', 'restecg', 'exang', 'slope', 'thal'):
    df[column_name] = df[column_name].astype('object')

In [21]:
def label_encode(dataset, column):
    """Return a copy of ``dataset`` with ``column`` label-encoded.

    A fresh ``LabelEncoder`` is fitted on the values of ``dataset[column]``
    and the encoded values replace that column in the returned copy.
    The ``dataset`` passed in is not modified.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Frame that contains ``column``.
    column : hashable
        Label of the column to encode.

    Returns
    -------
    pandas.DataFrame
        Copy of ``dataset`` whose ``column`` holds the encoded values.
    """
    encoded_values = preprocessing.LabelEncoder().fit_transform(dataset[column])

    encoded_dataset = dataset.copy()
    encoded_dataset[column] = encoded_values
    return encoded_dataset

In [22]:
# Replace the 'sex' column with its label-encoded values, then preview the
# first rows of the updated frame.
df = label_encode(dataset=df, column='sex')
df.head(n=5)

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


## Split Train and Test sets

In [31]:
# Split the data into train and test partitions.
# NOTE: the target column is dropped with the ``columns=`` keyword; passing
# the axis positionally (``df.drop('target', 1)``) is rejected by pandas 2.x.
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    df.drop(columns='target'),  # Input data: every column except the target
    df['target'],               # Output data: the target series
    test_size=.2,               # Fraction of the rows given to the test set
    random_state=10,            # Seed used to shuffle, keeps the split reproducible
)

In [24]:
# Preview the first rows of the training features.
X_train.head(n=5)

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal
286,59,1,3,134,204,0,1,162,0,0.8,2,2,2
102,63,0,1,140,195,0,1,179,0,0.0,2,2,2
242,64,1,0,145,212,0,0,132,0,2.0,1,2,1
65,35,0,0,138,183,0,1,182,0,1.4,2,0,2
35,46,0,2,142,177,0,0,160,1,1.4,0,0,2


In [50]:
from sklearn.ensemble import RandomForestClassifier

# Seed the forest so the accuracy printed below is reproducible from run to
# run; the train/test split is already seeded with the same value.
model = RandomForestClassifier(max_depth=20, random_state=10)

# Fit on the training partition and predict the held-out rows.
model.fit(X_train, y_train)
predictions = model.predict(X_test)

# Fraction of test rows whose predicted label matches the true label.
accuracy = metrics.accuracy_score(y_test, predictions)
print(accuracy)

0.8360655737704918




In [69]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

def test_classifiers(X_train, X_test, y_train, y_test):
    classifiers=[['Logistic Regression',LogisticRegression()],
           ['Decision Tree Classification',DecisionTreeClassifier()],
           ['Gradient Boosting Classification', GradientBoostingClassifier()],
           ['Ada Boosting Classification',AdaBoostClassifier()],
           ['Extra Tree Classification', ExtraTreesClassifier()],
           ['K-Neighbors Classification',KNeighborsClassifier()],
           ['Support Vector Classification',SVC()],
           ['Gaussian Naive Bayes',GaussianNB()]]
    cla_pred = {'method': [], 'accuracy': []}
    for name, model in classifiers:
        model.fit(X_train, y_train)
        predictions = model.predict(X_test)
        accuracy = accuracy_score(y_test, predictions)
        
        cla_pred['method'].append(name)
        cla_pred['accuracy'].append(accuracy*100)
        
    return pd.DataFrame(cla_pred)

In [70]:
test_classifiers(X_train, X_test, y_train, y_test)



Unnamed: 0,method,accuracy
0,Logistic Regression,77.04918
1,Decision Tree Classification,77.04918
2,Gradient Boosting Classification,80.327869
3,Ada Boosting Classification,83.606557
4,Extra Tree Classification,80.327869
5,K-Neighbors Classification,59.016393
6,Support Vector Classification,42.622951
7,Gaussian Naive Bayes,78.688525
