## Importing Libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## Importing dataset

In [2]:
heart_disease = pd.read_csv("heart-disease.csv")
heart_disease.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [3]:
len(heart_disease)

303

## Splitting the data

In [4]:
#Setting up random seed
np.random.seed(42)

#Make data
X = heart_disease.drop("target", axis=1)
y = heart_disease["target"]

#Splitting the data
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

## Trying out various Models

### `Linear SVC`

In [5]:
from sklearn.svm import LinearSVC

clf = LinearSVC(max_iter = 10000)
clf.fit(X_train, y_train)

#Getting model score
clf.score(X_test, y_test)



0.8688524590163934

### `Random Forest Classifier`

In [6]:
from sklearn.ensemble import RandomForestClassifier

#Instantiating the model
clf = RandomForestClassifier(n_estimators=100)
clf.fit(X_train, y_train)

#Getting model score
clf.score(X_test, y_test)

0.8524590163934426

## Making predictions

In [7]:
clf.predict(X_test)

array([0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0,
       1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0], dtype=int64)

In [8]:
np.array([y_test])

array([[0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0,
        0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0]], dtype=int64)

## Comparing predictions to the truth values to evaluate the model

In [9]:
y_preds = clf.predict(X_test)
np.mean(y_preds == y_test)

0.8524590163934426

## Making predictions using `predict_proba()`

#### `predict_proba()` returns probabilities of a classification label

In [10]:
clf.predict_proba(X_test[:5]) #Returning just the first five

array([[0.89, 0.11],
       [0.49, 0.51],
       [0.44, 0.56],
       [0.84, 0.16],
       [0.18, 0.82]])

#### Here, we're getting labels as either `0` or `1`

In [11]:
#Predicting on the same data for the first five
clf.predict(X_test[:5])

array([0, 1, 1, 0, 1], dtype=int64)