# Random Forest Classifier

Thanks for the Dataset: [Dataset](https://github.com/siddiquiamir/Data/blob/master/heart.csv#L1)

## Import Necessary Libraries

In [36]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier

## Data Preparation

In [4]:
dataframe = pd.read_csv('../data/heart.csv')
dataframe.head()

Unnamed: 0,Age,Gender,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease
0,40,M,ATA,140,289,0,Normal,172,N,0.0,Up,0
1,49,F,NAP,160,180,0,Normal,156,N,1.0,Flat,1
2,37,M,ATA,130,283,0,ST,98,N,0.0,Up,0
3,48,F,ASY,138,214,0,Normal,108,Y,1.5,Flat,1
4,54,M,NAP,150,195,0,Normal,122,N,0.0,Up,0


In [5]:
dataframe.isna().sum()

Age               0
Gender            0
ChestPainType     0
RestingBP         0
Cholesterol       0
FastingBS         0
RestingECG        0
MaxHR             0
ExerciseAngina    0
Oldpeak           0
ST_Slope          0
HeartDisease      0
dtype: int64

In [8]:
dataframe.describe()

Unnamed: 0,Age,RestingBP,Cholesterol,FastingBS,MaxHR,Oldpeak,HeartDisease
count,918.0,918.0,918.0,918.0,918.0,918.0,918.0
mean,53.510893,132.396514,198.799564,0.233115,136.809368,0.887364,0.553377
std,9.432617,18.514154,109.384145,0.423046,25.460334,1.06657,0.497414
min,28.0,0.0,0.0,0.0,60.0,-2.6,0.0
25%,47.0,120.0,173.25,0.0,120.0,0.0,0.0
50%,54.0,130.0,223.0,0.0,138.0,0.6,1.0
75%,60.0,140.0,267.0,0.0,156.0,1.5,1.0
max,77.0,200.0,603.0,1.0,202.0,6.2,1.0


In [17]:
dataframe.loc[dataframe['RestingBP'] == 0, 'RestingBP'] = dataframe['RestingBP'].mean()

In [23]:
dataframe.loc[dataframe['Cholesterol'] == 0, 'Cholesterol'] = dataframe['Cholesterol'].mean()

In [24]:
dataframe.describe()

Unnamed: 0,Age,RestingBP,Cholesterol,FastingBS,MaxHR,Oldpeak,HeartDisease
count,918.0,918.0,918.0,918.0,918.0,918.0,918.0
mean,53.510893,132.540737,236.047413,0.233115,136.809368,0.887364,0.553377
std,9.432617,17.989932,56.240952,0.423046,25.460334,1.06657,0.497414
min,28.0,80.0,85.0,0.0,60.0,-2.6,0.0
25%,47.0,120.0,198.799564,0.0,120.0,0.0,0.0
50%,54.0,130.0,223.0,0.0,138.0,0.6,1.0
75%,60.0,140.0,267.0,0.0,156.0,1.5,1.0
max,77.0,200.0,603.0,1.0,202.0,6.2,1.0


## Turn Non-numeric Values into Numeric ones

- Gender
- Chestpain
- RestingECG
- ExerciseAngina
- ST_Slope

In [46]:
gender_encoder = LabelEncoder()
chest_pain_encoder = LabelEncoder()
resting_ECG_encoder = LabelEncoder()
exercise_encoder = LabelEncoder()
st_slope_encoder = LabelEncoder()

In [48]:
dataframe['Gender'] = gender_encoder.fit_transform(dataframe['Gender'])
dataframe['ChestPainType'] = chest_pain_encoder.fit_transform(dataframe['ChestPainType'])
dataframe['RestingECG'] = resting_ECG_encoder.fit_transform(dataframe['RestingECG'])
dataframe['ExerciseAngina'] = exercise_encoder.fit_transform(dataframe['ExerciseAngina'])
dataframe['ST_Slope'] = st_slope_encoder.fit_transform(dataframe['ST_Slope'])

In [49]:
dataframe.head()

Unnamed: 0,Age,Gender,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease
0,40,1,1,140.0,289.0,0,1,172,0,0.0,2,0
1,49,0,2,160.0,180.0,0,1,156,0,1.0,1,1
2,37,1,1,130.0,283.0,0,2,98,0,0.0,2,0
3,48,0,0,138.0,214.0,0,1,108,1,1.5,1,1
4,54,1,2,150.0,195.0,0,1,122,0,0.0,2,0


## Assigning the value of X and y

In [50]:
X = dataframe.drop(columns=['HeartDisease'])
X.head()

Unnamed: 0,Age,Gender,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope
0,40,1,1,140.0,289.0,0,1,172,0,0.0,2
1,49,0,2,160.0,180.0,0,1,156,0,1.0,1
2,37,1,1,130.0,283.0,0,2,98,0,0.0,2
3,48,0,0,138.0,214.0,0,1,108,1,1.5,1
4,54,1,2,150.0,195.0,0,1,122,0,0.0,2


In [51]:
y = dataframe['HeartDisease']
y[:5]

0    0
1    1
2    0
3    1
4    0
Name: HeartDisease, dtype: int64

## Splitting the Data into Training and Test Data

In [54]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## Data Scaling