In [1]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt

In [2]:
heart_disease = pd.read_csv('heart-disease.csv')
heart_disease.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [3]:
from sklearn.datasets import load_boston
boston = load_boston()

boston_df = pd.DataFrame(boston['data'], columns = boston['feature_names'])
boston_df['target'] = pd.Series(boston['target'])

boston_df.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,target
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2


## Classification Model

In [4]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

np.random.seed(9)

X = heart_disease.drop('target',axis = 1)
y = heart_disease['target']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)

clf = RandomForestClassifier()
clf.fit(X_train, y_train)

clf.score(X_test, y_test)

0.819672131147541

In [5]:
from sklearn.model_selection import cross_val_score

In [6]:
np.random.seed(9)
cv_acc = cross_val_score(clf, X, y)  #scoring parameter is None
cv_acc

array([0.81967213, 0.8852459 , 0.81967213, 0.81666667, 0.76666667])

In [8]:
print(f'The Cross Validated accuracy is : {np.mean(cv_acc)*100:.2f}%')

The Cross Validated accuracy is : 82.16%


In [9]:
#Accuracy
cv_acc = cross_val_score(clf, X, y, scoring = 'accuracy')
print(f'The Cross Validated accuracy is : {np.mean(cv_acc)*100:.2f}%')

The Cross Validated accuracy is : 81.50%


In [11]:
#Precision
cv_precision = cross_val_score(clf, X, y, scoring = 'precision')
cv_precision

array([0.82857143, 0.90625   , 0.84375   , 0.79411765, 0.74358974])

In [12]:
np.mean(cv_precision)

0.8232557638439992

In [13]:
#Recall
cv_recall = cross_val_score(clf, X, y, scoring = 'recall')
np.mean(cv_recall)

0.8666666666666666

In [14]:
#F1-score
cv_f1 = cross_val_score(clf, X, y, scoring = 'f1')
np.mean(cv_f1)

0.8349662785866163

## Regression Model

In [16]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

np.random.seed(22)

X = boston_df.drop('target',axis = 1)
y = boston_df['target']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)

clf = RandomForestRegressor()
clf.fit(X_train, y_train)

clf.score(X_test, y_test)

0.8192625713822996

In [19]:
np.random.seed(22)
cv_r2 = cross_val_score(clf, X, y, scoring = None)
np.mean(cv_r2)

0.618610063020285

In [20]:
np.random.seed(22)
#r2_score
cv_r2 = cross_val_score(clf, X, y, scoring = 'r2')
np.mean(cv_r2)

0.618610063020285

In [21]:
#mae
cv_mae = cross_val_score(clf, X, y, scoring = 'neg_mean_absolute_error')
np.mean(cv_mae)

-3.0188510192195683

In [22]:
#mse
cv_mse = cross_val_score(clf, X, y, scoring = 'neg_mean_squared_error')
np.mean(cv_mse)

-21.028419198291584