## import libaries

In [1]:
import numpy as np
import pandas as pd
import pylab as pl
import scipy.optimize as opt
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

## Loading data from csv file

In [2]:
dataframe = pd.read_csv("heart.csv")
dataframe.head()

Unnamed: 0,age,sex,cp,trtbps,chol,fbs,restecg,thalachh,exng,oldpeak,slp,caa,thall,output
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


### Data pre-processing and selection

In [3]:
dataframe.dtypes

age           int64
sex           int64
cp            int64
trtbps        int64
chol          int64
fbs           int64
restecg       int64
thalachh      int64
exng          int64
oldpeak     float64
slp           int64
caa           int64
thall         int64
output        int64
dtype: object

In [4]:
x = np.asarray(dataframe[['age', 'sex', 'cp', 'trtbps', 'chol', 'fbs', 'restecg', 'thalachh', 'exng', 'oldpeak', 'slp', 'caa', 'thall']])
x[0:5]

array([[ 63. ,   1. ,   3. , 145. , 233. ,   1. ,   0. , 150. ,   0. ,
          2.3,   0. ,   0. ,   1. ],
       [ 37. ,   1. ,   2. , 130. , 250. ,   0. ,   1. , 187. ,   0. ,
          3.5,   0. ,   0. ,   2. ],
       [ 41. ,   0. ,   1. , 130. , 204. ,   0. ,   0. , 172. ,   0. ,
          1.4,   2. ,   0. ,   2. ],
       [ 56. ,   1. ,   1. , 120. , 236. ,   0. ,   1. , 178. ,   0. ,
          0.8,   2. ,   0. ,   2. ],
       [ 57. ,   0. ,   0. , 120. , 354. ,   0. ,   1. , 163. ,   1. ,
          0.6,   2. ,   0. ,   2. ]])

In [5]:
dataframe['output'] = dataframe['output'].astype('int')
y = np.asarray(dataframe['output'])
y [0:5]

array([1, 1, 1, 1, 1])

### Train and Test data

In [6]:
x_train, x_test, y_train, y_test = train_test_split( x, y, test_size=0.2, random_state=4)
print ('Train set:', x_train.shape,  y_train.shape)
print ('Test set:', x_test.shape,  y_test.shape)

Train set: (242, 13) (242,)
Test set: (61, 13) (61,)


In [7]:
np.unique(y_test)

array([0, 1])

## Modeling: SVM

In [8]:
from sklearn import svm
clf = svm.SVC(kernel='linear', probability = True, C = 1, gamma = 'scale')
clf.fit(x_train, y_train) 

In [9]:
yhat = clf.predict(x_test)
yhat [0:5]

array([1, 0, 1, 1, 1])

### Evaluation

In [10]:
from sklearn.metrics import classification_report, confusion_matrix
import itertools

In [11]:
from sklearn.metrics import confusion_matrix, classification_report
import numpy as np

# Your code to generate y_test and yhat

# Get the unique labels present in y_test
unique_labels = np.unique(y_test)

# Check if there are at least two unique labels
if len(unique_labels) >= 2:
    cnf_matrix = confusion_matrix(y_test, yhat, labels=unique_labels)
    np.set_printoptions(precision=2)
    print(classification_report(y_test, yhat))
else:
    print("There are not enough unique labels in y_test to compute a confusion matrix.")


              precision    recall  f1-score   support

           0       0.92      0.88      0.90        25
           1       0.92      0.94      0.93        36

    accuracy                           0.92        61
   macro avg       0.92      0.91      0.91        61
weighted avg       0.92      0.92      0.92        61



## Modeling: Logistic Regression

In [12]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
LR = LogisticRegression(C = 0.01, solver = 'lbfgs').fit(x_train, y_train)
LR

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [13]:
yhat = LR.predict(x_test)
print(yhat)

yhat_prob = LR.predict_proba(x_test)

[1 0 1 1 1 1 0 1 1 1 1 1 1 1 1 0 1 1 0 1 1 1 0 1 1 0 0 1 1 0 1 1 1 1 1 1 1
 1 1 0 0 0 0 1 1 0 0 1 0 1 0 1 0 1 1 1 1 1 0 0 1]


### Evaluation

In [14]:
from sklearn.metrics import jaccard_score
jaccard_score(y_test, yhat,pos_label=0)

0.6296296296296297

## Modeling: Decision Tress

In [15]:
from sklearn.tree import DecisionTreeClassifier
drugTree = DecisionTreeClassifier(criterion="entropy", max_depth = 4)
drugTree

In [16]:
drugTree.fit(x_train, y_train)

### Prediction

In [17]:
predTree = drugTree.predict(x_test)

In [18]:
print (predTree [0:5])
print (y_test [0:5])

[0 0 1 1 1]
[1 0 1 1 0]


### Evaluation

In [19]:
from sklearn import metrics
print (print("DecisionTrees's Accuracy: ", metrics.accuracy_score(y_test, predTree)))

DecisionTrees's Accuracy:  0.8524590163934426
None
