In [175]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

In [176]:
# Provenance notes: address of the UCI "processed.cleveland" file and a list of
# 14 column names. Neither `url` nor `columns` is passed to the read below --
# the frame comes from a local CSV and takes its column names from that file.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/processed.cleveland.data"
columns = [
    'age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg',
    'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal',
    'target',
]

# Path is relative to the notebook's working directory.
data = pd.read_csv(filepath_or_buffer="Heart_disease_cleveland.csv")

In [177]:
# Plain-text dump of the loaded frame; pandas elides the middle rows of a long
# frame in this view, so only the head and tail are shown.
print(data)

     age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  \
0     63    1   0       145   233    1        2      150      0      2.3   
1     67    1   3       160   286    0        2      108      1      1.5   
2     67    1   3       120   229    0        2      129      1      2.6   
3     37    1   2       130   250    0        0      187      0      3.5   
4     41    0   1       130   204    0        2      172      0      1.4   
..   ...  ...  ..       ...   ...  ...      ...      ...    ...      ...   
298   45    1   0       110   264    0        0      132      0      1.2   
299   68    1   3       144   193    1        0      141      0      3.4   
300   57    1   3       130   131    0        0      115      1      1.2   
301   57    0   1       130   236    0        2      174      0      0.0   
302   38    1   2       138   175    0        0      173      0      0.0   

     slope  ca  thal  target  
0        2   0     2       0  
1        1   3     1     

In [178]:
# Force every column to a numeric dtype. Any non-numeric token (including the
# '?' placeholder) becomes NaN via errors='coerce', so a separate in-place
# replace('?', ...) pass is not needed and the frame is never mutated in place.
data = data.apply(pd.to_numeric, errors='coerce')

# A row with no label cannot be used for supervised training. Mean-filling the
# label column would invent fractional class values, so such rows are dropped
# instead of imputed.
data = data.dropna(subset=['target'])

# Fill the remaining gaps (features only at this point) with the column mean.
# NOTE(review): several columns look categorical (e.g. 'ca', 'thal'); a mean
# fill can produce values between categories -- confirm whether a mode or
# median fill is preferable for those.
data = data.fillna(data.mean())

In [179]:
# Preview the first five rows of the frame after the numeric coercion / fill step.
print(data.head())

   age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  slope  \
0   63    1   0       145   233    1        2      150      0      2.3      2   
1   67    1   3       160   286    0        2      108      1      1.5      1   
2   67    1   3       120   229    0        2      129      1      2.6      1   
3   37    1   2       130   250    0        0      187      0      3.5      2   
4   41    0   1       130   204    0        2      172      0      1.4      0   

   ca  thal  target  
0   0     2       0  
1   3     1       1  
2   2     3       1  
3   0     1       0  
4   0     1       0  


In [180]:
# (rows, columns) of the cleaned frame, shown as the cell's result.
data.shape

(303, 14)

In [181]:
# Feature matrix: every column except the label.
X = data.drop(columns=['target'])
# Label vector: the 'target' column on its own.
Y = data.loc[:, 'target']

In [182]:
# Plain-text dump of the feature matrix (the frame without the 'target' column).
print(X)

     age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  \
0     63    1   0       145   233    1        2      150      0      2.3   
1     67    1   3       160   286    0        2      108      1      1.5   
2     67    1   3       120   229    0        2      129      1      2.6   
3     37    1   2       130   250    0        0      187      0      3.5   
4     41    0   1       130   204    0        2      172      0      1.4   
..   ...  ...  ..       ...   ...  ...      ...      ...    ...      ...   
298   45    1   0       110   264    0        0      132      0      1.2   
299   68    1   3       144   193    1        0      141      0      3.4   
300   57    1   3       130   131    0        0      115      1      1.2   
301   57    0   1       130   236    0        2      174      0      0.0   
302   38    1   2       138   175    0        0      173      0      0.0   

     slope  ca  thal  
0        2   0     2  
1        1   3     1  
2        1   2    

In [183]:
# Plain-text dump of the label Series taken from the 'target' column.
print(Y)

0      0
1      1
2      1
3      0
4      0
      ..
298    1
299    1
300    1
301    1
302    0
Name: target, Length: 303, dtype: int64


In [184]:
# Split FIRST, then fit the scaler on the training rows only. Fitting
# StandardScaler on the full matrix lets the test rows' mean/variance leak into
# the training features and makes the held-out accuracy optimistic.
# The split arguments (test_size, random_state) are unchanged.
X_train_raw, X_test_raw, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=3
)

scaler = StandardScaler()
# Learn the scaling statistics from the training partition...
X_train = scaler.fit_transform(X_train_raw)
# ...and apply those same statistics to the held-out partition.
X_test = scaler.transform(X_test_raw)

# Fully scaled matrix, kept so any cell that still reads `X_scaled` keeps
# working; it now uses the train-fitted statistics.
X_scaled = scaler.transform(X)

In [186]:
# Sanity check: full / train / test feature shapes, one per line.
print(X.shape, X_train.shape, X_test.shape, sep='\n')

(303, 13)
(242, 13)
(61, 13)


In [187]:
# Sanity check: full / train / test label shapes, one per line.
print(Y.shape, Y_train.shape, Y_test.shape, sep='\n')

(303,)
(242,)
(61,)


In [188]:
# Logistic-regression classifier; the iteration cap is raised to 1000.
model = LogisticRegression(max_iter=1000)
# Train on the training partition (features, then labels).
model.fit(X=X_train, y=Y_train)

In [189]:
# Predict on the rows the model was fitted on, then score against their labels.
prediction_on_training_data = model.predict(X_train)
accuracy_on_training_data = accuracy_score(
    y_true=Y_train,
    y_pred=prediction_on_training_data,
)

In [190]:
# Report the fraction of training rows the model labels correctly.
print('Accuracy on training data : ', accuracy_on_training_data)

Accuracy on training data :  0.8471074380165289


In [191]:
# Predict on the held-out rows, then score against their labels.
prediction_on_test_data = model.predict(X_test)
accuracy_on_test_data = accuracy_score(
    y_true=Y_test,
    y_pred=prediction_on_test_data,
)

In [192]:
# Report the fraction of held-out rows the model labels correctly.
print('Accuracy on test data : ', accuracy_on_test_data)

Accuracy on test data :  0.8688524590163934


In [193]:
# Two hand-entered feature rows, in the same column order as X. Their values
# match rows 0 and 1 of the loaded frame. Only `input2` is scored below;
# `input1` is defined but not used in this cell.
input1 = [63, 1, 0, 145, 233, 1, 2, 150, 0, 2.3, 2, 0, 2]
input2 = [67, 1, 3, 160, 286, 0, 2, 108, 1, 1.5, 1, 3, 1]

# Wrap the row in a one-row frame carrying X's column names, then apply the
# already-fitted scaler before asking the model for a class.
input_df = pd.DataFrame(data=[input2], columns=X.columns)
input_scaled = scaler.transform(input_df)
prediction = model.predict(input_scaled)

print(prediction)
# Class 1 is reported as "Most Likely"; anything else as "Least Likely".
print("Most Likely" if prediction[0] == 1 else "Least Likely")

[1]
Most Likely
