In [187]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


In [188]:
#Load the Data
data = pd.read_csv("../input/health/health_measures.csv")


#Pre-Process the data
new_data = data.drop(columns = ['age','sex','exang','oldpeak','slope','ca','thal'])

df = new_data.rename(columns = {'cp':'Chest-Pain','trestbps':'Blood-Pressure','chol':'Cholestrol',
                                'fbs':'Blood-Sugar','restecg':'Electrocardiogram','thalach':'Heart-Rate',
                               })

In [189]:
#Data Cleaning - Change the data the way you want the model to be trained
for i in range(len(df['Heart-Rate'])):
    if data['age'][i] + df['Heart-Rate'][i] > 240 or data['age'][i] + df['Heart-Rate'][i] < 200:
        df['Heart-Rate'][i] = 1
    else:
        df['Heart-Rate'][i] = 0

In [190]:
#Decide the target value based on the above mentioned conditions
for i in range(len(df['target'])):
    if (df['Chest-Pain'][i] == 2 or df['Chest-Pain'][i] == 3) and (90 < df['Blood-Pressure'][i] < 141) and (df['Cholestrol'][i] < 240) and (df['Blood-Sugar'][i] == 0) and (df['Electrocardiogram'][i] == 0) and (df['Heart-Rate'][i] == 0):
        df['target'][i] = 1
    else:
        df['target'][i] = 0

In [191]:
#Pre-Processed Data
df

Unnamed: 0,Chest-Pain,Blood-Pressure,Cholestrol,Blood-Sugar,Electrocardiogram,Heart-Rate,target
0,0,145,233,1,2,0,0
1,3,160,286,0,2,1,0
2,3,120,229,0,2,1,0
3,2,130,250,0,0,0,0
4,1,130,204,0,2,0,0
...,...,...,...,...,...,...,...
298,0,110,264,0,0,1,0
299,3,144,193,1,0,0,0
300,3,130,131,0,0,1,0
301,1,130,236,0,2,0,0


1. **Chest-pain** : 0 typical angina, 1 atypical angina, 2 non- anginal pain, 3 Normal
2. **Blood-Pressure** : 91-139 is normal, 90> and 140< is unsafe
3. **Cholestrol** : <240 is normal, else it's unsafe
4. **Blood-Sugar** : 0 is normal, 1 is not
5. **Electrocardiogram** : 0 is Normal, 1: having ST-T wave abnormality, 2: showing probable or definite left ventricular hypertrophyby Estes' criteria
6. **Heart-Rate** : 0 is normal, 1 is not
7. **Target** : 1 is ready to undergo surgery, 0 is not


In [192]:
#Load the data, 'X' being the features and 'y' being the target
X = data.drop('target', axis=1)
y = data['target']

In [193]:
#Split the data into 80per for training and 20per for testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [194]:
# Create the Logistic Regression model
model = LogisticRegression(solver='liblinear', C = 10, random_state = 0)

# Fit the model to the training data
model.fit(X_train, y_train)

In [195]:
#Test the model by making it predict the X-test data
y_pred = model.predict(X_test)

In [196]:
#Check the performance of the model
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)

print("Accuracy:", accuracy)
print("Classification Report:\n", report)
print("Confusion Matrix:\n", cm)

Accuracy: 0.9016393442622951
Classification Report:
               precision    recall  f1-score   support

           0       0.90      0.90      0.90        29
           1       0.91      0.91      0.91        32

    accuracy                           0.90        61
   macro avg       0.90      0.90      0.90        61
weighted avg       0.90      0.90      0.90        61

Confusion Matrix:
 [[26  3]
 [ 3 29]]
