<a href="https://colab.research.google.com/github/michellejuliet/Health-Prediictor-/blob/main/LinearRegression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Mount Google Drive so the CSV stored there is reachable from the Colab runtime
from google.colab import drive
drive.mount('/content/drive')

path = '/content/drive/MyDrive/Colab Notebooks/cardio_train.csv'

# Read the semicolon-delimited file and preprocess it in one non-mutating chain:
#   * floor-divide `age` by 365 (NOTE(review): presumably days -> whole years; confirm)
#   * discard the `id` column so it is not part of the feature set built later
cardio_data = (
    pd.read_csv(path, sep=';')
    .assign(age=lambda frame: frame['age'] // 365)
    .drop(columns='id')
)

# Bar Plot - Age vs. Cardiac Disease
# Mean of the `cardio` column for each distinct age. `as_index=False` keeps the
# group key as a regular column, so each bar's age comes from the same row as
# its mean instead of from a separately sorted list that merely lines up.
age_cardio = (
    cardio_data.groupby('age', as_index=False)['cardio'].mean()
    .rename(columns={'age': 'Age', 'cardio': 'The Cardiac disease by average'})
)
age_fig = px.bar(age_cardio, x='Age', y='The Cardiac disease by average', color='Age',
                 title='The risk of Cardiac disease by increasing Age')
# A bare expression is rendered only when it is the last statement of a cell.
# More code follows this plot, so display the figure explicitly.
age_fig.show()

# Bar Plot - Cholesterol vs. Cardiac Disease
# Display labels are attached positionally to the cholesterol levels in
# ascending order (groupby sorts its keys).
# NOTE(review): assumes the dataset has exactly three levels ordered from
# normal to well above normal -- confirm against the data dictionary.
cholesterol_labels = ['Normal', 'Above Normal', 'Well Above Normal']

# Mean of the `cardio` column for each cholesterol level, key kept as a column.
age_chol = (
    cardio_data.groupby('cholesterol', as_index=False)['cardio'].mean()
    .rename(columns={'cardio': 'The Cardiac disease by average'})
)

# Fail with an explicit message instead of an opaque pandas length error when
# the number of levels in the data does not match the label list.
if len(age_chol) != len(cholesterol_labels):
    raise ValueError(
        f"Expected {len(cholesterol_labels)} cholesterol levels, "
        f"found {len(age_chol)}: {age_chol['cholesterol'].tolist()}"
    )
age_chol['The amount of Cholesterol'] = cholesterol_labels

chol_fig = px.bar(age_chol, x='The amount of Cholesterol', y='The Cardiac disease by average',
                  color='The amount of Cholesterol',
                  title='The risk of Cardiac disease by increasing Cholesterol')
# A bare expression is rendered only when it is the last statement of a cell.
# More code follows this plot, so display the figure explicitly.
chol_fig.show()

# Splitting the data into features (X) and target (y)
X = cardio_data.drop('cardio', axis=1)
y = cardio_data['cardio']

# Splitting the data into training and testing sets.
# The split happens BEFORE scaling so that no statistic of the test rows can
# influence the features the model is trained on (avoids data leakage).
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Feature Scaling
# Learn mean / standard deviation from the training rows only, then apply those
# same parameters to the test rows.
ss = StandardScaler()
X_train = ss.fit_transform(X_train)
X_test = ss.transform(X_test)

# Keep `X` as a scaled array (as it was before this change) for any later code
# that reads it; it is scaled with the training-set parameters.
X = ss.transform(X)

# Logistic Regression Model
# Build the classifier and train it on the training split in one expression;
# `fit` returns the estimator itself, so the fitted model is what gets bound.
logistic_model = LogisticRegression(max_iter=1000, random_state=0).fit(X_train, y_train)

# Predicted class label for every row of the test split
logistic_preds = logistic_model.predict(X_test)

# Evaluate the logistic regression model
# Each scorer is called with the true test labels and the predictions; the
# order of the scorers matches the order of the names being unpacked.
accuracy, precision, recall, f1 = (
    scorer(y_test, logistic_preds)
    for scorer in (accuracy_score, precision_score, recall_score, f1_score)
)

# Print the metrics in percentages (two decimals), one per line
print(
    "Logistic Regression Metrics:",
    f"Accuracy: {accuracy * 100:.2f}%",
    f"Precision: {precision * 100:.2f}%",
    f"Recall: {recall * 100:.2f}%",
    f"F1-score: {f1 * 100:.2f}%",
    sep="\n",
)

# Classification Report
# Request the report as a dict, then keep only the entries whose key is made of
# digits. Those are the per-class rows; the remaining keys hold aggregate
# figures and are skipped.
report = classification_report(y_test, logistic_preds, output_dict=True)
per_class_rows = (
    (label, scores) for label, scores in report.items() if label.isdigit()
)

# One header line per class followed by its three scores as percentages
for label, scores in per_class_rows:
    print(
        f"Class {label}:",
        f"  Precision: {scores['precision'] * 100:.2f}%",
        f"  Recall: {scores['recall'] * 100:.2f}%",
        f"  F1-score: {scores['f1-score'] * 100:.2f}%",
        sep="\n",
    )

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Logistic Regression Metrics:
Accuracy: 72.15%
Precision: 74.22%
Recall: 67.18%
F1-score: 70.52%
Class 0:
  Precision: 70.46%
  Recall: 77.04%
  F1-score: 73.60%
Class 1:
  Precision: 74.22%
  Recall: 67.18%
  F1-score: 70.52%
