In [1]:
import pandas as pd

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    confusion_matrix,
    classification_report
)
import joblib

In [5]:
# Location of the heart-disease CSV that the next cell reads with pandas.
# This is an absolute path, so the file must already exist there.
# NOTE(review): "/content" looks like the Colab working directory — confirm
# before running this notebook in another environment.
DATA_PATH = "/content/heart.csv"

In [6]:
# Read the whole CSV found at DATA_PATH into the working DataFrame.
df = pd.read_csv(filepath_or_buffer=DATA_PATH)


In [7]:
# Sanity check of what was loaded: dimensions plus a short preview.
dataset_shape = df.shape
preview_rows = df.head()

print("‚úÖ Dataset loaded successfully!")
print("Shape:", dataset_shape)
print("\nFirst 5 rows:\n", preview_rows)

‚úÖ Dataset loaded successfully!
Shape: (1025, 14)

First 5 rows:
    age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  slope  \
0   52    1   0       125   212    0        1      168      0      1.0      2   
1   53    1   0       140   203    1        0      155      1      3.1      0   
2   70    1   0       145   174    0        1      125      1      2.6      0   
3   61    1   0       148   203    0        1      161      0      0.0      2   
4   62    0   0       138   294    1        1      106      0      1.9      1   

   ca  thal  target  
0   2     3       0  
1   0     3       0  
2   0     3       0  
3   1     3       0  
4   3     2       0  


In [8]:
# Schema overview: the column names, then how many entries are missing in each.
column_names = df.columns.tolist()
missing_per_column = df.isna().sum()

print("\n‚úÖ Column names:\n", column_names)
print("\n‚úÖ Missing values per column:\n", missing_per_column)


‚úÖ Column names:
 ['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal', 'target']

‚úÖ Missing values per column:
 age         0
sex         0
cp          0
trestbps    0
chol        0
fbs         0
restecg     0
thalach     0
exang       0
oldpeak     0
slope       0
ca          0
thal        0
target      0
dtype: int64


In [9]:
# Separate the label column from the features.
# Fail fast with a readable message when the label column is absent.
has_target = "target" in df.columns
if not has_target:
    raise ValueError("‚ùå 'target' column not found. Please check dataset column names.")

y = df["target"]
X = df.drop(columns="target")

# Report the resulting shapes and how the label values are distributed.
class_counts = y.value_counts()
print("\n‚úÖ Features shape:", X.shape)
print("‚úÖ Target shape:", y.shape)
print("\nTarget distribution:\n", class_counts)


‚úÖ Features shape: (1025, 13)
‚úÖ Target shape: (1025,)

Target distribution:
 target
1    526
0    499
Name: count, dtype: int64


In [10]:
# Hold out part of the data for evaluation.
split_options = {
    "test_size": 0.2,     # 20% of the rows go to the test set
    "random_state": 42,   # fixed seed so the split is reproducible
    "stratify": y,        # keep the class proportions similar in both parts
}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

In [11]:
# Row counts of the two partitions produced by the split.
n_train, n_test = len(X_train), len(X_test)

print("\n‚úÖ Data split done!")
print("Train size:", n_train)
print("Test size :", n_test)


‚úÖ Data split done!
Train size: 820
Test size : 205


In [12]:
# Build the logistic-regression classifier and fit it on the training
# partition in one expression; `fit` returns the fitted estimator itself.
model = LogisticRegression(solver="liblinear", max_iter=1000).fit(X_train, y_train)

print("\n‚úÖ Model training completed!")


‚úÖ Model training completed!


In [13]:
# Predict labels for the held-out rows.
y_pred = model.predict(X_test)

# Show the first few predictions next to the true labels for a quick eyeball check.
predicted_head = y_pred[:10]
actual_head = y_test.head(10).to_numpy()

print("\n‚úÖ Predictions generated!")
print("First 10 predictions:", predicted_head)
print("First 10 actual     :", actual_head)



‚úÖ Predictions generated!
First 10 predictions: [0 0 0 1 0 0 1 1 1 0]
First 10 actual     : [0 1 0 1 0 0 1 0 1 1]


In [14]:
# Headline metrics on the test partition, computed with each scorer's defaults.
# Precision and recall therefore assume class '1' is the positive class.
acc, prec, rec = (
    scorer(y_test, y_pred)
    for scorer in (accuracy_score, precision_score, recall_score)
)

In [15]:

# Print the three headline metrics, four decimals each, one per line.
print(
    "\n================= EVALUATION =================",
    f"Accuracy : {acc:.4f}",
    f"Precision: {prec:.4f}",
    f"Recall   : {rec:.4f}",
    sep="\n",
)


Accuracy : 0.8341
Precision: 0.7886
Recall   : 0.9238


In [16]:
# Confusion matrix of true labels (rows) against predicted labels (columns).
# The label order is pinned to [0, 1] so the matrix is always 2x2, even if one
# class happens to be absent from both y_test and y_pred. Without this, the
# four-way unpacking below raises ValueError on a 1x1 matrix.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])

# With rows/columns ordered [0, 1], the flattened order is TN, FP, FN, TP.
tn, fp, fn, tp = cm.ravel()

print("\nConfusion Matrix:")
print(cm)


Confusion Matrix:
[[74 26]
 [ 8 97]]


In [17]:
# Spell out each cell of the confusion matrix, one per line.
breakdown_lines = [
    "\nConfusion Matrix Breakdown:",
    f"TN (True Negatives) : {tn}",
    f"FP (False Positives): {fp}",
    f"FN (False Negatives): {fn}",
    f"TP (True Positives) : {tp}",
]
print("\n".join(breakdown_lines))


Confusion Matrix Breakdown:
TN (True Negatives) : 74
FP (False Positives): 26
FN (False Negatives): 8
TP (True Positives) : 97


In [18]:
# Per-class precision / recall / F1 table as text (also reused when the
# evaluation report file is written).
report = classification_report(y_test, y_pred)
print("\nClassification Report:\n", report)


# Plain-language reading of the headline metrics and the two error counts.
interpretation_lines = (
    "\n================= INTERPRETATION =================",
    "Interpretation:",
    f"- The model accuracy is {acc:.2%}, meaning overall {acc:.2%} predictions are correct.",
    f"- Precision is {prec:.2%}, meaning when the model predicts disease, it is correct {prec:.2%} of the time.",
    f"- Recall is {rec:.2%}, meaning it detects {rec:.2%} of actual disease cases.",
    f"- False Negatives (FN) = {fn}, which are missed disease cases (important to minimize).",
    f"- False Positives (FP) = {fp}, which are healthy predicted as disease (causes extra tests).",
)
print(*interpretation_lines, sep="\n")


Classification Report:
               precision    recall  f1-score   support

           0       0.90      0.74      0.81       100
           1       0.79      0.92      0.85       105

    accuracy                           0.83       205
   macro avg       0.85      0.83      0.83       205
weighted avg       0.84      0.83      0.83       205


Interpretation:
- The model accuracy is 83.41%, meaning overall 83.41% predictions are correct.
- Precision is 78.86%, meaning when the model predicts disease, it is correct 78.86% of the time.
- Recall is 92.38%, meaning it detects 92.38% of actual disease cases.
- False Negatives (FN) = 8, which are missed disease cases (important to minimize).
- False Positives (FP) = 26, which are healthy predicted as disease (causes extra tests).


In [19]:
# Persist the fitted classifier next to the notebook so it can be reloaded later.
# The file is a pickle: only load it back from a source you trust.
MODEL_PATH = "logreg_heart_model.pkl"
joblib.dump(value=model, filename=MODEL_PATH)

print(f"\n‚úÖ Trained model saved as: {MODEL_PATH}")


‚úÖ Trained model saved as: logreg_heart_model.pkl


In [20]:
# Write a plain-text evaluation summary: headline metrics, the confusion
# matrix, and the per-class classification report.
REPORT_PATH = "evaluation_report.txt"

# Assemble the pieces first so the file is written in a single call.
report_sections = [
    "Heart Disease Logistic Regression Evaluation Report\n",
    "=================================================\n\n",
    f"Accuracy : {acc:.4f}\n",
    f"Precision: {prec:.4f}\n",
    f"Recall   : {rec:.4f}\n\n",
    "Confusion Matrix:\n",
    str(cm),
    "\n\nClassification Report:\n",
    report,
]

# Explicit UTF-8 so the file's encoding does not depend on the platform locale.
with open(REPORT_PATH, "w", encoding="utf-8") as f:
    f.writelines(report_sections)

print(f"‚úÖ Evaluation report saved as: {REPORT_PATH}")

print("\nüéâ TASK COMPLETED SUCCESSFULLY!")

‚úÖ Evaluation report saved as: evaluation_report.txt

üéâ TASK COMPLETED SUCCESSFULLY!


In [21]:
# Offer the evaluation report as a browser download when running in Google Colab.
# The import is guarded because `google.colab` only exists inside Colab; elsewhere
# the report simply stays on disk at REPORT_PATH.
try:
    from google.colab import files
except ImportError:
    print(f"Not running in Google Colab; report is available at: {REPORT_PATH}")
else:
    # Reuse REPORT_PATH rather than repeating the filename, so the download
    # always matches the file written by the report cell.
    files.download(REPORT_PATH)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>