In [13]:
# AI & ML INTERNSHIP – TASK 5
# Train-Test Split & Evaluation Metrics
# Dataset: Heart Disease Dataset

# -------------------------------
# 1. Import Required Libraries
# -------------------------------
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix

# -------------------------------
# 2. Load Dataset
# -------------------------------
# heart.csv is read relative to the current working directory.
df = pd.read_csv("heart.csv")

# Show the head of the frame as a quick sanity check on the load.
print("Dataset Preview:", df.head(), sep="\n")

# -------------------------------
# 3. Separate Features and Target
# -------------------------------
y = df["target"]                    # label column
X = df.drop(columns="target")       # every remaining column is a feature

# -------------------------------
# 4. Train-Test Split
# -------------------------------
# 80/20 hold-out split; the fixed seed keeps the partition reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print("\nTraining set size:", X_train.shape)
print("Testing set size :", X_test.shape)

# -------------------------------
# 5. Train Logistic Regression Model
# -------------------------------
# fit() returns the estimator itself, so construction and training chain.
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)

print("\nModel training completed.")

# -------------------------------
# 6. Make Predictions
# -------------------------------
y_pred = model.predict(X_test)

# -------------------------------
# 7. Evaluation Metrics
# -------------------------------
# Each scorer takes (true labels, predicted labels); evaluate them in order.
accuracy, precision, recall = (
    scorer(y_test, y_pred)
    for scorer in (accuracy_score, precision_score, recall_score)
)
cm = confusion_matrix(y_test, y_pred)

# -------------------------------
# 8. Display Results
# -------------------------------
print("\n--- Model Evaluation ---")
print("Accuracy :", accuracy)
print("Precision:", precision)
print("Recall   :", recall)

print("\nConfusion Matrix:", cm, sep="\n")

# -------------------------------
# 9. Interpretation
# -------------------------------
print("\n--- Interpretation ---")
print(
    "Accuracy shows overall correctness of the model.",
    "Precision shows how many predicted positive cases were correct.",
    "Recall shows how many actual positive cases were correctly identified.",
    "Confusion matrix shows TP, TN, FP, FN values.",
    sep="\n",
)



Dataset Preview:
   age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  slope  \
0   52    1   0       125   212    0        1      168      0      1.0      2   
1   53    1   0       140   203    1        0      155      1      3.1      0   
2   70    1   0       145   174    0        1      125      1      2.6      0   
3   61    1   0       148   203    0        1      161      0      0.0      2   
4   62    0   0       138   294    1        1      106      0      1.9      1   

   ca  thal  target  
0   2     3       0  
1   0     3       0  
2   0     3       0  
3   1     3       0  
4   3     2       0  

Training set size: (820, 13)
Testing set size : (205, 13)

Model training completed.

--- Model Evaluation ---
Accuracy : 0.7951219512195122
Precision: 0.7563025210084033
Recall   : 0.8737864077669902

Confusion Matrix:
[[73 29]
 [13 90]]

--- Interpretation ---
Accuracy shows overall correctness of the model.
Precision shows how many predicted positive cases w