In [1]:
# Install required libraries silently
!pip install scikit-learn pandas numpy joblib streamlit pyngrok --quiet


[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m9.1/9.1 MB[0m [31m27.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m6.9/6.9 MB[0m [31m46.0 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
# Import the 'csv' module for working with CSV (Comma Separated Values) files.
import csv

# Import the 'files' module from google.colab to allow file uploads.
from google.colab import files

# Import pandas library for data manipulation and analysis.
import pandas as pd

# Prompt the user to upload their heart disease dataset file (like heart.csv).
# The result is a dict keyed by the uploaded filename(s).
uploaded = files.upload()

# Stop with a clear message when nothing was uploaded (for example when the
# dialog was dismissed) instead of failing later with an opaque IndexError.
if not uploaded:
    raise ValueError(
        "No file was uploaded. Re-run this cell and choose the heart disease CSV (e.g. heart.csv)."
    )

# Use the first uploaded file; any additional uploads are ignored.
filename = next(iter(uploaded))

# Read the uploaded CSV file into a pandas DataFrame named 'df'.
df = pd.read_csv(filename)

# Confirm the load and summarize the dataset: (rows, columns) and column names.
print("Dataset loaded successfully!")
print("Shape:", df.shape)
print("\nColumns:", df.columns.tolist())

# Display the first few rows of the dataset for a quick preview.
df.head()


Saving heart.csv to heart.csv
Dataset loaded successfully!
Shape: (303, 14)

Columns: ['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal', 'target']


Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [3]:

# HEART DISEASE MODEL TRAINING

# Import required ML libraries
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.calibration import CalibratedClassifierCV
from sklearn.metrics import accuracy_score, classification_report
import joblib


# Step 1: separate the label column from the input features
target = 'target'                  # name of the column holding the label
y = df[target]                     # labels to predict
X = df.drop(columns=[target])      # every remaining column is an input feature

# Step 2: hold out 20% of the rows as a test set
# random_state pins the shuffle so the split is repeatable;
# stratify=y asks for the label proportions to be preserved in both parts.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
X_train, X_test, y_train, y_test = split_parts

# Step 3: standardize the features
# The scaler learns its statistics from the training rows only and the same
# fitted scaler is then applied to both the training and the test rows.
scaler = StandardScaler()
X_train_scaled = scaler.fit(X_train).transform(X_train)
X_test_scaled = scaler.transform(X_test)


# Step 4: base classifier
# Logistic regression with balanced class weights (to compensate for unequal
# class counts) and a raised iteration cap for the lbfgs solver.
base_model_options = {
    'solver': 'lbfgs',
    'class_weight': 'balanced',
    'max_iter': 1000,
}
model_base = LogisticRegression(**base_model_options)

# Step 5: wrap the classifier in a sigmoid calibration layer (5-fold CV)
# The wrapped model is passed as `estimator`; if the installed scikit-learn
# rejects that keyword with a TypeError, fall back to `base_estimator`.
calibration_options = {'method': 'sigmoid', 'cv': 5}
try:
    calibrator = CalibratedClassifierCV(estimator=model_base, **calibration_options)
except TypeError:
    calibrator = CalibratedClassifierCV(base_estimator=model_base, **calibration_options)

# Step 6: fit the calibrated model on the scaled training data
calibrator.fit(X_train_scaled, y_train)


# Step 7: predict labels for the held-out test rows
y_pred = calibrator.predict(X_test_scaled)

# Step 8: report test-set accuracy (as a percentage, 2 decimals) and the
# per-class precision / recall / f1 table
accuracy_pct = round(accuracy_score(y_test, y_pred) * 100, 2)
report_text = classification_report(y_test, y_pred)

print("Model trained and calibrated successfully!\n")
print(f"Accuracy on Test Data: {accuracy_pct} %")
print(f"\nClassification Report:\n {report_text}")


# 9 Save trained model and scaler for Streamlit app
# Each artifact is listed once, so the files written and the names printed
# below cannot drift apart.
artifacts = {
    "heart_disease_model.pkl": calibrator,   # calibrated classifier
    "scaler.pkl": scaler,                    # fitted StandardScaler
}
for artifact_path, artifact in artifacts.items():
    joblib.dump(artifact, artifact_path)

print("\nModel and Scaler saved successfully as:")
for artifact_path in artifacts:
    # Bullet character repaired from a mis-encoded byte sequence.
    print(f"   • {artifact_path}")




Model trained and calibrated successfully!

Accuracy on Test Data: 80.33 %

Classification Report:
               precision    recall  f1-score   support

           0       0.86      0.68      0.76        28
           1       0.77      0.91      0.83        33

    accuracy                           0.80        61
   macro avg       0.82      0.79      0.80        61
weighted avg       0.81      0.80      0.80        61


Model and Scaler saved successfully as:
   ‚Ä¢ heart_disease_model.pkl
   ‚Ä¢ scaler.pkl


In [4]:
%%writefile app.py

# Heart Disease Prediction App

# This Streamlit app predicts the likelihood of heart disease
# based on medical data using a trained Logistic Regression model.
# Each input feature is explained with safe/risky indicators.


# Importing Required Libraries

import streamlit as st        # For creating the web app UI
import numpy as np            # For numerical data handling
import joblib                 # To load pre-trained ML model and scaler


# Load Trained Model & Scaler

@st.cache_resource
def load_artifacts():
    """Load the trained model and the scaler from disk.

    Streamlit re-executes this script on every widget interaction;
    st.cache_resource keeps the loaded objects in memory so the pickle
    files are read once instead of on every rerun.

    Returns:
        tuple: (model, scaler) as stored in "heart_disease_model.pkl"
        and "scaler.pkl".
    """
    # NOTE: joblib.load unpickles the files; only load artifacts you created.
    loaded_model = joblib.load("heart_disease_model.pkl")
    loaded_scaler = joblib.load("scaler.pkl")
    return loaded_model, loaded_scaler


# model  : calibrated classifier saved by the training step
# scaler : StandardScaler fitted on the training features
model, scaler = load_artifacts()


# App Title & Description
st.title("Heart Disease Prediction App")
st.write("This AI/ML model predicts the **risk of heart disease** using your medical data.")
st.write("Please enter your health details carefully to get an AI-based evaluation of your heart condition.")


# Helper Function to Check Safe or Risky Value Ranges

def check_range(value, safe_min, safe_max, feature_name):
    """Show whether a numeric input lies inside its safe range.

    Renders one markdown line in the Streamlit page: a "Risky" notice when
    ``value`` is below ``safe_min`` or above ``safe_max``, otherwise a "Safe"
    notice. Both bounds are inclusive.

    Args:
        value: The number entered by the user.
        safe_min: Lowest value still considered safe.
        safe_max: Highest value still considered safe.
        feature_name: Label used for the feature in the message.

    Returns:
        None. The message is written to the page via ``st.markdown``.
    """
    # The warning sign, arrows and colored circles were stored as mis-encoded
    # byte sequences and rendered as garbage; they are written as real
    # Unicode characters here.
    if value < safe_min or value > safe_max:
        st.markdown(f"⚠️ **{feature_name}** is outside safe range ({safe_min}-{safe_max}) → 🔴 *Risky*")
    else:
        st.markdown(f" **{feature_name}** is within safe range ({safe_min}-{safe_max}) → 🟢 *Safe*")


#  Patient Data Input Section
# One widget per model feature. Each widget is followed by a short caption
# and, for most numeric fields, a safe/risky indicator from check_range.
# Dashes in labels and captions were stored as mis-encoded byte sequences;
# they are written as real Unicode characters here.
st.header(" Enter Patient Health Details:")

# 1 Age
age = st.number_input("Age", min_value=20, max_value=100, value=45)
st.caption("Age affects the likelihood of heart disease; risk rises significantly after 50.")
check_range(age, 25, 60, "Age")

# 2 Gender
sex = st.selectbox("Gender", ["Male", "Female"])
st.caption("Men tend to have a higher early risk of heart disease compared to women.")
sex_val = 1 if sex == "Male" else 0  # ML model uses numeric encoding

# 3 Chest Pain Type (cp)
cp = st.number_input("Chest Pain Type (0–3)", min_value=0, max_value=3, value=1)
st.caption("0 = Typical Angina, 1 = Atypical Angina, 2 = Non-anginal Pain, 3 = Asymptomatic (silent).")
check_range(cp, 0, 1, "Chest Pain Type")

# 4 Resting Blood Pressure (trestbps)
trestbps = st.number_input("Resting Blood Pressure (mm Hg)", min_value=80, max_value=200, value=120)
st.caption("Resting BP over 130 mmHg increases heart strain. Ideal range: 90–130 mmHg.")
check_range(trestbps, 90, 130, "Blood Pressure")

# 5 Cholesterol (chol)
chol = st.number_input("Cholesterol (mg/dl)", min_value=100, max_value=600, value=200)
st.caption("High cholesterol (>240 mg/dl) can lead to arterial plaque buildup.")
check_range(chol, 100, 240, "Cholesterol")

# 6 Fasting Blood Sugar (fbs)
fbs = st.selectbox("Fasting Blood Sugar > 120 mg/dl?", ["No", "Yes"])
st.caption("Fasting blood sugar >120 mg/dl may indicate diabetes — a key heart disease risk factor.")
fbs_val = 1 if fbs == "Yes" else 0  # encoded as 1 = Yes, 0 = No

# 7 Resting ECG (restecg)
restecg = st.number_input("Resting ECG (0–2)", min_value=0, max_value=2, value=1)
st.caption("ECG checks heart electrical activity; abnormal readings (1–2) can suggest stress on the heart.")

# 8 Max Heart Rate (thalach)
thalach = st.number_input("Max Heart Rate Achieved", min_value=60, max_value=220, value=150)
st.caption("Represents heart performance under stress; higher is generally better (120–190 bpm ideal).")
check_range(thalach, 120, 190, "Max Heart Rate")

# 9 Exercise Induced Angina (exang)
exang = st.selectbox("Exercise Induced Angina?", ["No", "Yes"])
st.caption("Angina during exercise indicates reduced blood flow to the heart.")
exang_val = 1 if exang == "Yes" else 0  # encoded as 1 = Yes, 0 = No

# 10 Oldpeak (ST Depression)
# Float bounds make this a float-valued input, unlike the integer inputs above.
oldpeak = st.number_input("Oldpeak (ST Depression)", min_value=0.0, max_value=6.0, value=1.0)
st.caption("Measures heart stress from exercise; values above 2.0 may indicate ischemia.")
check_range(oldpeak, 0.0, 2.0, "Oldpeak (ST Depression)")

# 11 Slope of ST Segment
slope = st.number_input("Slope (0–2)", min_value=0, max_value=2, value=1)
st.caption("ST segment slope; higher values (1–2) generally indicate better heart function.")

# 12 Major Vessels Colored (ca)
ca = st.number_input("Major Vessels (0–4)", min_value=0, max_value=4, value=0)
st.caption("Number of major blood vessels (0–4). More blocked vessels = higher risk.")
check_range(ca, 0, 1, "Major Vessels")

# 13 Thalassemia (thal)
thal = st.number_input("Thal (0–3)", min_value=0, max_value=3, value=2)
st.caption("Thalassemia test result: 0=Normal, 1=Fixed defect, 2=Reversible defect — higher means lower oxygen flow.")
check_range(thal, 0, 2, "Thal")


# Prepare Input for Prediction

# One row of feature values, listed in the same order as the training
# columns (age, sex, cp, trestbps, chol, fbs, restecg, thalach, exang,
# oldpeak, slope, ca, thal).
feature_row = [
    age, sex_val, cp, trestbps, chol, fbs_val, restecg,
    thalach, exang_val, oldpeak, slope, ca, thal,
]

# Wrap the row in an outer list so the array has a single sample (one row).
input_data = np.array([feature_row])

# Apply the scaler that was fitted during training.
scaled_data = scaler.transform(input_data)

#  Prediction Button

# Risk-band cut-offs, in percent of predicted disease probability.
MODERATE_RISK_PCT = 40
HIGH_RISK_PCT = 70

if st.button("🔍 Predict Heart Disease Risk"):

    # Probability that the model assigns to class 1 (heart disease).
    prob = model.predict_proba(scaled_data)[0][1]
    confidence = round(prob * 100, 2)                 # same value as a percentage

    st.subheader("Prediction Result:")
    st.markdown("###  Heart Health Meter")
    st.progress(int(confidence))                      # Visual progress bar (0-100)

    # The band heading, the advice and the summary line are all chosen from
    # the same rounded percentage that is shown to the user. Previously the
    # summary used a separate unrounded comparison and only knew "High" and
    # "Low", so a moderate result was followed by a green "Low Risk" line and
    # the two could disagree right at the 70% boundary.
    # The number is labelled as the estimated probability of disease, since
    # it is not the confidence in a "low risk" verdict.
    if confidence < MODERATE_RISK_PCT:
        st.markdown("<h3 style='color:green;'>Low Risk</h3>", unsafe_allow_html=True)
        st.info("💚 Your heart appears healthy. Keep exercising, eat well, and get regular checkups.")
        st.success(f"🟢 **Low Risk of Heart Disease — Estimated probability: {confidence}%**")
    elif confidence < HIGH_RISK_PCT:
        st.markdown("<h3 style='color:orange;'>Moderate Risk</h3>", unsafe_allow_html=True)
        st.warning("🟡 There are mild risk signs. Consider consulting a doctor for preventive evaluation.")
        st.warning(f"🟡 **Moderate Risk of Heart Disease — Estimated probability: {confidence}%**")
    else:
        st.markdown("<h3 style='color:red;'>High Risk</h3>", unsafe_allow_html=True)
        st.error("🟥 Strong indicators of possible cardiac stress or artery blockage. Please consult a cardiologist immediately.")
        st.error(f"🔴 **High Risk of Heart Disease — Estimated probability: {confidence}%**")


# Footer & Medical Disclaimer
# Markdown shown at the bottom of every page: a horizontal rule followed by
# a note that the tool is educational and not a substitute for a diagnosis.
disclaimer_md = """
---
###  Medical Note:
This AI tool is for **educational purposes only**.
Machine learning predictions are based on historical data and cannot replace clinical diagnosis.
Please consult a **cardiologist** for accurate assessment and medical guidance.
"""

st.markdown(disclaimer_md)

Writing app.py


In [5]:
# Install required packages (run once)
!pip install streamlit pyngrok --quiet

import os
import subprocess
import time
from getpass import getpass

import requests
from pyngrok import ngrok

# Authenticate ngrok
# The authtoken is a secret, so it must not be stored in the notebook. It is
# read from the NGROK_AUTHTOKEN environment variable, falling back to a
# hidden prompt.
# NOTE(review): any authtoken that has ever been saved in a notebook in plain
# text should be treated as leaked and revoked in the ngrok dashboard.
NGROK_AUTHTOKEN = os.environ.get("NGROK_AUTHTOKEN") or getpass("Enter your ngrok authtoken: ")
ngrok.set_auth_token(NGROK_AUTHTOKEN)

STREAMLIT_PORT = 8501
STREAMLIT_URL = f"http://localhost:{STREAMLIT_PORT}"
STREAMLIT_LOG = "streamlit.log"

# Kill old tunnels / processes (best effort: a non-zero exit status simply
# means there was nothing to kill).
ngrok.kill()
subprocess.run("pkill streamlit", shell=True)
subprocess.run(f"fuser -k {STREAMLIT_PORT}/tcp", shell=True)

# Start Streamlit app
# Output goes to a log file rather than to pipes: pipes that are never read
# can fill up and block the child process, and the log makes start-up errors
# inspectable. The child keeps its own handle after the `with` block closes ours.
with open(STREAMLIT_LOG, "w") as log_file:
    process = subprocess.Popen(
        ["streamlit", "run", "app.py", "--server.port", str(STREAMLIT_PORT)],
        stdout=log_file,
        stderr=subprocess.STDOUT,
    )

# Wait for Streamlit to start (poll localhost)
max_wait = 60  # seconds
start_time = time.monotonic()
app_ready = False
while time.monotonic() - start_time <= max_wait:
    # Stop waiting early if the Streamlit process has already exited.
    if process.poll() is not None:
        break
    try:
        # The timeout keeps one hung request from stalling the whole loop.
        if requests.get(STREAMLIT_URL, timeout=2).status_code == 200:
            app_ready = True
            break
    except requests.exceptions.RequestException:
        # Server not accepting connections yet; try again after a pause.
        pass
    time.sleep(1)

if app_ready:
    # Open ngrok tunnel
    public_url = ngrok.connect(STREAMLIT_PORT)
    print("Your Streamlit app is live at:", public_url.public_url)
else:
    # Do not expose a tunnel to a port nothing is listening on.
    print("Streamlit did not start in time. Check app.py for errors.")
    print(f"See {STREAMLIT_LOG} for the Streamlit output.")

Your Streamlit app is live at: https://willetta-postorbital-amberly.ngrok-free.dev
