In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, roc_auc_score

# Read the raw dataset from the Excel workbook (path is relative to the
# notebook's working directory).
DATASET_PATH = "CMU PROJECT DATASET.xlsx"
df = pd.read_excel(DATASET_PATH)

# Sanity check: (rows, columns) followed by the first five records.
print("Dataset shape:", df.shape)
print(df.head(5))


Dataset shape: (45530, 13)
   id        Age  gender  height  weight  ap_hi  ap_lo  cholesterol  gluc  \
0   1  51.663014       1     156    85.0    140     90            3     1   
1   2  47.873973       1     165    64.0    130     70            3     1   
2   4  60.583562       1     156    56.0    100     60            1     1   
3   8  48.405479       1     151    67.0    120     80            2     2   
4   9  54.339726       1     157    93.0    130     80            3     1   

   smoke  alco  active  cardio  
0      0     0       1       1  
1      0     0       0       1  
2      0     0       0       0  
3      0     0       0       0  
4      0     0       1       0  


In [None]:
# Keep only physiologically plausible rows: height within 100-250 cm and
# weight within 30-200 kg (both bounds inclusive).
plausible_height = df['height'].between(100, 250)
plausible_weight = df['weight'].between(30, 200)
df = df[plausible_height & plausible_weight]

# Report how many rows survive the filter.
print("Cleaned dataset shape:", df.shape)


Cleaned dataset shape: (45508, 13)


In [None]:


# Derive the engineered columns with `assign`, which returns a new frame rather
# than writing into `df` in place. `df` is the result of boolean filtering in
# the cleaning cell, and in-place column assignment on such a frame can trigger
# pandas' SettingWithCopyWarning.
df = df.assign(
    # BMI = weight (kg) / height (m)^2; height is recorded in centimetres.
    BMI=df['weight'] / (df['height'] / 100) ** 2,
    # Hypertension flag: 1 if systolic >= 140 or diastolic >= 90, else 0.
    hypertension=((df['ap_hi'] >= 140) | (df['ap_lo'] >= 90)).astype(int),
    # Age bands. pd.cut intervals are right-closed by default, i.e.
    # (0, 39], (39, 50], (50, 120].
    age_group=pd.cut(
        df['Age'],
        bins=[0, 39, 50, 120],
        labels=['reproductive', 'perimenopause', 'postmenopause'],
    ),
)


In [None]:
# Restrict the analysis to rows coded gender == 1. Medical datasets commonly
# use 1 = female and 2 = male, but confirm this against the dataset documentation.
is_female = df['gender'] == 1
df_female = df.loc[is_female]

print("Female records:", len(df_female))


Female records: 45508


In [None]:
# Predictor columns fed to the classifier; `cardio` is the target column.
features = [
    'Age', 'BMI', 'cholesterol', 'gluc', 'smoke',
    'alco', 'active', 'ap_hi', 'ap_lo', 'hypertension',
]
X = df_female.loc[:, features]
y = df_female.loc[:, 'cardio']

# Stratified 80/20 hold-out split; the fixed seed keeps the split reproducible.
split_options = dict(test_size=0.2, random_state=42, stratify=y)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)


In [None]:
from sklearn.impute import SimpleImputer

# Median imputation: the medians are learned from the training split only and
# then applied unchanged to the test split.
imputer = SimpleImputer(strategy='median')
X_train_imputed = imputer.fit(X_train).transform(X_train)
X_test_imputed = imputer.transform(X_test)

# Logistic regression on the imputed training data.
model = LogisticRegression(max_iter=1000).fit(X_train_imputed, y_train)

# Hard class predictions and positive-class probabilities for the test split.
y_pred = model.predict(X_test_imputed)
y_proba = model.predict_proba(X_test_imputed)[:, 1]

# Fitted parameters (coefficients follow the order of `features`).
print("Intercept:", model.intercept_)
print("Coefficients:", model.coef_)


Intercept: [-2.35453395]
Coefficients: [[ 2.84167402e-03  3.31261184e-02  5.60730191e-01 -7.31582783e-02
  -1.59965132e-01 -2.90869457e-01 -2.02570941e-01  1.61033435e-03
   2.68204988e-04  1.68575947e+00]]


In [None]:
# Hold-out evaluation: per-class precision/recall/F1 plus ROC AUC computed
# from the predicted positive-class probabilities.
report_text = classification_report(y_test, y_pred)
auc_value = roc_auc_score(y_test, y_proba)
print("Classification Report:\n", report_text)
print("ROC AUC Score:", auc_value)

# Coefficient table, largest first. The visible cells apply no feature scaling,
# so each coefficient is on its feature's raw scale and magnitudes are not
# directly comparable across features.
coef_df = (
    pd.DataFrame({'feature': features, 'coefficient': model.coef_[0]})
    .sort_values(by='coefficient', ascending=False)
)

print("\nFeature importance (coefficients):\n", coef_df)


Classification Report:
               precision    recall  f1-score   support

           0       0.69      0.81      0.74      4581
           1       0.77      0.62      0.69      4521

    accuracy                           0.72      9102
   macro avg       0.73      0.72      0.72      9102
weighted avg       0.73      0.72      0.72      9102

ROC AUC Score: 0.7559631371241369

Feature importance (coefficients):
         feature  coefficient
9  hypertension     1.685759
2   cholesterol     0.560730
1           BMI     0.033126
0           Age     0.002842
7         ap_hi     0.001610
8         ap_lo     0.000268
3          gluc    -0.073158
4         smoke    -0.159965
6        active    -0.202571
5          alco    -0.290869


In [None]:
from sklearn.impute import SimpleImputer

# Split first, then learn the imputation medians from the training rows only.
# Fitting the imputer on all of X before splitting lets test-set values
# influence the data the model is trained on (data leakage).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

imputer = SimpleImputer(strategy='median')
X_train = imputer.fit_transform(X_train)  # returns numpy array
X_test = imputer.transform(X_test)        # reuse the train-only medians

# Kept so any later cell that reads `X_imputed` still finds it; it is now
# produced with the train-only medians instead of medians from the full data.
X_imputed = imputer.transform(X)


In [None]:
def heartreach_triage_score(row):
    """Compute the rule-based HeartReach triage score for one patient record.

    Points awarded:
        +3  hypertension flag equals 1
        +2  cholesterol level >= 2
        +1  BMI >= 30
        +1  age >= 50
        -1  physically active (active == 1)

    Parameters
    ----------
    row : mapping (e.g. dict or pandas Series)
        Must provide 'hypertension', 'cholesterol', 'BMI' and 'active', plus
        the age under either 'age' or 'Age'. The dataset column in this
        notebook is spelled 'Age', while hand-built patient dicts use 'age';
        both are accepted so the function also works on dataframe rows.

    Returns
    -------
    int
        Score in the range -1 to 7.

    Raises
    ------
    KeyError
        If a required key is missing.
    """
    score = 0
    if row['hypertension'] == 1:
        score += 3
    if row['cholesterol'] >= 2:
        score += 2
    if row['BMI'] >= 30:
        score += 1
    # Prefer the lowercase key (original behaviour); fall back to the
    # dataframe column name instead of failing with KeyError('age').
    age = row['age'] if 'age' in row else row['Age']
    if age >= 50:
        score += 1
    if row['active'] == 1:
        score -= 1  # Active lifestyle lowers risk slightly
    return score

def triage_advice(score):
    """Translate a triage score into a patient-facing recommendation.

    Scores of 4 or more map to the high-risk message, scores of 2 or 3 to the
    moderate-risk message, and anything lower to the low-risk message.
    """
    # Guard clauses, checked from the most to the least severe band.
    if score >= 4:
        return "High risk detected. Please seek urgent medical care immediately."
    if score >= 2:
        return "Moderate risk detected. We recommend scheduling a doctor visit soon."
    return "Low risk detected. Maintain healthy habits and monitor your health."

# Worked example: score a single hand-written patient record and print the
# resulting advice.
new_patient = dict(
    age=52,
    BMI=32,
    cholesterol=2,
    hypertension=1,
    active=0,
)

score = heartreach_triage_score(new_patient)
advice = triage_advice(score)

print(f"Triage score: {score}")
print(f"Advice: {advice}")


Triage score: 7
Advice: High risk detected. Please seek urgent medical care immediately.


In [None]:
# Same baseline profile as the earlier `new_patient` example, extended with
# four 0/1 symptom flags (fatigue, nausea, anxiety, dizziness).
patient_with_symptoms = {
    'age': 52,
    'BMI': 32,
    'cholesterol': 2,
    'hypertension': 1,
    'active': 0,
    'fatigue': 1,
    'nausea': 0,
    'anxiety': 0,
    'dizziness': 1
}

# NOTE(review): `heartreach_triage_score_extended` is not defined in any cell
# visible in this notebook. Unless it is defined elsewhere, this call raises
# NameError on a fresh kernel (Restart & Run All).
# TODO: restore the cell that defines it (and document its symptom weights).
score = heartreach_triage_score_extended(patient_with_symptoms)
advice = triage_advice(score)
print(f"Triage score: {score}")
print(f"Advice: {advice}")


Triage score: 11
Advice: High risk detected. Please seek urgent medical care immediately.


In [None]:
!pip install streamlit -q


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.1/10.1 MB[0m [31m62.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m107.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.1/79.1 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import time
from pathlib import Path

from pyngrok import ngrok

# NOTE(review): the cells that write app.py (%%writefile) and register the
# ngrok authtoken appear further down in this notebook; run them first.
# Fail fast with a clear message if app.py is missing, because the background
# Streamlit process would otherwise die silently and the tunnel would point
# at a dead port.
if not Path("app.py").exists():
    raise FileNotFoundError(
        "app.py not found - run the '%%writefile app.py' cell before launching the app."
    )

# Start Streamlit app in the background
get_ipython().system_raw('streamlit run app.py &')

# Wait a few seconds for the app to start
time.sleep(5)

# Open ngrok tunnel on port 8501 (the port the tunnel is pointed at)
public_url = ngrok.connect(8501)
print(f"Your app is live here: {public_url}")


Your app is live here: NgrokTunnel: "https://76e6-34-75-252-199.ngrok-free.app" -> "http://localhost:8501"


In [None]:
!ngrok authtoken "2zKpfubwf9enX2qSFDs5wn22azF_2RFa7zQK2SUxyDndk3jrt"


Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [None]:
%%writefile app.py
import streamlit as st

def heartreach_triage_score(row):
    """Return the rule-based HeartReach triage score for a patient mapping.

    `row` must provide the keys 'hypertension', 'cholesterol', 'BMI', 'age'
    and 'active'. Points: +3 hypertension, +2 cholesterol level >= 2,
    +1 BMI >= 30, +1 age >= 50, -1 when physically active.
    """
    points = 3 if row['hypertension'] == 1 else 0
    points += 2 if row['cholesterol'] >= 2 else 0
    points += 1 if row['BMI'] >= 30 else 0
    points += 1 if row['age'] >= 50 else 0
    # An active lifestyle takes one point off.
    points -= 1 if row['active'] == 1 else 0
    return points

def triage_advice(score):
    """Map a triage score to the advice text shown to the user.

    A score >= 4 yields the high-risk message, a score >= 2 the moderate-risk
    message, and any other score the low-risk message.
    """
    # Bands ordered from most to least severe; the first threshold met wins.
    bands = (
        (4, "High risk detected. Please seek urgent medical care immediately."),
        (2, "Moderate risk detected. We recommend scheduling a doctor visit soon."),
    )
    for threshold, message in bands:
        if score >= threshold:
            return message
    return "Low risk detected. Maintain healthy habits and monitor your health."

def calculate_BMI(weight_kg, height_cm):
    """Return the body-mass index: weight in kg divided by squared height in metres."""
    height_m = height_cm / 100  # centimetres -> metres
    return weight_kg / (height_m ** 2)

def calculate_hypertension(ap_hi, ap_lo):
    """Return 1 when systolic pressure is >= 140 or diastolic pressure is >= 90, else 0."""
    if ap_hi >= 140:
        return 1
    if ap_lo >= 90:
        return 1
    return 0

# ---- Streamlit page: collect inputs, then score on button press ----
st.title("HeartReach: Cardiovascular Triage for Women")

# Display labels for the coded select-box options.
cholesterol_labels = {1:"Normal", 2:"Above normal", 3:"Well above normal"}
yes_no_labels = {0:"No", 1:"Yes"}

# Input widgets, rendered in the order they are created.
age = st.number_input("Age (years)", min_value=10, max_value=120, value=40)
weight = st.number_input("Weight (kg)", min_value=20.0, max_value=200.0, value=60.0)
height = st.number_input("Height (cm)", min_value=100.0, max_value=250.0, value=160.0)
cholesterol = st.selectbox(
    "Cholesterol level",
    options=[1, 2, 3],
    format_func=lambda level: cholesterol_labels[level],
)
ap_hi = st.number_input("Systolic blood pressure (ap_hi)", min_value=80, max_value=250, value=120)
ap_lo = st.number_input("Diastolic blood pressure (ap_lo)", min_value=40, max_value=150, value=80)
active = st.selectbox(
    "Physically active?",
    options=[0, 1],
    format_func=lambda flag: yes_no_labels[flag],
)

if st.button("Get Triage Advice"):
    # Derive BMI and the hypertension flag from the raw inputs, then score.
    patient = {
        'age': age,
        'BMI': calculate_BMI(weight, height),
        'cholesterol': cholesterol,
        'hypertension': calculate_hypertension(ap_hi, ap_lo),
        'active': active,
    }
    score = heartreach_triage_score(patient)
    advice = triage_advice(score)
    st.write(f"### Triage score: {score}")
    st.write(f"### Advice: {advice}")


Writing app.py


In [None]:
# Re-display the fitted intercept. `model` is created in the logistic-regression
# training cell earlier in the notebook; after a kernel restart that cell must
# be re-run first, otherwise this line raises NameError.
print("Intercept:", model.intercept_)


NameError: name 'model' is not defined