In [8]:
import streamlit as st
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score


In [2]:
# Location of the Framingham heart-disease CSV. Kept in one constant so the
# path only has to be edited in a single place when the data file moves.
DATA_PATH = r"/home/intellact/Documents/genDs/projects/Logistic/framingham_heart_disease(2).csv"
df = pd.read_csv(DATA_PATH)

# Preview the data
print(df.head())
print("\nDataset Info:")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
df.info()

   male  age  education  currentSmoker  cigsPerDay  BPMeds  prevalentStroke  \
0     1   39        4.0              0         0.0     0.0                0   
1     0   46        2.0              0         0.0     0.0                0   
2     1   48        1.0              1        20.0     0.0                0   
3     0   61        3.0              1        30.0     0.0                0   
4     0   46        3.0              1        23.0     0.0                0   

   prevalentHyp  diabetes  totChol  sysBP  diaBP    BMI  heartRate  glucose  \
0             0         0    195.0  106.0   70.0  26.97       80.0     77.0   
1             0         0    250.0  121.0   81.0  28.73       95.0     76.0   
2             0         0    245.0  127.5   80.0  25.34       75.0     70.0   
3             1         0    225.0  150.0   95.0  28.58       65.0    103.0   
4             0         0    285.0  130.0   84.0  23.10       85.0     85.0   

   TenYearCHD  
0           0  
1           0  
2 

In [3]:
# Number of NaN entries in each column
missing_per_column = df.isnull().sum()
print("\nMissing Values:\n", missing_per_column)

# Basic statistics
summary_stats = df.describe()
print("\nStatistics:\n", summary_stats)



Missing Values:
 male                 0
age                  0
education          105
currentSmoker        0
cigsPerDay          29
BPMeds              53
prevalentStroke      0
prevalentHyp         0
diabetes             0
totChol             50
sysBP                0
diaBP                0
BMI                 19
heartRate            1
glucose            388
TenYearCHD           0
dtype: int64

Statistics:
               male          age    education  currentSmoker   cigsPerDay  \
count  4238.000000  4238.000000  4133.000000    4238.000000  4209.000000   
mean      0.429212    49.584946     1.978950       0.494101     9.003089   
std       0.495022     8.572160     1.019791       0.500024    11.920094   
min       0.000000    32.000000     1.000000       0.000000     0.000000   
25%       0.000000    42.000000     1.000000       0.000000     0.000000   
50%       0.000000    49.000000     2.000000       0.000000     0.000000   
75%       1.000000    56.000000     3.000000       1.00

In [4]:
# Replace every NaN in a numeric column with that column's median.
# The frame is modified in place, so later cells keep using `df`.
column_medians = df.median(numeric_only=True)
df.fillna(value=column_medians, inplace=True)

In [5]:
# Re-count NaN entries per column to confirm the fill left none behind
remaining_missing = df.isnull().sum()
print(remaining_missing)


male               0
age                0
education          0
currentSmoker      0
cigsPerDay         0
BPMeds             0
prevalentStroke    0
prevalentHyp       0
diabetes           0
totChol            0
sysBP              0
diaBP              0
BMI                0
heartRate          0
glucose            0
TenYearCHD         0
dtype: int64


In [9]:

# --------------------------------
# Features / Target
# --------------------------------
X = df.drop("TenYearCHD", axis=1)
y = df["TenYearCHD"]

# --------------------------------
# Train-Test Split
# --------------------------------
# Split BEFORE scaling: fitting the scaler on the full dataset would let the
# mean/std of the held-out rows leak into training and bias the reported
# accuracy. The same test_size/random_state select the same rows as before.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# --------------------------------
# Feature Scaling
# --------------------------------
# Learn the scaling statistics from the training rows only, then apply that
# same transformation to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Scaled copy of the full feature matrix, kept so that any cell still
# referring to `X_scaled` continues to work.
X_scaled = scaler.transform(X)

# --------------------------------
# Model Training
# --------------------------------
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# --------------------------------
# Model Evaluation
# --------------------------------
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

st.subheader("Model Performance")
st.success(f"Accuracy: {accuracy * 100:.2f}%")

# --------------------------------
# User Input Section
# --------------------------------
st.subheader("Enter Patient Details")

def user_input_features():
    """Render one Streamlit widget per model feature and collect the answers.

    Widgets are created in the order listed in ``field_specs``; that order is
    also the column order of the result.

    Returns:
        pandas.DataFrame: a single-row frame with one column per feature,
        holding the value currently selected in the corresponding widget.
    """
    # (column name, widget type, label, widget arguments)
    #   selectbox -> arguments are the list of options
    #   slider    -> arguments are (minimum, maximum, initial value)
    field_specs = (
        ("male", "selectbox", "Gender (Male=1, Female=0)", [0, 1]),
        ("age", "slider", "Age", (30, 80, 45)),
        ("education", "selectbox", "Education Level", [1, 2, 3, 4]),
        ("currentSmoker", "selectbox", "Current Smoker", [0, 1]),
        ("cigsPerDay", "slider", "Cigarettes Per Day", (0.0, 70.0, 0.0)),
        ("BPMeds", "selectbox", "On BP Medication", [0, 1]),
        ("prevalentStroke", "selectbox", "Previous Stroke", [0, 1]),
        ("prevalentHyp", "selectbox", "Hypertension", [0, 1]),
        ("diabetes", "selectbox", "Diabetes", [0, 1]),
        ("totChol", "slider", "Total Cholesterol", (100.0, 400.0, 200.0)),
        ("sysBP", "slider", "Systolic BP", (90.0, 250.0, 120.0)),
        ("diaBP", "slider", "Diastolic BP", (60.0, 150.0, 80.0)),
        ("BMI", "slider", "BMI", (15.0, 50.0, 25.0)),
        ("heartRate", "slider", "Heart Rate", (50.0, 150.0, 75.0)),
        ("glucose", "slider", "Glucose Level", (40.0, 400.0, 80.0)),
    )

    patient_row = {}
    for column, widget_kind, label, widget_args in field_specs:
        if widget_kind == "selectbox":
            patient_row[column] = st.selectbox(label, widget_args)
        else:
            patient_row[column] = st.slider(label, *widget_args)

    # Wrapping the dict in a list yields exactly one row
    return pd.DataFrame([patient_row])

input_df = user_input_features()

# --------------------------------
# Prediction
# --------------------------------
# Scale the entered row with the fitted scaler, then ask the model for both
# the predicted label and the probability estimates.
input_scaled = scaler.transform(input_df)
prediction, prediction_proba = (
    model.predict(input_scaled),
    model.predict_proba(input_scaled),
)

st.subheader("Prediction Result")

predicted_label = prediction[0]
if predicted_label != 1:
    st.success("✅ Low Risk of Heart Disease")
else:
    st.error("⚠️ High Risk of Heart Disease (10-year risk)")

# Second entry of the first row is the estimate for label 1
st.write(f"Probability of Heart Disease: **{prediction_proba[0][1] * 100:.2f}%**")


2026-01-04 15:34:17.493 
  command:

    streamlit run /home/intellact/.local/lib/python3.8/site-packages/ipykernel_launcher.py [ARGUMENTS]
2026-01-04 15:34:17.513 Session state does not function when running a script without `streamlit run`


In [14]:
import joblib

# Serialize the trained model and the fitted scaler next to the notebook so
# the standalone app can load them.
joblib.dump(value=model, filename="model.pkl")
joblib.dump(value=scaler, filename="scaler.pkl")


['scaler.pkl']

In [15]:
%%writefile app.py
import streamlit as st
import numpy as np
import joblib

# Load the saved artifacts: the model first, then the scaler
model, scaler = (joblib.load(artifact) for artifact in ("model.pkl", "scaler.pkl"))

st.set_page_config(layout="centered", page_title="Heart Disease Prediction App")

# Page heading and a one-line instruction for the user
st.title("❤️ Heart Disease Prediction App")
st.write("Enter patient details to predict 10-year heart disease risk")

# Inputs
def _bounded_number(label, low, high, default):
    """Show a numeric input restricted to [low, high], starting at `default`."""
    return st.number_input(label, min_value=low, max_value=high, value=default)


# Integer-valued fields
male = _bounded_number("Gender (Male=1, Female=0)", 0, 1, 1)
age = _bounded_number("Age", 30, 80, 45)
education = _bounded_number("Education Level (1–4)", 1, 4, 2)
currentSmoker = _bounded_number("Current Smoker (0/1)", 0, 1, 0)
# Float-valued field
cigsPerDay = _bounded_number("Cigarettes Per Day", 0.0, 70.0, 0.0)
# 0/1 flags
BPMeds = _bounded_number("On BP Medication (0/1)", 0, 1, 0)
prevalentStroke = _bounded_number("Previous Stroke (0/1)", 0, 1, 0)
prevalentHyp = _bounded_number("Hypertension (0/1)", 0, 1, 0)
diabetes = _bounded_number("Diabetes (0/1)", 0, 1, 0)
# Float-valued measurements
totChol = _bounded_number("Total Cholesterol", 100.0, 400.0, 200.0)
sysBP = _bounded_number("Systolic BP", 90.0, 250.0, 120.0)
diaBP = _bounded_number("Diastolic BP", 60.0, 150.0, 80.0)
BMI = _bounded_number("BMI", 15.0, 50.0, 25.0)
heartRate = _bounded_number("Heart Rate", 50.0, 150.0, 75.0)
glucose = _bounded_number("Glucose Level", 40.0, 400.0, 80.0)

# Predict button
if st.button("Predict"):
    # Assemble a single-row 2-D array. The values must stay in the same order
    # as the feature columns the scaler and model were fitted on.
    new_data = np.array([[
        male, age, education, currentSmoker, cigsPerDay,
        BPMeds, prevalentStroke, prevalentHyp, diabetes,
        totChol, sysBP, diaBP, BMI, heartRate, glucose
    ]])

    new_data_scaled = scaler.transform(new_data)
    prediction = model.predict(new_data_scaled)[0]

    # Pick the alert style that matches the outcome: a red error box for the
    # high-risk result and a green success box for the low-risk result.
    # (Previously high risk was rendered as "success" and low risk as "warning".)
    if prediction == 1:
        st.error("⚠️ High Risk of Heart Disease")
    else:
        st.success("✅ Low Risk of Heart Disease")


Overwriting app.py
