# SALKS Integrated Chatbot (Notebook)

This notebook integrates the SALKS ensemble (ANN + KNN + Logistic Regression with SMOTE) with a chatbot-style risk assessment.


In [9]:
import numpy as np
import pandas as pd
from imblearn.over_sampling import SMOTE
from sklearn.linear_model import LogisticRegressionCV
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization


In [10]:
DATA_PATH = "../data/heart.csv"
FEATURES = ["age", "sex", "trestbps", "chol", "cp", "thalach", "fbs", "restecg", "exang"]


In [11]:
# Load and prepare data
_df = pd.read_csv(DATA_PATH)
_df = _df.drop_duplicates()

X = _df[FEATURES]
y = _df["target"]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Balance the data
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

# Scale features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_resampled)
X_test_scaled = scaler.transform(X_test)


In [12]:
# Build ANN

def build_ann(input_dim: int):
    model = Sequential([
        Dense(128, activation="relu", input_shape=(input_dim,)),
        BatchNormalization(),
        Dropout(0.3),
        Dense(64, activation="relu"),
        BatchNormalization(),
        Dropout(0.2),
        Dense(1, activation="sigmoid"),
    ])
    model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
    return model

ann_model = build_ann(X_train_scaled.shape[1])
ann_model.fit(X_train_scaled, y_train_resampled, epochs=40, batch_size=16, verbose=0)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<keras.src.callbacks.history.History at 0x14e9d2940>

In [13]:
# Train KNN
knn_model = KNeighborsClassifier(n_neighbors=7, weights="distance", metric="manhattan")
knn_model.fit(X_train_scaled, y_train_resampled)


In [14]:
# Train meta-model (Logistic Regression)
ann_probs = ann_model.predict(X_test_scaled, verbose=0).flatten()
knn_probs = knn_model.predict_proba(X_test_scaled)[:, 1]
meta_features = np.column_stack((ann_probs, knn_probs))

meta_model = LogisticRegressionCV(cv=5, max_iter=1000)
meta_model.fit(meta_features, y_test)


In [15]:
# Helper functions

def categorize_blood_pressure(bp):
    if bp < 90:
        return "Low"
    elif bp <= 120:
        return "Normal"
    return "High"


def categorize_cholesterol(chol):
    if chol < 200:
        return "Normal"
    elif chol <= 240:
        return "Borderline High"
    return "High"


def predict_risk(age, sex, trestbps, chol, cp, thalach, fbs, restecg, exang):
    features = np.array([[age, sex, trestbps, chol, cp, thalach, fbs, restecg, exang]])
    scaled = scaler.transform(features)
    ann_prob = ann_model.predict(scaled, verbose=0).flatten()
    knn_prob = knn_model.predict_proba(scaled)[:, 1]
    meta = np.column_stack((ann_prob, knn_prob))
    pred = meta_model.predict(meta)[0]

    manual_high_risk = trestbps > 120 or chol > 240 or cp >= 2 or exang == 1
    if manual_high_risk:
        return "High"
    return "High" if pred == 1 else "No Risk, Healthy"


In [16]:
# Chatbot-style interaction

def categorize_blood_pressure(bp):
    if bp < 120:
        return "Normal"
    elif bp < 140:
        return "Prehypertension"
    else:
        return "High"

def categorize_cholesterol(chol):
    if chol < 200:
        return "Normal"
    elif chol < 240:
        return "Borderline High"
    else:
        return "High"

def predict_risk(age, sex, trestbps, chol, cp, thalach, fbs, restecg, exang):
    score = 0

    if age > 50:
        score += 1
    if trestbps > 140:
        score += 1
    if chol > 240:
        score += 1
    if fbs == 1:
        score += 1
    if exang == 1:
        score += 1

    if score >= 3:
        return "High"
    else:
        return "Low"


def chatbot():
    print("Welcome to the AI-Driven Heart Disease Chatbot!")

    age = int(input("Age: "))
    sex = int(input("Sex (1=Male, 0=Female): "))
    trestbps = int(input("Resting Blood Pressure (mmHg): "))
    chol = int(input("Cholesterol Level (mg/dL): "))
    cp = int(input("Chest Pain Type (0=None, 1=Mild, 2=Moderate, 3=Severe): "))
    thalach = int(input("Max Heart Rate Achieved: "))
    fbs = int(input("Fasting Blood Sugar (>120 mg/dL) (1=Yes, 0=No): "))
    restecg = int(input("Resting ECG (0=Normal, 1=ST-T wave abnormality, 2=Possible LVH): "))
    exang = int(input("Exercise-Induced Angina (1=Yes, 0=No): "))

    bp_category = categorize_blood_pressure(trestbps)
    chol_category = categorize_cholesterol(chol)

    risk_level = predict_risk(age, sex, trestbps, chol, cp, thalach, fbs, restecg, exang)

    print("\n--- Health Analysis ---")
    print(f"Blood Pressure Level: {bp_category}")
    print(f"Cholesterol Level: {chol_category}")

    print("\n--- Risk Prediction ---")
    print(f"Risk Level: {risk_level}")

    print("\n--- Recommended Health Tips ---")

    if risk_level == "High":
        tips = [
            "Consult a doctor as soon as possible.",
            "Follow a diet low in saturated fats and high in fiber.",
            "Exercise at least 30 minutes daily.",
            "Reduce stress through meditation or yoga.",
            "Quit smoking and limit alcohol intake.",
            "Schedule regular checkups for blood pressure and cholesterol.",
        ]
    else:
        tips = [
            "Maintain a balanced diet and regular exercise.",
            "Continue periodic monitoring of blood pressure and cholesterol.",
            "Stay active to reduce future risk.",
        ]

    for tip in tips:
        print(f"- {tip}")


chatbot()


Welcome to the AI-Driven Heart Disease Chatbot!


ValueError: invalid literal for int() with base 10: ''