# Cell purpose:
# Install necessary packages used later (XGBoost, scikit-learn, pandas, joblib, streamlit, pyngrok)


In [None]:
# Install required Python packages quietly (no verbose output).
!pip install xgboost scikit-learn pandas joblib streamlit pyngrok --quiet



Cell 2: Create a small sample rabies dataset and save it as CSV

In [None]:
# Cell purpose:
# Build a small inline CSV dataset, write it to 'rabies_data.csv', then load it
# back into a pandas DataFrame named df. This gives the ML pipeline a
# self-contained dataset to be tested on.

# pandas handles the tabular data and the CSV parsing.
import pandas as pd

# CSV text for the sample rabies cases: a header row followed by ten records.
data = """animal_alive,animal,PEP_RECOMMENDED,VISIT_STATUS,victim_environment,animal_environment,symptoms,rabies_status
Yes,Dog,Yes,Visited,Urban,Urban,Fever,1
No,Dog,Yes,Visited,Rural,Urban,Aggression,1
Yes,Cat,No,Not Visited,Urban,Urban,None,0
Yes,Dog,Yes,Visited,Urban,Rural,Fever,1
No,Dog,Yes,Visited,Rural,Rural,Paralysis,1
Yes,Other,No,Not Visited,Urban,Urban,None,0
Yes,Cat,No,Visited,Urban,Urban,None,0
Yes,Dog,Yes,Visited,Urban,Urban,Aggression,1
No,Dog,Yes,Visited,Rural,Rural,Foaming,1
Yes,Dog,No,Not Visited,Urban,Rural,None,0
"""

# Write the CSV text to disk. The encoding is explicit so the file does not
# depend on the platform's default text encoding.
with open("rabies_data.csv", "w", encoding="utf-8") as f:
    f.write(data)

# Load the file back into a DataFrame.
# keep_default_na=False: "None" is a real category in the 'symptoms' column
# (meaning "no symptoms"). With pandas' default NA handling that literal can be
# parsed as a missing value, and the imputer in the training cell would then
# replace it with the most frequent symptom.
df = pd.read_csv("rabies_data.csv", keep_default_na=False)

# Confirm the dataset was created and report its shape as (rows, columns).
print("Dataset created successfully! Shape:", df.shape)

# Show the first rows (rendered as the cell output in a notebook).
df.head()



Cell 3: Preprocessing, pipeline creation, model training, evaluation and saving

In [None]:
# Cell purpose:
# - Define the ML pipeline (preprocessing + XGBoost classifier).
# - Split data into train/test.
# - Train the model, evaluate it, and save the trained pipeline to disk using joblib.
# Each original code line is preserved; comments above each line explain its role.


# Import train_test_split for splitting data into training and testing sets.
from sklearn.model_selection import train_test_split

# Import OneHotEncoder to convert categorical variables into a numeric one-hot representation.
from sklearn.preprocessing import OneHotEncoder

# Import ColumnTransformer to apply transformations to specific columns (like categorical columns).
from sklearn.compose import ColumnTransformer

# Import Pipeline to chain preprocessing and model steps together.
from sklearn.pipeline import Pipeline

# Import SimpleImputer to fill missing values (imputation) with a chosen strategy.
from sklearn.impute import SimpleImputer

# Import the XGBoost classifier model class.
from xgboost import XGBClassifier

# Import joblib to save/load the trained pipeline to/from disk.
import joblib

# Import accuracy and classification report metrics to evaluate the model's performance.
from sklearn.metrics import accuracy_score, classification_report

# Name of the label column in df.
target = 'rabies_status'

# Feature matrix: every column except the label.
X = df.drop(columns=[target])

# Label vector (0 or 1 in the sample dataset).
y = df[target]

# Treat every non-numeric column as categorical. Selecting by "not a number"
# instead of by the 'object' dtype keeps the detection working regardless of
# which dtype pandas uses to store text columns.
cat_cols = X.select_dtypes(exclude=['number']).columns.tolist()

# Preprocessing for the categorical columns:
# 1. fill missing values with the most frequent value of the column,
# 2. one-hot encode; categories unseen during fit are ignored at transform
#    time (encoded as all zeros) instead of raising.
# Columns not listed in cat_cols are passed through unchanged.
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', Pipeline([
            ('imputer', SimpleImputer(strategy='most_frequent')),
            ('onehot', OneHotEncoder(handle_unknown='ignore'))
        ]), cat_cols)
    ],
    remainder='passthrough'
)

# Full ML pipeline: preprocessing followed by the XGBoost classifier.
# The former use_label_encoder=False argument is omitted: current XGBoost
# releases no longer use it and only emit a warning when it is passed.
pipeline = Pipeline([
    ('pre', preprocessor),
    ('clf', XGBClassifier(eval_metric='logloss', random_state=42))
])

# Split the dataset into training and test sets.
# test_size=0.2 reserves 20% of the rows for testing; random_state makes the
# split reproducible. stratify=y keeps the class proportions in both parts, so
# the very small test set is not made up of a single class.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Fit the pipeline on the training data (preprocessing, then the classifier).
pipeline.fit(X_train, y_train)

# Predict labels for the held-out test rows.
y_pred = pipeline.predict(X_test)

# Confirm that training completed.
print("Model trained successfully!")

# Accuracy of the predictions against the true test labels.
print("Accuracy:", accuracy_score(y_test, y_pred))

# Per-class precision, recall and f1-score.
print("\n Classification Report:\n", classification_report(y_test, y_pred))

# Persist the whole pipeline (preprocessing + model) so the Streamlit app can
# load it from 'xgb_rabies_pipeline.pkl'.
joblib.dump(pipeline, "xgb_rabies_pipeline.pkl")

# Confirm that the pipeline was saved.
print("\n Model saved as xgb_rabies_pipeline.pkl")



Cell 4: Save Streamlit app to file (app.py)

In [None]:
# Rabies Prediction App
# Cell purpose:
# This file creates a complete Streamlit app for rabies risk prediction.
# Features included:
# Multi-symptom input
# Bite location and exposure type
# Smart rule-based alerts
# Dynamic progress bar for risk
# Color-coded risk labels
# ML prediction with rule-based override

%%writefile app.py


# IMPORT LIBRARIES
import streamlit as st           # Streamlit for interactive web apps
import pandas as pd              # Pandas to create structured DataFrames for ML input
import joblib                    # Joblib to load the saved ML pipeline (.pkl)


# LOAD THE TRAINED MODEL
# Load the saved ML pipeline (preprocessing + XGBoost model)
pipeline = joblib.load("xgb_rabies_pipeline.pkl")  # Replace with your pipeline path

# APP TITLE AND INTRODUCTION

st.title("Rabies Prediction App")  # Main title displayed at the top
st.write("AI/ML based prediction model for Rabies in humans based on exposure details.")  # Short description
st.header("Enter Patient / Animal Details")  # Header for input section


# INPUT FIELDS SECTION

# Whether the animal is alive
animal_alive = st.selectbox("Is the animal alive?", ["Yes", "No"])
# Type of animal
animal = st.selectbox("Animal Type", ["Dog", "Cat", "Other"])
# Whether PEP was recommended
PEP_RECOMMENDED = st.selectbox("Was PEP (Post Exposure Prophylaxis) recommended?", ["Yes", "No"])
# Whether the victim visited a healthcare center
VISIT_STATUS = st.selectbox("Visit Status", ["Visited", "Not Visited"])
# Environment of the victim
victim_environment = st.selectbox("Victim Environment", ["Urban", "Rural"])
# Environment of the animal
animal_environment = st.selectbox("Animal Environment", ["Urban", "Rural"])


# SYMPTOMS MULTI-SELECTION

# Multi-select box for the symptoms; "None" (no symptoms) is pre-selected.
# max_selections enforces the "up to 3" limit stated in the label.
symptoms = st.multiselect(
    "Select up to 3 symptoms (choose 'None' if no symptoms):",
    [
        "None", "Fever", "Headache", "Fatigue", "Itching", "Nausea", "Pain at bite site",
        "Aggression", "Hydrophobia", "Paralysis", "Agitation", "Anxiety",
        "Confusion", "Foaming", "Difficulty swallowing", "Excess salivation",
        "Seizures", "Hallucinations"
    ],
    default=["None"],
    max_selections=3,
)
# Normalize the selection so it is never contradictory or empty:
# - real symptoms take precedence over a leftover "None" entry,
# - clearing the box entirely is treated the same as choosing "None".
symptoms = [symptom for symptom in symptoms if symptom != "None"] or ["None"]

# Join the selected symptoms into a single comma-separated string for ML input.
# NOTE(review): the sample training data holds one symptom per row, so a joined
# multi-symptom string is a category the encoder has not seen; with
# handle_unknown='ignore' it is encoded as all zeros. Confirm this is intended.
symptom_text = ", ".join(symptoms)


# ADDITIONAL MEDICAL DETAILS

# Bite location; face/neck = high risk
bite_location = st.selectbox("Bite Location:", ["Hand", "Leg", "Face", "Neck", "Other"])
# Type of exposure; Bite = high, Scratch = medium, Lick = low
exposure_type = st.selectbox("Exposure Type:", ["Bite", "Scratch", "Lick", "Other"])


# SMART RULE-BASED WARNING LOGIC

# Symptoms strongly linked to rabies
strong_symptoms = ["Aggression", "Hydrophobia", "Paralysis", "Foaming", "Seizures"]

# Mild/general symptoms list used for rule based overrides
mild_symptoms = ["Fever", "Headache", "Fatigue", "Nausea", "Pain at bite site",
                 "Agitation", "Anxiety", "Confusion", "Difficulty swallowing", "Excess salivation",
                 "Hallucinations"]

# High-risk bite locations
high_risk_bites = ["Face", "Neck"]
# High-risk exposure types
high_risk_exposure = ["Bite"]
# Medium-risk exposure types (the exposure input documents Scratch = medium)
medium_risk_exposure = ["Scratch"]

# Pre-risk score: 0 = Low, 1 = Medium, 2 = High
pre_risk_score = 0

# Any strong symptom selected → High risk.
# Membership is tested on the selected list itself rather than by substring
# search in the joined text, so one symptom name can never match inside another.
if any(symptom in strong_symptoms for symptom in symptoms):
    pre_risk_score = 2
# Otherwise any real symptom (anything but "None") → Medium risk
elif any(symptom != "None" for symptom in symptoms):
    pre_risk_score = 1

# Raise (never lower) the score according to bite location / exposure type.
if bite_location in high_risk_bites or exposure_type in high_risk_exposure:
    pre_risk_score = 2
elif exposure_type in medium_risk_exposure:
    pre_risk_score = max(pre_risk_score, 1)

# Display pre-prediction alert to user
if pre_risk_score == 0:
    st.info("🟢 Low risk: No major symptoms or high risk exposure detected.")
elif pre_risk_score == 1:
    st.info("🟡 Medium risk: Mild/general symptoms or moderate risk exposure.")
else:
    st.warning("🟠 High risk: Strong symptoms or high risk exposure detected. Please consult a doctor!")


# DYNAMIC RISK METER

# Convert risk score to a percentage (0 = Low, 50 = Medium, 100 = High)
risk_percentage = pre_risk_score * 50
st.progress(risk_percentage)  # Show a progress bar

# Display color-coded risk label using HTML
if pre_risk_score == 0:
    st.markdown("<h3 style='color:green;'>Low Risk</h3>", unsafe_allow_html=True)
elif pre_risk_score == 1:
    st.markdown("<h3 style='color:orange;'>Medium Risk</h3>", unsafe_allow_html=True)
else:
    st.markdown("<h3 style='color:red;'>High Risk</h3>", unsafe_allow_html=True)

# CREATE DATAFRAME FOR ML PREDICTION

# Combine all user inputs into a single-row DataFrame
data = pd.DataFrame([{
    'animal_alive': animal_alive,
    'animal': animal,
    'PEP_RECOMMENDED': PEP_RECOMMENDED,
    'VISIT_STATUS': VISIT_STATUS,
    'victim_environment': victim_environment,
    'animal_environment': animal_environment,
    'symptoms': symptom_text,
    'bite_location': bite_location,
    'exposure_type': exposure_type
}])

# Hand the model only the columns it was fitted on, when the pipeline exposes
# them. The sample training data has no bite_location / exposure_type columns;
# those inputs drive the rule-based score only.
trained_columns = getattr(pipeline, "feature_names_in_", None)
if trained_columns is not None:
    data = data[[column for column in trained_columns if column in data.columns]]

# PREDICTION BUTTON

if st.button("🔍 Predict Rabies Risk"):
    try:
        # ML probability prediction for rabies (class 1)
        prob = pipeline.predict_proba(data)[0][1]

        # THRESHOLD ADJUSTMENT
        # Only probabilities >= 0.5 are treated as high risk
        pred = int(prob >= 0.5)

        # RULE-BASED OVERRIDE
        # If only mild symptoms are selected (not "None"), force Low Risk.
        # bool(symptoms) guards the empty selection: all() over an empty list
        # is True and would otherwise trigger the override with no symptoms.
        # NOTE(review): the override applies regardless of bite location and
        # exposure type - confirm that is the intended clinical rule.
        only_mild_symptoms = (
            bool(symptoms)
            and "None" not in symptoms
            and all(symptom in mild_symptoms for symptom in symptoms)
        )
        if only_mild_symptoms:
            pred = 0

        # DISPLAY PREDICTION RESULT
        st.subheader("Prediction Result:")
        if pred == 1:
            st.error(f"🟥 High Risk of Rabies (Probability: {prob:.2f})")
        else:
            st.success(f"🟩 Low Risk of Rabies (Probability: {prob:.2f})")

    except Exception as e:
        # UI boundary: report any prediction failure to the user instead of
        # letting the app crash with a traceback.
        st.error(f"⚠️ Prediction failed: {e}")




Cell 5: Launch Streamlit app with ngrok and create public URL

In [None]:
# Standard-library helpers: secret prompt, environment access, process launch, sleep.
import getpass
import os
import subprocess
import time

# ngrok helper used to create a public tunnel.
from pyngrok import ngrok

# Port the Streamlit server listens on and the tunnel forwards to.
STREAMLIT_PORT = 8501

# Never hard-code the ngrok authtoken in the notebook: anyone who can read the
# file could open tunnels on the account. Read it from the NGROK_AUTHTOKEN
# environment variable, or prompt for it without echoing.
# SECURITY: a token was previously committed in this cell - revoke it in the
# ngrok dashboard and issue a new one.
NGROK_AUTHTOKEN = os.environ.get("NGROK_AUTHTOKEN") or getpass.getpass("Enter your ngrok authtoken: ")

# Configure ngrok with the auth token so public tunnels can be created.
ngrok.set_auth_token(NGROK_AUTHTOKEN)

# Close any existing ngrok tunnels/processes to avoid conflicts.
ngrok.kill()

# Launch the Streamlit app in the background. An argument list (no shell
# string) avoids shell interpretation; Popen returns immediately, and the
# handle allows stopping the server later with streamlit_process.terminate().
streamlit_process = subprocess.Popen(
    ["streamlit", "run", "app.py", "--server.port", str(STREAMLIT_PORT)]
)

# Give Streamlit a few seconds to start before creating the tunnel.
time.sleep(8)

# Create a public ngrok tunnel that forwards to the local Streamlit port.
public_url = ngrok.connect(STREAMLIT_PORT)

# Print the public URL so the app can be opened in a browser.
print(" Your Rabies Prediction App is live at:")
print(public_url)



