In [1]:
import pandas as pd
import numpy as np

# Define diseases and medicines
# Maps each condition to the medicines that may be sampled for it.
# "Montelukast", "Ibuprofen" and "Aspirin" are listed under two conditions each.
disease_meds = {
    "Allergy": ["Cetirizine", "Fexofenadine", "Loratadine", "Montelukast"],
    "Diabetes": ["Metformin", "Insulin", "Glipizide", "Dapagliflozin"],
    "Fever": ["Paracetamol", "Ibuprofen", "Aspirin"],
    "Asthma": ["Salbutamol", "Budesonide", "Montelukast"],
    "Blood Pressure": ["Amlodipine", "Losartan", "Telmisartan", "Atenolol"],
    "Pain": ["Ibuprofen", "Diclofenac", "Naproxen", "Aspirin"],
}

# Severity mapping (ensures different outputs)
# One fixed side-effect severity per medicine, so every row for a given
# medicine carries the same value.
severity_map = {
    "Cetirizine": 1.5, "Fexofenadine": 2.2, "Loratadine": 1.2, "Montelukast": 3.1,
    "Metformin": 3.9, "Insulin": 4.5, "Glipizide": 3.2, "Dapagliflozin": 2.4,
    "Paracetamol": 1.0, "Ibuprofen": 2.7, "Aspirin": 3.0,
    "Salbutamol": 2.0, "Budesonide": 1.8,
    "Amlodipine": 3.0, "Losartan": 2.2, "Telmisartan": 2.3, "Atenolol": 3.3,
    "Diclofenac": 3.4, "Naproxen": 2.1,
}

# Column names, in the same order as the values of each record built below.
DATASET_COLUMNS = [
    "Medicine_Name", "Condition_Treated", "Average_Price", "Cheaper_Alternative_Price",
    "Side_Effects_Severity", "Medicine_Effectiveness_Score", "Alternative_Availability_Score",
    "Chronic_Use_Flag", "Hospital_Visit_Flag", "Age_Group", "Adherence_Level", "Gender", "Source",
]

N_ROWS = 1300  # size of the synthetic dataset
SEED = 42      # fixed seed so Restart & Run All regenerates the identical dataset


def generate_rows(n_rows=N_ROWS, seed=SEED):
    """Build ``n_rows`` synthetic medicine records as plain Python lists.

    Parameters
    ----------
    n_rows : int
        Number of records to generate.
    seed : int or None
        Seed for the NumPy ``Generator``; the same seed yields the same records.

    Returns
    -------
    list[list]
        One list per record, with values ordered as in ``DATASET_COLUMNS``.

    Notes
    -----
    ``Adherence_Level`` (the later training target) is a coin flip drawn
    independently of every other column, so the data carries no learnable
    signal for it.
    """
    rng = np.random.default_rng(seed)
    diseases = list(disease_meds)
    age_groups = ["Child", "Adult", "Senior"]
    genders = ["Male", "Female"]

    records = []
    for _ in range(n_rows):
        disease = str(rng.choice(diseases))
        med = str(rng.choice(disease_meds[disease]))

        average_price = int(rng.integers(80, 400))
        # Keep the "cheaper alternative" strictly below the average price
        # (and inside the original 50..349 range); 80 > 50 keeps the range non-empty.
        cheaper_price = int(rng.integers(50, min(average_price, 350)))

        # Every listed medicine has a fixed severity; only draw a random one
        # for a medicine that is missing from severity_map.
        if med in severity_map:
            severity = severity_map[med]
        else:
            severity = float(rng.uniform(1, 5))

        records.append([
            med,
            disease,
            average_price,                   # Average Price
            cheaper_price,                   # Cheaper Alternative Price
            severity,                        # Side Effect Severity
            float(rng.uniform(5, 9)),        # Effectiveness Score
            int(rng.integers(60, 100)),      # Alternative Availability Score
            int(rng.integers(0, 2)),         # Chronic Use Flag
            int(rng.integers(0, 2)),         # Hospital Visit Flag
            str(rng.choice(age_groups)),     # Age Group
            int(rng.integers(0, 2)),         # Target Adherence Level
            str(rng.choice(genders)),        # Gender
            "Dataset",                       # Source
        ])
    return records


# Generate 1300 rows
rows = generate_rows()

# Create DataFrame
df = pd.DataFrame(rows, columns=DATASET_COLUMNS)

# Write the generated frame to disk as CSV (index column omitted) and report its shape
csv_path = "/content/medicine_hackathon_ready.csv"
df.to_csv(csv_path, index=False)
print("‚úÖ CSV generated! Shape:", df.shape)

# Hand the file to the browser as a download (helper only available inside Google Colab)
from google.colab import files
files.download(csv_path)


‚úÖ CSV generated! Shape: (1300, 13)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [5]:
import pandas as pd
import pickle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression

# Load dataset
df = pd.read_csv("/content/medicine_hackathon_ready.csv")

# ---- Encode ALL categorical columns ----
# Each text column gets its own LabelEncoder so the app can later apply the
# same string -> integer mapping to user input.
cat_cols = ["Medicine_Name", "Condition_Treated", "Gender", "Age_Group", "Source"]

encoders = {}
for col in cat_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    encoders[col] = le  # store encoder

# ---- Now all columns are numeric ----

# Separate features & target
X = df.drop(columns=["Adherence_Level"])
y = df["Adherence_Level"]

# Split (fixed random_state keeps the train/test partition reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# ---- Scale features ----
# The scaler is fitted on the training rows only and then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ---- Train Logistic Regression Classifier ----
model = LogisticRegression().fit(X_train_scaled, y_train)

# Report held-out accuracy so the quality of the exported model is visible.
print(f"Held-out accuracy: {model.score(X_test_scaled, y_test):.3f}")


def save_pickle(obj, path):
    """Pickle ``obj`` to ``path``; the ``with`` block closes (and flushes) the file."""
    with open(path, "wb") as fh:
        pickle.dump(obj, fh)


# ---- Save model & scaler ----
save_pickle(model, "/content/medicine_model.pkl")
save_pickle(scaler, "/content/medicine_scaler.pkl")
# Feature order is saved too, so the app can rebuild inputs in the column order used for training.
save_pickle(X.columns.tolist(), "/content/medicine_features.pkl")

# ---- Save each encoder separately ----
for col, le in encoders.items():
    save_pickle(le, f"/content/{col}_label_encoder.pkl")

print("Pickle files generated! You can deploy now.")


Pickle files generated! You can deploy now.


In [7]:
pip install streamlit

Collecting streamlit
  Downloading streamlit-1.52.2-py3-none-any.whl.metadata (9.8 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.52.2-py3-none-any.whl (9.0 MB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m9.0/9.0 MB[0m [31m54.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m6.9/6.9 MB[0m [31m66.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pydeck, streamlit
Successfully installed pydeck-0.9.1 streamlit-1.52.2


In [9]:
%%writefile app.py
import streamlit as st
import pickle
import pandas as pd
import numpy as np

# ---- Load pickles ----
def _load_pickle(path):
    """Return the object stored in the pickle file at ``path``, closing the file afterwards."""
    # SECURITY: pickle.load can execute arbitrary code. Only load artefacts that
    # were produced by this project's training step, never untrusted files.
    with open(path, "rb") as fh:
        return pickle.load(fh)


model = _load_pickle("medicine_model.pkl")
scaler = _load_pickle("medicine_scaler.pkl")
features = _load_pickle("medicine_features.pkl")

# ---- Load encoders ----
enc_medicine = _load_pickle("Medicine_Name_label_encoder.pkl")
enc_condition = _load_pickle("Condition_Treated_label_encoder.pkl")
enc_gender = _load_pickle("Gender_label_encoder.pkl")
enc_agegroup = _load_pickle("Age_Group_label_encoder.pkl")
enc_source = _load_pickle("Source_label_encoder.pkl")

# ---- Load dataset ----
df = pd.read_csv("medicine_hackathon_ready.csv")


def _age_to_group(age):
    """Translate a numeric age into an Age_Group label ("Child", "Adult" or "Senior")."""
    # NOTE(review): the dataset stores only the labels, not the age ranges behind
    # them; the 18 / 60 cut-offs are an assumption - confirm with the data owner.
    if age < 18:
        return "Child"
    if age < 60:
        return "Adult"
    return "Senior"


def _match_label(text, known_labels):
    """Return the entry of ``known_labels`` equal to ``text`` ignoring case and
    surrounding whitespace, or ``None`` when nothing matches."""
    wanted = text.strip().casefold()
    for label in known_labels:
        if str(label).casefold() == wanted:
            return str(label)
    return None


st.set_page_config(page_title="Medicine Truth Label AI", layout="centered")
st.title("ü©∫ Medicine Truth Label AI")

st.subheader("Enter details manually")

# ---- Direct user inputs ----
user_age = st.number_input("Enter Age", 1, 120, step=1)
user_gender = st.text_input("Enter Gender (Male/Female)")
user_disease = st.text_input("Enter Disease/Condition")
user_medicine = st.text_input("Enter Medicine Name")

# Normalize Gender text safely
if user_gender:
    user_gender = user_gender.strip().capitalize()
    if user_gender not in ["Male", "Female"]:
        st.error("‚ö† Please enter gender exactly as: Male or Female")
        st.stop()

# ---- Button action ----
if st.button("Analyze Medicine"):
    # Resolve the free-text entries to the exact labels the encoders know, so a
    # typo or different capitalisation gives a clear message instead of an
    # "unseen label" exception from the encoder.
    medicine_label = _match_label(user_medicine, enc_medicine.classes_)
    disease_label = _match_label(user_disease, enc_condition.classes_)

    problems = []
    if not user_gender:
        problems.append("Please enter gender exactly as: Male or Female")
    if disease_label is None:
        problems.append(
            "Unknown disease/condition. Known values: "
            + ", ".join(str(c) for c in enc_condition.classes_)
        )
    if medicine_label is None:
        problems.append(
            "Unknown medicine name. Known values: "
            + ", ".join(str(c) for c in enc_medicine.classes_)
        )
    if problems:
        for problem in problems:
            st.error(problem)
        st.stop()

    user_medicine, user_disease = medicine_label, disease_label

    try:
        # Encode categorical inputs
        med_encoded = enc_medicine.transform([user_medicine])[0]
        disease_encoded = enc_condition.transform([user_disease])[0]
        gender_encoded = enc_gender.transform([user_gender])[0]

        # Build input for model (stable, not random)
        input_data = []
        for f in features:
            if f == "Medicine_Name":
                input_data.append(med_encoded)
            elif f == "Condition_Treated":
                input_data.append(disease_encoded)
            elif f == "Gender":
                input_data.append(gender_encoded)
            elif f == "Age":
                input_data.append(user_age)
            elif f == "Age_Group":
                # The encoder knows group labels, not raw ages, so convert first.
                input_data.append(enc_agegroup.transform([_age_to_group(user_age)])[0])
            elif f == "Source":
                input_data.append(enc_source.transform(["Dataset"])[0])
            else:
                input_data.append(df[f].mean())  # only for least impactful numeric fields

        df_input = pd.DataFrame([input_data], columns=features)
        scaled = scaler.transform(df_input)

        # Predict classification (0 or 1)
        pred = model.predict(scaled)[0]

        # ---- Classification result ----
        if pred == 1:
            st.success("Adherence Prediction: Good (Low Risk)")
        else:
            st.error("Adherence Prediction: Poor (High Risk) ‚ö†")

        # ---- Medicine comparison for same disease ----
        st.subheader(f"üíä Personalized medicine comparison for disease: {user_disease}")

        disease_df = df[df["Condition_Treated"] == user_disease]

        if disease_df.empty:
            st.error("‚ùó No medicines found for this disease!")
        else:
            agg_spec = {
                "Side_Effects_Severity": "mean",
                "Average_Price": "mean",
                "Medicine_Effectiveness_Score": "mean",
            }
            # Dosage is optional: aggregate it only when the loaded CSV has the
            # column, otherwise the aggregation would raise a KeyError.
            if "Dosage_mg" in disease_df.columns:
                agg_spec["Dosage_mg"] = "mean"

            comparison = disease_df.groupby("Medicine_Name").agg(agg_spec).round(1)

            # rename() skips keys that are not present, so the Dosage entry is harmless.
            comparison = comparison.rename(columns={
                "Side_Effects_Severity": "Avg_SideEffect_Risk",
                "Average_Price": "Avg_Price (‚Çπ)",
                "Medicine_Effectiveness_Score": "Avg_Effectiveness",
                "Dosage_mg": "Avg_Dosage_mg"
            })

            # Sort by side effect severity (ascending = safest first)
            comparison = comparison.sort_values(by="Avg_SideEffect_Risk")
            st.dataframe(comparison)

            # Summary for medicines of same disease
            st.subheader("üí° Medicine Summary:")
            for med, row in comparison.iterrows():
                risk = row["Avg_SideEffect_Risk"]
                if risk <= 2:
                    msg = "Mild side effects, safer option"
                elif risk <= 3.5:
                    msg = "Medium side effects, monitor your health"
                else:
                    msg = "Strong side effects, be careful!"
                st.write(f"- {med}: {msg}")

        # ---- Chatbot note ----
        st.subheader("ü§ñ Lifestyle Recommendations")
        st.info("Lifestyle, diet plans, do‚Äôs/don‚Äôts, tips, and motivation will be generated using chatbot later.")

    except Exception as e:
        # Surface unexpected failures in the UI instead of crashing the app.
        st.error(f"‚ö† Error: {str(e)}")


Writing app.py


In [10]:
from importlib import metadata

# Distributions the Streamlit app needs at runtime.
packages = ["streamlit", "pandas", "scikit-learn", "numpy"]


def _pinned(package):
    """Return ``package==<installed version>``, or the bare name when the
    package is not installed in this kernel."""
    try:
        return f"{package}=={metadata.version(package)}"
    except metadata.PackageNotFoundError:
        return package


# Pin to the versions installed in this kernel: the app unpickles scikit-learn
# objects, and unpickling under a different scikit-learn version is not supported.
req = "\n".join(_pinned(package) for package in packages)

with open("/content/requirements.txt", "w") as f:
    f.write(req)

print("requirements.txt created!")

# Offer the file as a browser download (helper only available inside Google Colab)
from google.colab import files
files.download("/content/requirements.txt")


requirements.txt created!


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>