<a href="https://colab.research.google.com/github/kunalnischal7/CuraConnectAI/blob/main/CuraConnectAI.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import spacy
import re
import pandas as pd
import numpy as np
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.ensemble import RandomForestClassifier
import joblib
from google.colab import drive
from spacy.matcher import PhraseMatcher

# Mount Google Drive so the dataset and saved artifacts are reachable
drive.mount('/content/drive')

# Load the small English spaCy pipeline (tokenizer + parser used for negation)
nlp = spacy.load("en_core_web_sm")

# Load the symptom -> disease dataset; it is used further down to train the
# classifier (columns "Symptom" and "Associated Diseases").
csv_path = "/content/drive/My Drive/expanded_symptoms_dataset.csv"
symptom_df = pd.read_csv(csv_path)

# Canonical symptom name -> surface forms that should be recognised in text.
# Each list includes the canonical name itself so it is matched too.
symptom_synonyms = {
    'fever': ['fever', 'high temperature', 'pyrexia', 'febrile'],
    'cough': ['cough', 'coughing', 'hacking cough', 'dry cough'],
    'headache': ['headache', 'migraine', 'head pain'],
    # Add synonyms for all symptoms
}
known_symptoms = list(symptom_synonyms.keys())

# Create PhraseMatcher for symptom detection.
# Patterns only need tokenization (the matcher compares token text), so
# nlp.make_doc is used instead of running the whole pipeline on each phrase.
matcher = PhraseMatcher(nlp.vocab)
patterns = [
    nlp.make_doc(phrase)
    for phrases in symptom_synonyms.values()
    for phrase in phrases
]
# Patterns are passed as a single list: the current PhraseMatcher.add form
# (accepted since spaCy 2.2.2), replacing the legacy `add(key, None, *docs)`.
matcher.add("SYMPTOMS", patterns)

# Words treated as negation cues when found among a symptom token's
# syntactic ancestors or children.
_NEGATION_WORDS = frozenset({"no", "not", "without", "never"})

# Numbers above this are not accepted as a human age.
_MAX_PLAUSIBLE_AGE = 120

# Age stated explicitly: "aged 28", "age: 28", "28 years old", "28-year-old",
# "28yo", "28 y/o", "28 y.o.".
_AGE_EXPLICIT_RE = re.compile(
    r"\baged?\s*[:=]?\s*(\d{1,3})\b"
    r"|\b(\d{1,3})\s*-?\s*(?:years?|yrs?)[\s-]*old\b"
    r"|\b(\d{1,3})\s*(?:y/o|yo|y\.o\.?)(?!\w)",
    re.IGNORECASE,
)

# Fallback: a bare number (optionally followed by "years"/"yrs") that is not
# part of a decimal, a range/ratio, a measurement, or a duration such as
# "for 3 days".
_AGE_BARE_RE = re.compile(
    r"(?<![\w./\-])(?<!\bfor\s)(\d{1,3})(?!\.?\d)"
    r"(?!\s*-\s*\d)"
    r"(?!\s*[°%/])"
    r"(?!\s*-?\s*(?:days?|weeks?|wks?|months?|hours?|hrs?|minutes?|mins?|"
    r"seconds?|secs?|times?|degrees?|mg|ml|kg|lbs?|cm)\b)"
    r"\s*(?:years?|yrs?)?(?!\w)",
    re.IGNORECASE,
)


def _extract_age(text):
    """Return the first plausible age mentioned in *text*, or None."""
    # Explicit age phrasing wins over bare numbers so that a duration that
    # appears earlier in the sentence is not mistaken for the age.
    for pattern in (_AGE_EXPLICIT_RE, _AGE_BARE_RE):
        for match in pattern.finditer(text):
            digits = next(g for g in match.groups() if g is not None)
            value = int(digits)
            if 0 < value <= _MAX_PLAUSIBLE_AGE:
                return value
    return None


def _is_negated(span):
    """Return True if any token of *span* has a negation word as ancestor or child."""
    for token in span:
        if any(anc.lower_ in _NEGATION_WORDS for anc in token.ancestors):
            return True
        if any(child.lower_ in _NEGATION_WORDS for child in token.children):
            return True
    # NOTE(review): "not" attached to a governing verb ("I do not have fever")
    # is a sibling, not an ancestor/child of the symptom, so it is presumably
    # missed here — confirm against the parser before extending the rule.
    return False


def extract_medical_info(text):
    """Extract age, reported symptoms and negated symptoms from free text.

    Args:
        text: The user's description of how they feel.

    Returns:
        A dict with keys:
          "age": int, or None when no plausible age is found.
          "symptoms": canonical symptom names (keys of ``symptom_synonyms``)
              mentioned without negation, in order of first mention.
          "negated_symptoms": canonical names of symptoms mentioned with a
              negation cue, in order of first mention.
    """
    # Matching is done on lower-cased text because the synonym phrases are
    # lower-case and the matcher compares exact token text.
    doc = nlp(text.lower())

    age = _extract_age(text)

    # Reverse lookup: surface form -> canonical symptom. Built per call
    # because symptom_synonyms can be re-assigned at runtime (it is reloaded
    # from disk by interactive_diagnosis). setdefault keeps the first key that
    # lists a phrase, matching the original first-match normalization.
    canonical = {}
    for name, phrases in symptom_synonyms.items():
        for phrase in phrases:
            canonical.setdefault(phrase, name)

    symptoms = []
    negated_symptoms = []
    for _match_id, start, end in matcher(doc):
        span = doc[start:end]
        phrase = span.text
        if _is_negated(span):
            # Negated mentions are normalized the same way as affirmed ones;
            # an unknown phrase is kept verbatim rather than dropped.
            negated_symptoms.append(canonical.get(phrase, phrase))
        elif phrase in canonical:
            symptoms.append(canonical[phrase])

    # dict.fromkeys removes duplicates while keeping a deterministic order.
    return {
        "age": age,
        "symptoms": list(dict.fromkeys(symptoms)),
        "negated_symptoms": list(dict.fromkeys(negated_symptoms)),
    }
# --- Data preparation and model training ---
import ast

from sklearn.multioutput import MultiOutputClassifier

mlb_X = MultiLabelBinarizer()
mlb_y = MultiLabelBinarizer()

# The input data should be binary encoded symptoms: every row holds a single
# symptom, lower-cased and wrapped in a list for the binarizer.
X = mlb_X.fit_transform(
    symptom_df["Symptom"].apply(lambda x: [x.lower()])
)

# The target should be multi-label encoded diseases.
# ast.literal_eval parses the cell without executing arbitrary code, unlike
# eval. Assumes each cell is a Python-literal list of disease names such as
# "['Flu', 'Cold']" — TODO confirm against the CSV.
y = mlb_y.fit_transform(
    symptom_df["Associated Diseases"].apply(ast.literal_eval)
)

# One random forest per disease column (multi-label setup)
model = MultiOutputClassifier(
    RandomForestClassifier(n_estimators=200,
                          class_weight='balanced',
                          random_state=42)
)
model.fit(X, y)

# Persist the trained model and its encoders to Google Drive
def save_artifacts():
    """Dump the model, both binarizers and the synonym table with joblib."""
    # (object, destination) pairs, written in this order.
    targets = (
        (model, "/content/drive/My Drive/symptom_disease_model.pkl"),
        (mlb_X, "/content/drive/My Drive/mlb_X.pkl"),
        (mlb_y, "/content/drive/My Drive/mlb_y.pkl"),
        (symptom_synonyms, "/content/drive/My Drive/symptom_synonyms.pkl"),
    )
    for artifact, destination in targets:
        joblib.dump(artifact, destination)

save_artifacts()

# Enhanced prediction function with confidence scores
def predict_diseases_with_confidence(symptoms, threshold=0.3):
    """Rank diseases by the model's positive-class probability.

    Args:
        symptoms: Iterable of canonical symptom names to encode with ``mlb_X``.
        threshold: Minimum probability for a disease to be reported.

    Returns:
        A list of ``(disease, confidence)`` tuples with confidence rounded to
        two decimals, sorted from most to least confident.
    """
    input_vector = mlb_X.transform([symptoms])

    # One probability array per disease, each of shape (1, n_classes_seen)
    probabilities = model.predict_proba(input_vector)

    diseases_with_conf = []
    for disease, estimator, proba in zip(
        mlb_y.classes_, model.estimators_, probabilities
    ):
        # An estimator trained on a column with a single label value has only
        # one probability column, so the positive class is located through
        # classes_ rather than assumed to sit at index 1.
        seen_classes = list(estimator.classes_)
        if 1 in seen_classes:
            confidence = float(proba[0][seen_classes.index(1)])
        else:
            confidence = 0.0

        if confidence >= threshold:
            diseases_with_conf.append((disease, round(confidence, 2)))

    return sorted(diseases_with_conf, key=lambda x: x[1], reverse=True)

# Example usage: run extraction and prediction once on a sample message
user_query = """
I'm a 28yo female experiencing persistent dry cough and mild fever for 3 days.
No headache or chest pain. Some fatigue but no nausea.
"""

processed_data = extract_medical_info(user_query)
print("Extracted Information:")
print(f"Age: {processed_data['age']}")
print(f"Symptoms: {processed_data['symptoms']}")
print(f"Negated Symptoms: {processed_data['negated_symptoms']}")

predictions = predict_diseases_with_confidence(processed_data['symptoms'])
print("\nPredicted Diseases with Confidence Scores:")
for disease, confidence in predictions:
    # One decimal place avoids float noise such as 56.99999999999999 and
    # matches the formatting used by interactive_diagnosis.
    print(f"- {disease} ({confidence*100:.1f}% confidence)")

# ... (keep all previous imports and setup code) ...

def interactive_diagnosis():
    """Run the console symptom-checker loop until the user types exit/quit.

    Reloads the saved model, binarizers and synonym table from Google Drive
    into the module globals, then repeatedly reads a description, prints the
    extracted age/symptoms, up to three predicted conditions with a short
    piece of advice, and synonym-based follow-up suggestions.
    """
    global model, mlb_X, mlb_y, symptom_synonyms

    # Restore the artifacts written by save_artifacts()
    model = joblib.load("/content/drive/My Drive/symptom_disease_model.pkl")
    mlb_X = joblib.load("/content/drive/My Drive/mlb_X.pkl")
    mlb_y = joblib.load("/content/drive/My Drive/mlb_y.pkl")
    symptom_synonyms = joblib.load("/content/drive/My Drive/symptom_synonyms.pkl")

    welcome_banner = """
    ==============================================
    🩺 CuraConnectAI - Symptom Checker Assistant
    ==============================================
    Hello! I'm here to help you understand your symptoms.
    Please describe how you're feeling in your own words.
    You can include:
    - Your age
    - Symptoms you're experiencing
    - Symptoms you're NOT experiencing
    - Duration of symptoms
    - Any other relevant information

    Type 'exit' at any time to quit.
    """
    print(welcome_banner)

    stop_words = ('exit', 'quit')

    while True:
        user_input = input("\n📝 How can I help you today? \n> ").strip()

        if user_input.lower() in stop_words:
            print("\n👋 Thank you for using CuraConnectAI. Stay healthy!")
            break

        try:
            processed_data = extract_medical_info(user_input)

            # Echo back what was understood
            print("\n🔍 Analysis of your description:")
            print(f"   Age: {processed_data['age'] or 'Not specified'}")
            print(f"   Detected symptoms: {', '.join(processed_data['symptoms']) or 'None'}")
            print(f"   Ruled out symptoms: {', '.join(processed_data['negated_symptoms']) or 'None'}")

            # Nothing to predict from: ask for more detail and read again
            if not processed_data['symptoms']:
                print("\n❓ No clear symptoms detected. Please describe how you're feeling in more detail.")
                continue

            ranked = predict_diseases_with_confidence(processed_data['symptoms'])

            print("\n🩺 Potential Health Insights:")
            if not ranked:
                print("No clear patterns detected. Consider consulting a healthcare professional.")
            else:
                # Only the three most confident conditions are shown
                for i, (disease, confidence) in enumerate(ranked[:3], 1):
                    print(f"{i}. {disease} ({confidence*100:.1f}% confidence)")
                    if confidence > 0.7:
                        advice = "   💡 Consider seeking medical attention"
                    elif confidence > 0.4:
                        advice = "   💡 Monitor symptoms and consult if they worsen"
                    else:
                        advice = "   💡 May not require immediate attention"
                    print(advice)

            print("\n⚠️ Disclaimer: This is not a substitute for professional medical advice.")

            # Follow-up suggestions: for every synonym of a detected symptom
            # that is itself a key of the synonym table, collect that key's
            # synonyms, then drop what was already detected.
            suggestions = set()
            for detected in processed_data['symptoms']:
                for alias in symptom_synonyms.get(detected, []):
                    suggestions.update(symptom_synonyms.get(alias, []))
            suggestions.difference_update(set(processed_data['symptoms']))
            if suggestions:
                print("\nℹ️ You might want to mention if you're experiencing:")
                print("   " + ", ".join(suggestions))

        except Exception as e:
            # Keep the session alive on any processing failure
            print(f"\n⚠️ Error processing your input: {str(e)}")
            print("Please try describing your symptoms differently.")

# Run the interactive interface, but only when this module is executed
# directly (the guard is false when the module is imported).
if __name__ == "__main__":
    interactive_diagnosis()
# ... [Keep all previous imports and setup code] ...

def interactive_diagnosis():
    """Run the enhanced console health-assistant loop.

    Reloads the saved model, binarizers and synonym table from Google Drive
    into the module globals, then repeatedly reads a description and prints
    the extracted information, up to three predicted conditions enriched from
    ``MEDICAL_KB`` (description, precautions, urgency), general guidance and
    related symptoms. Ends when the user types exit, quit or bye.

    ``MEDICAL_KB`` is expected to be a mapping defined elsewhere (it is not
    visible in this part of the notebook). Entries or fields missing from it
    are skipped instead of aborting the whole answer.
    """
    # Load necessary artifacts
    global model, mlb_X, mlb_y, symptom_synonyms
    model = joblib.load("/content/drive/My Drive/symptom_disease_model.pkl")
    mlb_X = joblib.load("/content/drive/My Drive/mlb_X.pkl")
    mlb_y = joblib.load("/content/drive/My Drive/mlb_y.pkl")
    symptom_synonyms = joblib.load("/content/drive/My Drive/symptom_synonyms.pkl")

    print("""
    🌟 Welcome to CuraConnectAI - Your Personal Health Assistant 🌟

    I'm here to help you understand your symptoms and guide you to appropriate care.
    Let's work together to assess your situation step by step.

    You can tell me about:
    • How you're feeling right now
    • Symptoms you've noticed
    • How long they've lasted
    • Any existing health conditions

    Type 'exit' at any time to end our chat.
    """)

    while True:
        user_input = input("\n💬 How are you feeling today? \n> ").strip()

        if user_input.lower() in ['exit', 'quit', 'bye']:
            print("\n🌈 Thank you for using CuraConnectAI! Wishing you good health! 🌈")
            break

        try:
            # Process input
            processed_data = extract_medical_info(user_input)

            # Show extraction results with emoji visualization
            print("\n🔍 Here's what I understand from your description:")
            print(f"   👤 Age: {processed_data['age'] or 'Not mentioned'}")
            print(f"   🤒 Reported symptoms: {', '.join(processed_data['symptoms']) or 'None yet'}")
            print(f"   ✅ Ruled out symptoms: {', '.join(processed_data['negated_symptoms']) or 'None'}")

            # Handle age missing case. The age is only recorded in
            # processed_data; nothing below reads it.
            if not processed_data['age']:
                print("\n⚠️ Age helps improve accuracy. Could you please share your age?")
                age = input("   Please enter your age (or 'skip' to continue): ")
                if age.isdigit():
                    processed_data['age'] = int(age)

            if processed_data['symptoms']:
                predictions = predict_diseases_with_confidence(processed_data['symptoms'])

                print("\n📊 Health Insights Analysis:")
                if predictions:
                    print("Based on your symptoms, here are possible considerations:")
                    for i, (disease, confidence) in enumerate(predictions[:3], 1):
                        print(f"\n{i}. {disease} ({confidence*100:.1f}% match)")

                        # A predicted disease may have no knowledge-base entry;
                        # .get keeps the rest of the report printable.
                        kb_entry = MEDICAL_KB.get(disease, {})

                        description = kb_entry.get('description')
                        if description:
                            print(f"   📝 Description: {description}")

                        # Personalized recommendations
                        precautions = kb_entry.get('precautions', [])
                        if precautions:
                            print("   🛡️ Recommended Actions:")
                            for precaution in precautions[:3]:
                                print(f"    • {precaution}")

                        # Urgency indicator; an unmapped level is shown as-is
                        urgency = kb_entry.get('urgency')
                        if urgency is not None:
                            print(f"   ⚠️ Urgency Level: {urgency_map.get(urgency, urgency)}")

                    # Show most critical action first
                    top_condition = predictions[0][0]
                    top_precautions = MEDICAL_KB.get(top_condition, {}).get('precautions', [])
                    if top_precautions:
                        print("\n🚨 Most Important Next Steps:")
                        print(f"• {top_precautions[0]}")

                    # Add general health tips
                    print("\n💡 General Wellness Tips:")
                    print("• Stay hydrated with water or electrolyte drinks")
                    print("• Monitor your temperature regularly")
                    print("• Get adequate rest and avoid strenuous activities")
                else:
                    print("No clear patterns detected. Let's try again with more details.")

                # Enhanced disclaimer
                print("\n🔔 Important Note:")
                print("This analysis is not a substitute for professional medical advice.")
                print("If symptoms worsen or you experience any of these warning signs:")
                print("• Difficulty breathing • Severe pain • Confusion • High fever (≥103°F)")
                print("➡️ Seek immediate medical attention")

                # Symptom tracking suggestions
                print("\n📋 Symptom Tracking Recommendations:")
                print("Consider monitoring these every 4-6 hours:")
                print("• Body temperature • Symptom severity (1-10 scale)")
                print("• Hydration levels • Any new symptoms")

                # Related symptom suggestions with categories
                related = get_related_symptoms(processed_data['symptoms'])
                if related:
                    print("\n🔍 You might want to check for these related symptoms:")
                    print("Common accompanying symptoms:")
                    print(f"   • {', '.join(related[:3])}")
                    # Avoid an empty bullet when three or fewer were found
                    to_monitor = related[3:5]
                    if to_monitor:
                        print("Important to monitor:")
                        print(f"   • {', '.join(to_monitor)}")

            else:
                print("\n🤔 I didn't detect clear symptoms. Could you describe:")
                print("• Specific sensations you're feeling")
                print("• Body areas affected")
                print("• Changes from your normal health")
                print("Example: 'I've had a throbbing headache and nausea since yesterday'")

        except Exception as e:
            # Keep the chat alive on any processing failure
            print(f"\n❌ Oops! Something went wrong: {str(e)}")
            print("Let's try that again. Could you describe your symptoms differently?")
            print("Example: 'I'm experiencing chest pain and dizziness for 2 hours'")

# Additional helpers.
# Maps an urgency level to the user-facing label printed by
# interactive_diagnosis; the levels are looked up from MEDICAL_KB's 'urgency'
# field there.
urgency_map = {
    'Critical': '🔴 Immediate Emergency Care Needed',
    'High': '🟠 Seek Medical Attention Today',
    'Medium': '🟡 Schedule Doctor Visit Soon',
    'Low': '🟢 Monitor and Home Care'
}

def get_related_symptoms(detected_symptoms):
    """Return up to five symptoms related to the detected ones.

    Candidates are the synonyms of each detected symptom plus the
    'common_with' list of its ``MEDICAL_KB`` entry, when present.

    Args:
        detected_symptoms: Iterable of canonical symptom names.

    Returns:
        A list of at most five unique names, in order of first appearance,
        excluding symptoms that were already detected.
    """
    already_reported = set(detected_symptoms)

    candidates = []
    for symptom in detected_symptoms:
        candidates.extend(symptom_synonyms.get(symptom, []))
        candidates.extend(MEDICAL_KB.get(symptom, {}).get('common_with', []))

    # dict.fromkeys de-duplicates while keeping a stable order; a plain set
    # would make the five returned names vary between runs.
    unique = dict.fromkeys(
        name for name in candidates if name not in already_reported
    )
    return list(unique)[:5]

# Run the enhanced interface, but only when this module is executed directly
# (the guard is false when the module is imported).
if __name__ == "__main__":
    interactive_diagnosis()

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at emilyalsentzer/Bio_ClinicalBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Device set to use cpu



    🌟 Welcome to CuraConnectAI - Your Personal Health Assistant 🌟
    
    I'm here to help you understand your symptoms and guide you to appropriate care.
    Let's work together to assess your situation step by step.
    
    You can tell me about:
    • How you're feeling right now
    • Symptoms you've noticed
    • How long they've lasted
    • Any existing health conditions
    
    Type 'exit' at any time to end our chat.
    

🔍 Here's what I understand from your description:
   👤 Age: Not mentioned
   🤒 Reported symptoms: None yet
   ✅ Ruled out symptoms: None

🤔 I didn't detect clear symptoms. Could you describe:
• Specific sensations you're feeling
• Body areas affected
• Changes from your normal health
Example: 'I've had a throbbing headache and nausea since yesterday'

🔍 Here's what I understand from your description:
   👤 Age: Not mentioned
   🤒 Reported symptoms: None yet
   ✅ Ruled out symptoms: None

🤔 I didn't detect clear symptoms. Could you describe:
• Specific sen