In [48]:
!pip install spacy
!python -m spacy download en_core_web_md


Collecting en-core-web-md==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.7.1/en_core_web_md-3.7.1-py3-none-any.whl (42.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.8/42.8 MB[0m [31m31.6 MB/s[0m eta [36m0:00:00[0m
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_md')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [49]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import spacy

In [50]:
# Load the spaCy model for English
nlp = spacy.load('en_core_web_md')



In [56]:
# Function to find similar symptoms using spaCy
def find_similar_symptoms(input_symptom, stored_symptoms, similarity_threshold=0.5):
    input_symptom_doc = nlp(input_symptom)
    similarity_scores = []

    for stored_symptom in stored_symptoms:
        stored_symptom_doc = nlp(stored_symptom)
        similarity_score = input_symptom_doc.similarity(stored_symptom_doc)
        if similarity_score >= similarity_threshold:
            similarity_scores.append((stored_symptom, similarity_score))

    # Sort based on similarity scores in descending order
    similarity_scores.sort(key=lambda x: x[1], reverse=True)

    return similarity_scores[:5]  # Return top 5 similar symptoms

In [57]:
# Function to recommend doctors based on symptoms, specialist type, and pre-existing conditions
def recommend_doctors(new_symptoms, specialist_type, pre_existing_conditions, patient_data):

    # Step 1: Check if there are any doctors matching the specialist type
    filtered_data_by_specialist = patient_data[patient_data['Specialist Type'] == specialist_type]

    if filtered_data_by_specialist.empty:
        return "No doctors found matching the specialist type."

    # Step 2: Check if there are any matches for pre-existing conditions
    filtered_data = filtered_data_by_specialist[
        filtered_data_by_specialist['Pre-existing Conditions'] == pre_existing_conditions
    ]

    # If no exact matches are found for pre-existing conditions, proceed without filtering by it
    if filtered_data.empty:
        print("No exact match for pre-existing conditions, proceeding without it.")
        filtered_data = filtered_data_by_specialist

    # Step 3: Extract symptoms from filtered patient data
    stored_symptoms_list = filtered_data['Symptoms'].tolist()

    # Step 4: Find top similar symptoms using NLP-based similarity matching
    top_similar_symptoms = find_similar_symptoms(new_symptoms, stored_symptoms_list)

    if not top_similar_symptoms:
        return "No similar symptoms found."

    # Step 5: Create a DataFrame of matched symptoms
    matched_symptoms = [symptom[0] for symptom in top_similar_symptoms]
    filtered_data = filtered_data[filtered_data['Symptoms'].isin(matched_symptoms)]

    if filtered_data.empty:
        return "No patients found with similar symptoms."

    # Step 6: Rank doctors based on the average rating of top matches
    top_doctors = filtered_data.groupby(['Doctor Name', 'Doctor ID']).agg(
        avg_rating=('Rating', 'mean'),
        patient_count=('Patient ID', 'count')
    ).sort_values(by='avg_rating', ascending=False)

    return top_doctors

In [58]:
# Load the dataset (replace the path with your file's path if needed)
file_path = 'Final_data.csv'
patient_data = pd.read_csv(file_path)

patient_data.head(5)

Unnamed: 0,Patient ID,Symptoms,Specialist Type,Rating,Age,Gender,Pre-existing Conditions,Doctor Name,Doctor ID
0,325,"vomiting, nausea, swelling, stiffness",Cardiologist,5,43,Female,Asthma,Dr. Angela Hernandez,1000
1,405,"fever, chest pain, swelling",Cardiologist,1,67,Female,Asthma,Dr. Angela Hernandez,1000
2,1006,loss of smell,Cardiologist,3,66,Male,Asthma,Dr. Angela Hernandez,1000
3,617,"vomiting, sore throat, dizziness",Dermatologist,5,63,Male,Diabetes,Dr. Angela Hernandez,1001
4,629,"cough, loss of smell, dizziness",Dermatologist,1,79,Male,Obesity,Dr. Angela Hernandez,1001


In [59]:
# Example usage
print("Enter Symptoms:")
# new_symptoms = "I am having Pharyngitis and chest pain"
new_symptoms = input()

print("\nEnter Specialist Type:")
# specialist_type = "ENT Specialist"
specialist_type = input()

print("\nEnter Pre_Existing_Conditions (if any):")
# pre_existing_conditions = "Diabetes"
pre_existing_conditions = input()

Enter Symptoms:
I am having Pharyngitis and chest pain

Enter Specialist Type:
ENT Specialist

Enter Pre_Existing_Conditions (if any):
Diabetes


In [60]:
# Get the recommended doctors based on the new NLP approach
recommended_doctors = recommend_doctors(new_symptoms, specialist_type, pre_existing_conditions, patient_data)

# Display the top recommended doctors
print(recommended_doctors)

                                avg_rating  patient_count
Doctor Name          Doctor ID                           
Dr. Angela Hernandez 1002              5.0              1
Dr. Karen Wright     1082              4.0              1
Dr. Susan Taylor     1141              3.0              2
Dr. John Smith       1072              2.0              1
