In [2]:
from langchain.llms.openai import OpenAI
from langchain.prompts import PromptTemplate
from langchain.chat_models import ChatOpenAI

llm = OpenAI(model="gpt-3.5-turbo-instruct")
chat = ChatOpenAI(model="gpt-4o-mini")

### Structure Data for Fine-Tuning

In [3]:
# Example cases in a readable format
patient1 = {
    "patient_name": "Sarah Johnson",
    "age": 40,
    "gender": "Female",
    "primary_complaint": "Neck pain",
    "duration_of_symptoms": "1 week",
    "pain_severity": 4,
    "pain_description": "Dull ache, localized to the base of the skull",
    "previous_injuries_or_surgeries": "None",
    "medical_history": "Anxiety",
    "current_medications": "None",
    "red_flag_symptoms": "None",
    "symptom_triggers": "Worsens with poor posture, especially when using the computer",
    "symptom_relievers": "Improves with rest and over-the-counter pain relievers",
    "daily_activities_affected": "Difficulty turning head when driving",
    "lifestyle_factors": "Sedentary lifestyle, works long hours at a desk job",
    "dermatome_myotome_involvement": "Selected discomfort at the base of the skull (C2-C3 area) on the dermatome chart, no significant muscle weakness reported on the myotome chart.",
    "pain_patterns": "Localized, no radiating pain.",
    "goals_for_physical_therapy": "Pain relief and improved range of motion",
    "classification": "Treatable pathology (Physical therapy)",
    "differentiation_probability": {
        "neurological_problem": "10%",
        "orthopedic_muscular_problem": "90%",
    },
    "diagnosis_probability": {
        "cervical_strain": "80%",
        "cervicogenic_headache": "20%",
        "cervical_radiculopathy": "5%",
    },
}
patient2 = {
    "patient_name": "John Doe",
    "age": 55,
    "gender": "Male",
    "primary_complaint": "Lower back pain",
    "duration_of_symptoms": "3 weeks",
    "pain_severity": 7,
    "pain_description": "Sharp pain radiating down the right leg",
    "previous_injuries_or_surgeries": "None",
    "medical_history": "Hypertension, Type 2 Diabetes",
    "current_medications": "Metformin, Lisinopril",
    "red_flag_symptoms": "Unexplained weight loss, Numbness in legs",
    "symptom_triggers": "Pain worsens with prolonged sitting and physical activity",
    "symptom_relievers": "Temporary relief with lying down, but pain persists",
    "daily_activities_affected": "Difficulty sitting for long periods, trouble lifting objects",
    "lifestyle_factors": "History of smoking, works in a physically demanding job",
    "dermatome_myotome_involvement": "Marked pain radiating from lower back down the right leg on the dermatome chart (L4-L5 involvement); weakness in the right leg muscles corresponding to the L4-L5 myotomes.",
    "pain_patterns": "Radiating pain following the L4-L5 dermatome.",
    "goals_for_physical_therapy": "Pain relief, improved mobility",
    "classification": "Serious pathology (Referral required)",
    "differentiation_probability": {
        "neurological_problem": "70%",
        "orthopedic_muscular_problem": "30%",
    },
    "diagnosis_probability": {
        "lumbar_radiculopathy": "70%",
        "lumbar_spinal_stenosis": "40%",
        "degenerative_disc_disease": "20%",
    },
}
patient3 = {
    "patient_name": "Emily Zhang",
    "age": 32,
    "gender": "Female",
    "primary_complaint": "Persistent headaches",
    "duration_of_symptoms": "2 months",
    "pain_severity": 5,
    "pain_description": "Throbbing pain, predominantly on the right side of the head",
    "previous_injuries_or_surgeries": "None",
    "medical_history": "Migraines",
    "current_medications": "Sumatriptan as needed",
    "red_flag_symptoms": "None",
    "symptom_triggers": "Stress, lack of sleep",
    "symptom_relievers": "Rest, dark room, and medication",
    "daily_activities_affected": "Difficulty focusing at work, sensitivity to light",
    "lifestyle_factors": "High-stress job, irregular sleep patterns",
    "dermatome_myotome_involvement": "Marked sensitivity in the C2-C3 dermatome area; no significant muscle weakness reported.",
    "pain_patterns": "Throbbing pain in the right side of the head, no associated neurological symptoms.",
    "goals_for_physical_therapy": "Symptom management and headache prevention",
    "classification": "Treatable pathology (Physical therapy)",
    "differentiation_probability": {
        "neurological_problem": "20%",
        "orthopedic_muscular_problem": "80%",
    },
    "diagnosis_probability": {
        "tension_headache": "70%",
        "cervicogenic_headache": "30%",
        "migraine": "25%",
    },
}
patient4 = {
    "patient_name": "James Williams",
    "age": 62,
    "gender": "Male",
    "primary_complaint": "Numbness and tingling in hands",
    "duration_of_symptoms": "6 weeks",
    "pain_severity": 3,
    "pain_description": "N/A",
    "previous_injuries_or_surgeries": "None",
    "medical_history": "Type 2 Diabetes, High blood pressure",
    "current_medications": "Metformin, Lisinopril",
    "red_flag_symptoms": "None",
    "symptom_triggers": "Worse in the morning, improves slightly during the day",
    "symptom_relievers": "Shaking hands, moving fingers",
    "daily_activities_affected": "Difficulty gripping objects, typing",
    "lifestyle_factors": "Moderate exercise routine, balanced diet",
    "dermatome_myotome_involvement": "Marked numbness in the C6-C8 dermatome areas; mild weakness in hand muscles corresponding to the C8 myotome.",
    "pain_patterns": "No significant pain, mainly numbness and tingling.",
    "goals_for_physical_therapy": "Improve hand function, reduce numbness",
    "classification": "Treatable pathology (Physical therapy)",
    "differentiation_probability": {
        "neurological_problem": "60%",
        "orthopedic_muscular_problem": "40%",
    },
    "diagnosis_probability": {
        "peripheral_neuropathy": "60%",
        "cervical_radiculopathy": "40%",
        "carpal_tunnel_syndrome": "15%",
    },
}
patient5 = {
    "patient_name": "Laura Davis",
    "age": 29,
    "gender": "Female",
    "primary_complaint": "Weakness in the legs",
    "duration_of_symptoms": "2 months",
    "pain_severity": 2,
    "pain_description": "N/A",
    "previous_injuries_or_surgeries": "None",
    "medical_history": "None",
    "current_medications": "None",
    "red_flag_symptoms": "Sudden onset of weakness, no significant pain",
    "symptom_triggers": "Worse after prolonged standing",
    "symptom_relievers": "Rest",
    "daily_activities_affected": "Difficulty walking long distances, fatigue",
    "lifestyle_factors": "Active lifestyle, regular runner",
    "dermatome_myotome_involvement": "Marked weakness in muscles associated with the L4-L5 myotomes; no significant dermatomal pain.",
    "pain_patterns": "Minimal pain, primarily weakness.",
    "goals_for_physical_therapy": "Improve leg strength and endurance",
    "classification": "Serious pathology (Referral required)",
    "differentiation_probability": {
        "neurological_problem": "80%",
        "orthopedic_muscular_problem": "20%",
    },
    "diagnosis_probability": {
        "multiple_sclerosis": "50%",
        "peripheral_neuropathy": "30%",
        "lumbar_radiculopathy": "20%",
    },
}
patient6 = {
    "patient_name": "Michael Brown",
    "age": 47,
    "gender": "Male",
    "primary_complaint": "Shoulder pain",
    "duration_of_symptoms": "3 weeks",
    "pain_severity": 6,
    "pain_description": "Sharp pain when lifting arm, dull ache at rest",
    "previous_injuries_or_surgeries": "Rotator cuff surgery 2 years ago",
    "medical_history": "None",
    "current_medications": "Ibuprofen as needed",
    "red_flag_symptoms": "None",
    "symptom_triggers": "Lifting arm above shoulder level, reaching",
    "symptom_relievers": "Rest, ice, over-the-counter pain medication",
    "daily_activities_affected": "Difficulty lifting objects, dressing",
    "lifestyle_factors": "Active lifestyle, plays recreational tennis",
    "dermatome_myotome_involvement": "Pain localized to the shoulder, no dermatomal involvement.",
    "pain_patterns": "Sharp pain during movement, dull ache at rest.",
    "goals_for_physical_therapy": "Pain relief, restore range of motion",
    "classification": "Treatable pathology (Physical therapy)",
    "differentiation_probability": {
        "neurological_problem": "10%",
        "orthopedic_muscular_problem": "90%",
    },
    "diagnosis_probability": {
        "rotator_cuff_tear": "70%",
        "shoulder_impingement": "50%",
        "bursitis": "30%",
    },
}
patient7 = {
    "patient_name": "Olivia Martinez",
    "age": 38,
    "gender": "Female",
    "primary_complaint": "Knee pain",
    "duration_of_symptoms": "4 weeks",
    "pain_severity": 5,
    "pain_description": "Dull ache, worse with movement, occasional sharp pain",
    "previous_injuries_or_surgeries": "ACL reconstruction 5 years ago",
    "medical_history": "None",
    "current_medications": "None",
    "red_flag_symptoms": "None",
    "symptom_triggers": "Walking, running, bending",
    "symptom_relievers": "Rest, ice, elevation",
    "daily_activities_affected": "Difficulty walking long distances, stairs",
    "lifestyle_factors": "Active lifestyle, runner",
    "dermatome_myotome_involvement": "No significant dermatomal or myotomal involvement.",
    "pain_patterns": "Dull ache, occasional sharp pain with movement.",
    "goals_for_physical_therapy": "Pain relief, return to running",
    "classification": "Treatable pathology (Physical therapy)",
    "differentiation_probability": {
        "neurological_problem": "15%",
        "orthopedic_muscular_problem": "85%",
    },
    "diagnosis_probability": {
        "patellofemoral_pain_syndrome": "60%",
        "meniscal_tear": "40%",
        "osteoarthritis": "20%",
    },
}
patient8 = {
    "patient_name": "David Smith",
    "age": 45,
    "gender": "Male",
    "primary_complaint": "Elbow pain",
    "duration_of_symptoms": "6 weeks",
    "pain_severity": 4,
    "pain_description": "Sharp pain on the outer elbow, worsens with gripping",
    "previous_injuries_or_surgeries": "None",
    "medical_history": "None",
    "current_medications": "None",
    "red_flag_symptoms": "None",
    "symptom_triggers": "Gripping, lifting",
    "symptom_relievers": "Rest, ice, avoiding triggers",
    "daily_activities_affected": "Difficulty lifting objects, using tools",
    "lifestyle_factors": "Active lifestyle, recreational golfer",
    "dermatome_myotome_involvement": "No significant dermatomal or myotomal involvement.",
    "pain_patterns": "Sharp pain on the outer elbow, especially with gripping.",
    "goals_for_physical_therapy": "Pain relief, return to golf",
    "classification": "Treatable pathology (Physical therapy)",
    "differentiation_probability": {
        "neurological_problem": "5%",
        "orthopedic_muscular_problem": "95%",
    },
    "diagnosis_probability": {
        "lateral_epicondylitis": "80%",
        "radial_tunnel_syndrome": "20%",
        "tendinitis": "40%",
    },
}
patient9 = {
    "patient_name": "Sophia Johnson",
    "age": 54,
    "gender": "Female",
    "primary_complaint": "Hip pain",
    "duration_of_symptoms": "3 months",
    "pain_severity": 6,
    "pain_description": "Aching pain in the hip, radiates to the thigh",
    "previous_injuries_or_surgeries": "None",
    "medical_history": "Osteoporosis",
    "current_medications": "Calcium supplements, Vitamin D",
    "red_flag_symptoms": "None",
    "symptom_triggers": "Walking, standing for long periods",
    "symptom_relievers": "Rest, sitting",
    "daily_activities_affected": "Difficulty walking, standing",
    "lifestyle_factors": "Sedentary lifestyle",
    "dermatome_myotome_involvement": "No significant dermatomal or myotomal involvement.",
    "pain_patterns": "Aching pain in the hip, radiates to the thigh.",
    "goals_for_physical_therapy": "Pain relief, improve mobility",
    "classification": "Treatable pathology (Physical therapy)",
    "differentiation_probability": {
        "neurological_problem": "20%",
        "orthopedic_muscular_problem": "80%",
    },
    "diagnosis_probability": {
        "trochanteric_bursitis": "50%",
        "hip_osteoarthritis": "40%",
        "sacroiliac_joint_dysfunction": "30%",
    },
}
patient10 = {
    "patient_name": "Emma Roberts",
    "age": 50,
    "gender": "Female",
    "primary_complaint": "Neck stiffness",
    "duration_of_symptoms": "5 weeks",
    "pain_severity": 3,
    "pain_description": "Stiffness and mild pain in the neck, worsens in the morning",
    "previous_injuries_or_surgeries": "Whiplash injury 10 years ago",
    "medical_history": "None",
    "current_medications": "None",
    "red_flag_symptoms": "None",
    "symptom_triggers": "Prolonged sitting, sleeping in awkward positions",
    "symptom_relievers": "Heat therapy, gentle stretching",
    "daily_activities_affected": "Difficulty turning head, discomfort when driving",
    "lifestyle_factors": "Sedentary lifestyle, works long hours at a desk job",
    "dermatome_myotome_involvement": "Mild discomfort in the C5-C6 dermatome areas; no significant muscle weakness.",
    "pain_patterns": "Stiffness and mild pain, especially in the morning.",
    "goals_for_physical_therapy": "Improve neck mobility, reduce stiffness",
    "classification": "Treatable pathology (Physical therapy)",
    "differentiation_probability": {
        "neurological_problem": "15%",
        "orthopedic_muscular_problem": "85%",
    },
    "diagnosis_probability": {
        "cervical_spondylosis": "60%",
        "muscle_strain": "40%",
        "facet_joint_dysfunction": "25%",
    },
}

readable_data = [
    patient1,
    patient2,
    patient3,
    patient4,
    patient5,
    patient6,
    patient7,
    patient8,
    patient9,
    patient10,
]

### Convert Data to Fine-Tuning Format

In [4]:
import json


def convert_to_chat_format(data):
    chat_data = []

    for example in data:
        # Create a system message
        system_message = (
            "The system processes medical data to assist in differential diagnosis."
        )

        # Create a user message by combining the patient's information
        user_message = f"""Patient Name: {example['patient_name']}
Age: {example['age']}
Gender: {example['gender']}
Primary Complaint: {example['primary_complaint']}
Duration of Symptoms: {example['duration_of_symptoms']}
Pain Severity (0-10): {example['pain_severity']}
Pain Description: {example['pain_description']}
Previous Injuries/Surgeries: {example['previous_injuries_or_surgeries']}
Medical History: {example['medical_history']}
Current Medications: {example['current_medications']}
Red Flag Symptoms: {example['red_flag_symptoms']}
Symptom Triggers: {example['symptom_triggers']}
Symptom Relievers: {example['symptom_relievers']}
Daily Activities Affected: {example['daily_activities_affected']}
Lifestyle Factors: {example['lifestyle_factors']}
Dermatome/Myotome Involvement: {example['dermatome_myotome_involvement']}
Pain Patterns: {example['pain_patterns']}
Goals for Physical Therapy: {example['goals_for_physical_therapy']}"""

        # Create an assistant message with the classification and diagnosis probabilities
        diagnosis_probability_str = "\n".join(
            [
                f"{key}: {value}"
                for key, value in example["diagnosis_probability"].items()
            ]
        )
        assistant_message = f"""
        Classification: {example['classification']}
        Differentiation Probability:
Neurological Problem: {example['differentiation_probability'].get('neurological_problem', 'N/A')}
Orthopedic/Muscular Problem: {example['differentiation_probability'].get('orthopedic_muscular_problem', 'N/A')}
Diagnosis Probability:
{diagnosis_probability_str}"""

        # Structure the data into the required chat format
        chat_format = {
            "messages": [
                {"role": "system", "content": system_message},
                {"role": "user", "content": user_message},
                {"role": "assistant", "content": assistant_message},
            ]
        }
        chat_data.append(chat_format)

    return chat_data


# Convert the examples to the chat format
chat_data = convert_to_chat_format(readable_data)

# Save the formatted data to a JSONL file
with open("fine_tuning_data.jsonl", "w") as f:
    for item in chat_data:
        f.write(json.dumps(item) + "\n")

print("Chat data prepared successfully!")

Chat data prepared successfully!


In [5]:
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from langchain import OpenAI, LLMChain
from langchain.prompts import ChatPromptTemplate
import openai
from dotenv import load_dotenv
import os

app = FastAPI()
load_dotenv()


# Set your OpenAI API key
openai.api_key = os.getenv("OPENAI_API_KEY")


# Define the request model for the API
class PatientData(BaseModel):
    patient_name: str
    age: int
    gender: str
    primary_complaint: str
    duration_of_symptoms: str
    pain_severity: int
    pain_description: str
    previous_injuries_or_surgeries: str
    medical_history: str
    current_medications: str
    red_flag_symptoms: str
    symptom_triggers: str
    symptom_relievers: str
    daily_activities_affected: str
    lifestyle_factors: str
    dermatome_myotome_involvement: str
    pain_patterns: str
    goals_for_physical_therapy: str


# Define the prompt template for Langchain
prompt_template = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """
Users will input their symptoms (e.g., pain, weakness, dry mouth). 
You are to analyze the cluster of symptoms to generate a likelihood graph indicating possible diagnoses (e.g., 90% likelihood of Condition A, 60% of Condition B, etc.).
You will NOT provide a definitive diagnosis. The focus will be on suggesting potential causes based on symptom clusters and not on diagnosing or treating the patient.
Make sure to include classification (treatable or serious pathology), differentiation probability (neurological problem : A%, orthopedic muscular_problem : "B%"), and diagnosis probability 
""",
        ),
        (
            "user",
            """
Patient Name: {{patient_data.patient_name}}
Age: {{patient_data.age}}
Gender: {{patient_data.gender}}
Primary Complaint: {{patient_data.primary_complaint}}
Duration of Symptoms: {{patient_data.duration_of_symptoms}}
Pain Severity (0-10): {{patient_data.pain_severity}}
Pain Description: {{patient_data.pain_description}}
Previous Injuries/Surgeries: {{patient_data.previous_injuries_or_surgeries}}
Medical History: {{patient_data.medical_history}}
Current Medications: {{patient_data.current_medications}}
Red Flag Symptoms: {{patient_data.red_flag_symptoms}}
Symptom Triggers: {{patient_data.symptom_triggers}}
Symptom Relievers: {{patient_data.symptom_relievers}}
Daily Activities Affected: {{patient_data.daily_activities_affected}}
Lifestyle Factors: {{patient_data.lifestyle_factors}}
Dermatome/Myotome Involvement: {{patient_data.dermatome_myotome_involvement}}
Pain Patterns: {{patient_data.pain_patterns}}
Goals for Physical Therapy: {{patient_data.goals_for_physical_therapy}}

Based on the above information, provide the classification, differentiation probability, and diagnosis probability:

""",
        ),
    ]
)

### Fine tuning

In [6]:
"""
fine_tune_response = openai.FineTuningJob.create(
    model="gpt-4o-mini-2024-07-18",
    training_file=openai.File.create(file=open("fine_tuning_data.jsonl"), purpose='fine-tune')['id'],
)
"""

'\nfine_tune_response = openai.FineTuningJob.create(\n    model="gpt-4o-mini-2024-07-18",\n    training_file=openai.File.create(file=open("fine_tuning_data.jsonl"), purpose=\'fine-tune\')[\'id\'],\n)\n'

In [7]:
# Wait for fine-tuning to complete and get the fine-tuned model ID
# fine_tuned_model_id = fine_tune_response['fine_tuned_model']
# print(fine_tuned_model_id)

# Initialize the LLM with the fine-tuned model ID
llm = ChatOpenAI(model="ft:gpt-4o-mini-2024-07-18:personal::9yOTrbNi", temperature=0)
llm_chain = LLMChain(llm=llm, prompt=prompt_template)

"""
@app.post("/predict/")
async def predict_diagnosis(patient_data: PatientData):
    try:
        # Generate the output using the fine-tuned model
        output = llm_chain.run(patient_data=patient_data.dict())
        return {"prediction": output}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

# Run the FastAPI app
# Save this as `app.py` and run with `uvicorn app:app --reload`
"""

'\n@app.post("/predict/")\nasync def predict_diagnosis(patient_data: PatientData):\n    try:\n        # Generate the output using the fine-tuned model\n        output = llm_chain.run(patient_data=patient_data.dict())\n        return {"prediction": output}\n    except Exception as e:\n        raise HTTPException(status_code=500, detail=str(e))\n\n# Run the FastAPI app\n# Save this as `app.py` and run with `uvicorn app:app --reload`\n'

In [8]:
# Break down the patient_info into individual components
patient_data = {
    "patient_name": "John Doe",
    "age": 50,
    "gender": "Male",
    "primary_complaint": "Chronic back pain",
    "duration_of_symptoms": "3 months",
    "pain_severity": 7,
    "pain_description": "Sharp, radiating down the left leg",
    "previous_injuries_or_surgeries": "Lumbar discectomy 5 years ago",
    "medical_history": "High blood pressure",
    "current_medications": "Atenolol",
    "red_flag_symptoms": "None",
    "symptom_triggers": "Worsens with physical activity, especially lifting heavy objects",
    "symptom_relievers": "Improves with rest and NSAIDs",
    "daily_activities_affected": "Difficulty bending and lifting objects",
    "lifestyle_factors": "Physically active job, heavy lifting involved",
    "dermatome_myotome_involvement": "Positive straight leg raise test indicating possible nerve compression (L5-S1 area)",
    "pain_patterns": "Radiating pain down the left leg",
    "goals_for_physical_therapy": "Pain relief and return to work",
}

# Save the patient_data to a file
with open("patient_data.json", "w") as json_file:
    json.dump(patient_data, json_file, indent=4)

In [13]:
class PatientData:
    def __init__(self, data):
        for key, value in data.items():
            setattr(self, key, value)


# Load the JSON data directly
with open("patient_data.json", "r") as json_file:
    data = json.load(json_file)

# Convert the dictionary to an object
patient_data = PatientData(data)

# Now you can use attribute-style access
output = llm_chain.run(patient_data=patient_data)
print(output)

Classification: Treatable pathology (Physical therapy)
Differentiation Probability:
Neurological Problem: 40%
Orthopedic/Muscular Problem: 60%
Diagnosis Probability:
tendinitis: 70%
radiculopathy: 30%
bursitis: 25%
