## Process Spreadsheet and Query LLM

In [1]:
import pandas as pd
from openai import OpenAI


In [2]:
n_formal=250
n_informal=1000
ground_truth='SystemicSclerosis'
patient_nominal_gender='female'
patient_nominal_age=10
patient_nominal_weight=54
patient_nominal_height=46

In [3]:
xls = pd.ExcelFile('DiagnosisNumbersSpreadsheet.xlsx')

In [4]:
df1 = pd.read_excel(xls, ground_truth)

In [5]:
import random 

class Symptom:
    def __init__(self, name, pct, informal_descriptions):
        self.name = name
        self.pct = pct 
        self.informal = informal_descriptions
        
    def get_informal_symptom(self):
        if len(self.informal) <= 0 :
            return self.name
        else:
            return random.choice(self.informal)

In [6]:
import math
def process_dataframe_with_symptoms(df):
    current_textbook_symptom_name = None 
    current_textbook_symptom_percentage = None 
    current_informal_descriptions = []
    all_symptoms = [] 
    
    for row in df.iterrows():
        symptom_name = row[1][0]
        symptom_percentage = row[1][1]
        if not math.isnan(symptom_percentage):
            print(symptom_name, float(symptom_percentage)/100.0)
            if current_textbook_symptom_name != None:
                symp = Symptom(current_textbook_symptom_name, current_textbook_symptom_percentage, current_informal_descriptions)
                all_symptoms.append(symp)
            current_textbook_symptom_name = symptom_name
            current_textbook_symptom_percentage = symptom_percentage/100.0
            current_informal_descriptions = [] 
        else:
            assert current_textbook_symptom_name != None 
            current_informal_descriptions.append(symptom_name)
            print(f'\t {symptom_name}')
            
    if current_textbook_symptom_name != None:
        symp = Symptom(current_textbook_symptom_name, current_textbook_symptom_percentage, current_informal_descriptions)
        all_symptoms.append(symp)
    return all_symptoms

In [7]:
all_symptoms = process_dataframe_with_symptoms(df1)

Skin tightening  0.82
	 Hard skin
	 shiny skin
Rayanud phenomenon 0.7
	 Fingers turning blue
	 Fingers turnng white
	 Cold hands
Soft tissue contracture 0.61
	 Tight joint
	 Tight fingers
Artharlgia 0.282
	 Pain
	 Musculoskeletal pain
Muscle weakness and pain 0.152
	 Muscle pain
	 sore muscles
	 Tender muscles
Subcutaneous calcification 0.102
	 Bumps under skin
Dysphagia 0.135
	 Trouble swallowing
	 Trouble swallowing liquids
	 Trouble swallowing solids
	 Food gets stuck in throat
	 Pills get stuck in throat 
Dyspnea  0.125
	 shortness of breath
	 shortness of breath with exertion


In [8]:
import random

def create_symptom_list(all_symptoms, informal=False):
    symptom_list = []
    for s in all_symptoms:
        r = random.random()
        if r <= s.pct:
            if informal:
                symptom_list.append(s.get_informal_symptom())
            else:
                symptom_list.append(s.name)
    return symptom_list

In [12]:
s0 = create_symptom_list(all_symptoms, informal=True)
print(s0)

['Hard skin', 'Cold hands', 'Musculoskeletal pain']


In [13]:
formal_symptom_list = [create_symptom_list(all_symptoms, informal=False) for i in range(n_formal)]

In [14]:
informal_symptom_list=[create_symptom_list(all_symptoms, informal=True) for i in range(n_informal)]

In [15]:
print(informal_symptom_list)

[['shiny skin', 'Cold hands', 'Tight joint', 'Tender muscles'], ['Hard skin', 'Musculoskeletal pain'], ['shiny skin', 'Cold hands', 'Tight joint', 'Musculoskeletal pain'], ['Tender muscles'], ['Cold hands', 'Pain'], ['Tight joint'], ['Hard skin', 'Fingers turning blue', 'Tight fingers'], ['Hard skin', 'Cold hands'], [], ['Hard skin', 'Tight fingers'], ['Hard skin', 'sore muscles'], ['Cold hands', 'Pain'], ['shiny skin', 'Tight fingers'], ['Hard skin', 'Cold hands', 'Tight fingers', 'Musculoskeletal pain', 'Bumps under skin', 'Food gets stuck in throat'], ['Bumps under skin'], ['Hard skin', 'Fingers turnng white', 'Tight fingers'], ['Hard skin', 'Fingers turning blue', 'Tight fingers', 'Musculoskeletal pain', 'Muscle pain'], ['Hard skin', 'Cold hands', 'Tight fingers'], ['shiny skin', 'Cold hands'], ['Hard skin', 'Tight fingers', 'Food gets stuck in throat'], ['Hard skin', 'Fingers turning blue', 'Tight fingers'], ['shiny skin', 'Fingers turnng white', 'Tight joint'], ['Hard skin', 'Col

In [17]:
def make_prompt(symptom_list, gender, age, weight, height):
    patient_descr = f'The patient is a {age} year old {gender} with a weight of {weight} lbs and height of {height} inches.'
    symptoms_str = 'The list of symptoms include '
    for (j, s) in enumerate(symptom_list):
        if j != len(symptom_list)-1:
            symptoms_str = symptoms_str + f' ({j+1}) {s}'
        else:
            symptoms_str = symptoms_str + f' and ({j+1}) {s}.'
    return patient_descr + ' ' + symptoms_str

In [18]:
p0 = make_prompt(s0, patient_nominal_gender, patient_nominal_age, patient_nominal_weight, patient_nominal_height)

In [58]:
print(p0)

The patient is a 10 year old female with a weight of 54 lbs and height of 46 inches. The list of symptoms include  (1) Hard skin (2) Cold hands and (3) Musculoskeletal pain.


In [19]:
def prompt_open_ai(prompt_str, model="gpt-3.5-turbo"):
    client = OpenAI()
    response = client.chat.completions.create(
        model=model,
        messages=[
        {
        "role": "system",
        "content": """ I am taking care of a patient and would like your help generating a differential diagnosis.  I will provide details 
        about the patient and the symptoms below. Please output upto 5 conditions that are consistent with the symptoms sorted from the most likely to the least. Your 
        output must be in the format wherein each of the conditions must be in a separate line.
                        1. <name of condition/disease>, <probability of this disease as a percentage>, <brief rationale as to why the patient may have this disease>
                        2. <name of condition/disease>, <probability of this disease as a percentage>, <brief rationale as to why the patient may have this disease>
                        ...
                        5. <name of condition/disease>, <probability of this disease as a percentage>, <brief rationale as to why the patient may have this disease>
                        """
        },
        {
        "role": "user",
        "content": prompt_str
        }
        ],
        temperature=0.7,
        max_tokens=1024,
        top_p=1
    )
    #print(response)
    #print(response.choices[0].message.content)
    return response.choices[0].message.content

In [20]:
r0 = prompt_open_ai(p0)

In [21]:
print(r0)

1. Juvenile idiopathic arthritis, 60%, The patient presents with musculoskeletal pain which is a common symptom of juvenile idiopathic arthritis.  
2. Scleroderma, 20%, Hard skin is a characteristic feature of scleroderma and the patient's presentation is consistent with this condition.  
3. Raynaud's disease, 10%, Cold hands are a common symptom of Raynaud's disease which causes vasospasms in the blood vessels.  
4. Hypothyroidism, 5%, Hypothyroidism can present with musculoskeletal pain and cold intolerance, which may manifest as cold hands in the patient.  
5. Fibromyalgia, 5%, Musculoskeletal pain is a key symptom of fibromyalgia, which could explain the patient's presentation.


In [22]:
import re
def parse_response(response):
    lst_of_lines = response.split('\n')
    results = []
    for (j, line) in enumerate(lst_of_lines): 
        m = re.match(r"(\d+).\s*(.*)\s*,\s*(\d+)%,(.*)", line)
        if m != None:
            results.append((m.group(2), m.group(3), m.group(4)))
    return results

In [23]:
parsed_r0 = parse_response(r0)

In [24]:
print(parsed_r0)

[('Juvenile idiopathic arthritis', '60', ' The patient presents with musculoskeletal pain which is a common symptom of juvenile idiopathic arthritis.  '), ('Scleroderma', '20', " Hard skin is a characteristic feature of scleroderma and the patient's presentation is consistent with this condition.  "), ("Raynaud's disease", '10', " Cold hands are a common symptom of Raynaud's disease which causes vasospasms in the blood vessels.  "), ('Hypothyroidism', '5', ' Hypothyroidism can present with musculoskeletal pain and cold intolerance, which may manifest as cold hands in the patient.  '), ('Fibromyalgia', '5', " Musculoskeletal pain is a key symptom of fibromyalgia, which could explain the patient's presentation.")]


In [25]:
def run_symptoms_list_openai_and_parse_responses(symptoms_list_list, gender, age, weight, height, max_n=None):
    count = 0
    all_parsed_responses = []
    for symptoms_list in symptoms_list_list:
        p0 = make_prompt(symptoms_list, gender, age, weight, height)
        r0 = prompt_open_ai(p0)
        parsed_r0 = parse_response(r0)
        all_parsed_responses.append(parsed_r0)
        count = count + 1
        if max_n != None and count >= max_n:
            return all_parsed_responses
    return all_parsed_responses

In [26]:
all_parsed_responses_formal = run_symptoms_list_openai_and_parse_responses(formal_symptom_list, patient_nominal_gender, patient_nominal_age, patient_nominal_weight, patient_nominal_height, max_n=None)

In [27]:
print(all_parsed_responses_formal)

[[('Juvenile Systemic Sclerosis', '70', ' The patient presents with skin tightening, Raynaud phenomenon, soft tissue contracture, and dyspnea, which are all characteristic symptoms of juvenile systemic sclerosis. '), ('Juvenile Dermatomyositis', '15', ' Muscle weakness and pain along with arthralgia are common symptoms of juvenile dermatomyositis in children. '), ('Mixed Connective Tissue Disease', '10', ' The combination of skin tightening, Raynaud phenomenon, soft tissue contracture, and dyspnea could also be seen in mixed connective tissue disease. '), ('Juvenile Idiopathic Arthritis', '3', ' Arthralgia and muscle weakness are common symptoms in juvenile idiopathic arthritis, but the presence of skin tightening and other symptoms make this less likely. '), ('Systemic Lupus Erythematosus', '2', ' While some symptoms overlap (arthralgia, muscle weakness, dyspnea), the skin tightening and soft tissue contracture are less common in systemic lupus erythematosus in children.')], [('System

In [28]:
def dump_responses_to_file(symptoms_list_list, all_parsed_responses, filename, gender, age, weight, height):
    filehandle = open(filename, 'w')
    count = 0
    for (symptoms_list, parsed_response) in zip(symptoms_list_list, all_parsed_responses):
        count = count + 1
        print(f'P: {count}, {gender}, {age}, {weight}, {height}', file=filehandle)
        for s in symptoms_list:
            print(f'S:{s}', file=filehandle)
        for (j, r) in enumerate(parsed_response):
            (disease_name, prob, descr) = r
            print(f'R: {j+1}, {disease_name}, {prob}, {descr}', file=filehandle)
    filehandle.close()

In [29]:
dump_responses_to_file(formal_symptom_list, all_parsed_responses_formal, f'{ground_truth}-formal-chatgpt-3.5.txt', patient_nominal_gender, patient_nominal_age, patient_nominal_weight, patient_nominal_height)

In [90]:
top_responses_formal = [resp[0][0] for resp in all_parsed_responses_formal if len(resp) >= 1]

In [91]:
def get_frequencies(lst_of_top_responses):
    lower_case_responses = [s.lower() for s in lst_of_top_responses]
    return {i:lower_case_responses.count(i) for i in set(lower_case_responses)}

In [92]:
print('Top Responses')
d= get_frequencies(top_responses_formal)
display(d)

Top Responses


{'juvenile idiopathic arthritis': 35,
 'juvenile systemic sclerosis': 119,
 '**juvenile systemic sclerosis**': 1,
 'esophageal disorders': 2,
 'juvenile idiopathic arthritis (jia)': 2,
 'systemic lupus erythematosus (sle)': 1,
 'juvenile dermatomyositis': 46,
 'juvenile systemic scleroderma': 1,
 'systemic sclerosis': 38,
 'systemic sclerosis (scleroderma)': 3}

In [93]:
sum = 0
tot = 0
for (s, f) in d.items():
    if 'scler' in s:
        print(s)
        sum = sum +f
    tot = tot+f
print(sum/tot*100)

juvenile systemic sclerosis
**juvenile systemic sclerosis**
juvenile systemic scleroderma
systemic sclerosis
systemic sclerosis (scleroderma)
65.32258064516128


In [94]:
top_three_tmp = [[resp[0][0], resp[1][0], resp[2][0]] for resp in all_parsed_responses_formal if len(resp) >= 3]
top_three_responses_formal = [x for lst in top_three_tmp for x in lst]

In [95]:
tot = 0
sum = 0
for lst in top_three_tmp:
    tot = tot +1
    if 'scler' in lst[0].lower() or 'scler' in lst[1].lower() or 'scler' in lst[2].lower():
        sum = sum + 1
print(sum, tot)
print(sum/tot*100)

233 248
93.95161290322581


In [96]:
print('Top three responses')
d = get_frequencies(top_three_responses_formal)
display(d)

Top three responses


{'scleroderma': 29,
 'juvenile idiopathic arthritis': 92,
 "sjogren's syndrome": 1,
 'lupus': 13,
 'rheumatic fever': 2,
 'polymyositis': 1,
 '**ehlers-danlos syndrome**': 1,
 'eosinophilic esophagitis': 6,
 'lipodystrophy': 1,
 'asthma exacerbation': 1,
 'systemic sclerosis': 62,
 'systemic sclerosis (scleroderma)': 37,
 '**juvenile systemic sclerosis**': 1,
 'cystic fibrosis': 3,
 'mixed connective tissue disease': 17,
 'mixed connective tissue disease (mctd)': 12,
 'congenital myopathies': 1,
 'ehlers-danlos syndrome': 50,
 'progressive systemic sclerosis': 1,
 'esophageal disorders': 2,
 'morphea': 1,
 'lupus erythematosus': 1,
 'juvenile systemic scleroderma': 1,
 'crest syndrome': 1,
 'muscular dystrophy': 5,
 'hypoparathyroidism': 3,
 'interstitial lung disease': 1,
 'systemic lupus erythematosus': 25,
 'idiopathic pulmonary fibrosis': 4,
 'dermatomyositis': 4,
 'juvenile systemic sclerosis': 122,
 'esophageal motility disorders': 1,
 '**dermatomyositis**': 1,
 'eosinophilic fas

In [84]:
def count_scler(d):
    sum = 0
    tot = 0
    for (s, f) in d.items():
        if 'scler' in s:
            print(s)
            sum = sum +f
        tot = tot+f
    print(sum/tot*100)
    return sum/tot*100

In [39]:
all_parsed_responses_informal = run_symptoms_list_openai_and_parse_responses(informal_symptom_list, patient_nominal_gender, patient_nominal_age, patient_nominal_weight, patient_nominal_height, max_n=None)

In [40]:
print(all_parsed_responses_informal)

[[('Juvenile Idiopathic Arthritis', '70', ' The symptoms of tight joints and tender muscles are consistent with juvenile idiopathic arthritis, which is common in children and can cause inflammation in the joints leading to stiffness and pain.'), ('Hypothyroidism', '20', ' Shiny skin and cold hands can be indicative of hypothyroidism, a condition where the thyroid gland does not produce enough hormones, leading to a slower metabolism and symptoms like cold intolerance and changes in skin texture.'), ('Juvenile Dermatomyositis', '5', ' The combination of shiny skin, tender muscles, and joint tightness could be seen in juvenile dermatomyositis, an autoimmune condition that affects the skin and muscles in children.'), ('Systemic Lupus Erythematosus (SLE)', '3', ' SLE is a chronic autoimmune disease that can cause joint pain and muscle tenderness, which are symptoms seen in this patient along with shiny skin.'), ("Raynaud's Phenomenon", '2', " Cold hands and shiny skin can be seen in Raynau

In [41]:
dump_responses_to_file(informal_symptom_list, all_parsed_responses_informal, f'{ground_truth}-informal-chatgpt-3.5.txt', patient_nominal_gender, patient_nominal_age, patient_nominal_weight, patient_nominal_height)

In [85]:
print('Top Responses')
top_responses_informal = [resp[0][0] for resp in all_parsed_responses_informal if len(resp) >= 1]
d = get_frequencies(top_responses_informal)
display(d)
count_scler(d)

Top Responses


{'scleroderma': 77,
 'juvenile idiopathic arthritis': 248,
 'esophageal stricture': 9,
 'lipoma': 5,
 'eosinophilic esophagitis': 16,
 'systemic sclerosis': 38,
 'systemic sclerosis (scleroderma)': 2,
 'cystic fibrosis': 1,
 'marfan syndrome': 1,
 'ehlers-danlos syndrome': 9,
 'iron-deficiency anemia': 1,
 'iron deficiency anemia': 1,
 'juvenile localized scleroderma (morphea)': 1,
 'juvenile systemic sclerosis': 105,
 "cushing's syndrome": 13,
 'asthma': 2,
 'epidermolysis bullosa': 1,
 'juvenile dermatomyositis': 69,
 'juvenile idiopathic arthritis (jia)': 2,
 'juvenile hypothyroidism': 1,
 "raynaud's disease": 58,
 'hyperthyroidism': 5,
 'juvenile rheumatoid arthritis': 2,
 'esophageal strictures': 1,
 "raynaud's phenomenon": 300,
 'muscle strain': 1,
 'hypothyroidism': 18}

scleroderma
systemic sclerosis
systemic sclerosis (scleroderma)
juvenile localized scleroderma (morphea)
juvenile systemic sclerosis
22.593718338399192


22.593718338399192

In [87]:
top_three_tmp = [[resp[0][0], resp[1][0], resp[2][0]] for resp in all_parsed_responses_informal if len(resp) >= 3]
top_three_responses_informal = [x for lst in top_three_tmp for x in lst]
print('Top three responses')
d = get_frequencies(top_three_responses_informal)
display(d)

sum = 0
tot = 0
for lst in top_three_tmp:
    lower_lst = [s.lower() for s in lst]
    if any('scler' in s for s in lower_lst):
        sum = sum + 1
    tot = tot +1
print(sum/tot*100)

Top three responses


{'juvenile idiopathic arthritis': 482,
 'esophagitis': 1,
 'heart condition (such as congenital heart disease)': 1,
 'polymyositis': 2,
 'esophageal stricture': 35,
 'dermatofibroma': 3,
 "reynaud's phenomenon": 2,
 'eosinophilic esophagitis': 75,
 'anxiety': 1,
 'systemic sclerosis': 147,
 'osteoarthritis': 1,
 'iron-deficiency anemia': 1,
 'iron deficiency anemia': 3,
 'hypoparathyroidism': 1,
 'rheumatoid nodules': 1,
 'gastroesophageal reflux disease (gerd)': 10,
 'eosinophilic fasciitis': 10,
 'epidermolysis bullosa': 1,
 'influenza': 3,
 'juvenile hypothyroidism': 1,
 "raynaud's disease": 123,
 'juvenile rheumatoid arthritis': 9,
 'anemia': 16,
 'scleroderma': 291,
 'esophageal dysmotility': 1,
 "sjogren's syndrome": 1,
 'lipoma': 6,
 'exercise-induced asthma': 1,
 'systemic sclerosis (scleroderma)': 139,
 'cystic fibrosis': 7,
 'inflammatory myopathy': 1,
 'angioedema': 1,
 'pneumonia': 1,
 'fibromyalgia': 9,
 'steatocystoma multiplex': 3,
 'growing pains': 3,
 "cushing's syndro

66.359918200409


In [44]:
print(p0)

The patient is a 10 year old female with a weight of 54 lbs and height of 46 inches. The list of symptoms include  (1) Hard skin (2) Cold hands and (3) Musculoskeletal pain.


In [59]:
import anthropic


anthropic_client = anthropic.Anthropic(
    api_key="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",
)

message = anthropic_client.messages.create(
    model="claude-3-sonnet-20240229",
    max_tokens=1000,
    temperature=0,
    system="I am taking care of a patient and would like your help generating a differential diagnosis.  I will provide details \n        about the patient and the symptoms below. Please output upto 5 conditions that are consistent with the symptoms sorted from the most likely to the least. Your \n        output must be in the format wherein each of the conditions must be in a separate line.\n                        1. <name of condition/disease>, <probability of this disease as a percentage>, <brief rationale as to why the patient may have this disease>\n                        2. <name of condition/disease>, <probability of this disease as a percentage>, <brief rationale as to why the patient may have this disease>\n                        ...\n                        5. <name of condition/disease>, <probability of this disease as a percentage>, <brief rationale as to why the patient may have this disease>",
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": f"{p0}"
                }
            ]
        }
    ]
)
print(message.content)

[TextBlock(text="1. Scleroderma, 40%, Hard skin and musculoskeletal pain are common symptoms of scleroderma, an autoimmune disorder that causes hardening and tightening of the skin and connective tissues.\n\n2. Raynaud's phenomenon, 30%, Cold hands and musculoskeletal pain can be associated with Raynaud's phenomenon, a condition that causes decreased blood flow to the extremities, often triggered by cold temperatures or stress.\n\n3. Juvenile idiopathic arthritis, 20%, Musculoskeletal pain can be a symptom of juvenile idiopathic arthritis, an autoimmune disorder that causes joint inflammation and stiffness in children.\n\n4. Dermatomyositis, 15%, Hard skin and musculoskeletal pain can be symptoms of dermatomyositis, an inflammatory condition that affects the skin and muscles.\n\n5. Mixed connective tissue disease, 10%, The combination of hard skin, cold hands, and musculoskeletal pain can be seen in mixed connective tissue disease, an autoimmune disorder that shares features of several

In [60]:
parse_response(message.content[0].text)

[('Scleroderma',
  '40',
  ' Hard skin and musculoskeletal pain are common symptoms of scleroderma, an autoimmune disorder that causes hardening and tightening of the skin and connective tissues.'),
 ("Raynaud's phenomenon",
  '30',
  " Cold hands and musculoskeletal pain can be associated with Raynaud's phenomenon, a condition that causes decreased blood flow to the extremities, often triggered by cold temperatures or stress."),
 ('Juvenile idiopathic arthritis',
  '20',
  ' Musculoskeletal pain can be a symptom of juvenile idiopathic arthritis, an autoimmune disorder that causes joint inflammation and stiffness in children.'),
 ('Dermatomyositis',
  '15',
  ' Hard skin and musculoskeletal pain can be symptoms of dermatomyositis, an inflammatory condition that affects the skin and muscles.'),
 ('Mixed connective tissue disease',
  '10',
  ' The combination of hard skin, cold hands, and musculoskeletal pain can be seen in mixed connective tissue disease, an autoimmune disorder that sha

In [47]:
import anthropic


anthropic_client = anthropic.Anthropic(
    api_key="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",
)

def prompt_anthropic(prompt_str):
    message = anthropic_client.messages.create(
        model="claude-3-sonnet-20240229",
        max_tokens=1000,
        temperature=0,
        system="I am taking care of a patient and would like your help generating a differential diagnosis.  I will provide details \n        about the patient and the symptoms below. Please output upto 5 conditions that are consistent with the symptoms sorted from the most likely to the least. Your \n        output must be in the format wherein each of the conditions must be in a separate line.\n                        1. <name of condition/disease>, <probability of this disease as a percentage>, <brief rationale as to why the patient may have this disease>\n                        2. <name of condition/disease>, <probability of this disease as a percentage>, <brief rationale as to why the patient may have this disease>\n                        ...\n                        5. <name of condition/disease>, <probability of this disease as a percentage>, <brief rationale as to why the patient may have this disease>",
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": f"{prompt_str}"
                    }
                ]
            }
        ]
    )
    return message.content[0].text 

def run_symptoms_list_anthropic_and_parse_responses(symptoms_list_list, gender, age, weight, height, max_n=None):
    count = 0
    all_parsed_responses = []
    for symptoms_list in symptoms_list_list:
        p0 = make_prompt(symptoms_list, gender, age, weight, height)
        r0 = prompt_anthropic(p0)
        parsed_r0 = parse_response(r0)
        all_parsed_responses.append(parsed_r0)
        count = count + 1
        if max_n != None and count >= max_n:
            return all_parsed_responses
    return all_parsed_responses

In [48]:
claude_all_parsed_responses_formal = run_symptoms_list_anthropic_and_parse_responses(formal_symptom_list, patient_nominal_gender, patient_nominal_age, patient_nominal_weight, patient_nominal_height, max_n=None)

In [49]:
print(claude_all_parsed_responses_formal)

[[('Systemic Sclerosis (Scleroderma)', '70', " Skin tightening, Raynaud's phenomenon, soft tissue contractures, arthralgia, and muscle weakness are classic symptoms of systemic sclerosis."), ('Mixed Connective Tissue Disease', '15', " The combination of Raynaud's phenomenon, arthralgia, and muscle weakness can be seen in mixed connective tissue disease, which shares features of several autoimmune disorders."), ('Juvenile Dermatomyositis', '10', " The presence of muscle weakness, skin tightening, and Raynaud's phenomenon can be indicative of juvenile dermatomyositis, an inflammatory muscle disease."), ('Juvenile Idiopathic Arthritis', '3', ' Arthralgia and muscle weakness can be seen in some forms of juvenile idiopathic arthritis, although the other symptoms are less common.'), ('Interstitial Lung Disease', '2', ' Dyspnea can be a symptom of interstitial lung disease, which can be associated with some autoimmune disorders like systemic sclerosis.')], [('Systemic Sclerosis (Scleroderma)'

In [50]:
dump_responses_to_file(formal_symptom_list, claude_all_parsed_responses_formal, f'{ground_truth}-formal-claude-3-opus.txt', patient_nominal_gender, patient_nominal_age, patient_nominal_weight, patient_nominal_height)

In [51]:
print('Top Responses')
top_responses_formal = [resp[0][0] for resp in claude_all_parsed_responses_formal]
get_frequencies(top_responses_formal)

Top Responses


{'scleroderma': 1,
 'arthrogryposis multiplex congenita (amc)': 4,
 'systemic lupus erythematosus (sle)': 5,
 'celiac disease': 1,
 'juvenile idiopathic arthritis (jia)': 9,
 'juvenile dermatomyositis': 10,
 'systemic sclerosis (scleroderma)': 220}

In [97]:
top_three_tmp = [[resp[0][0], resp[1][0], resp[2][0]] for resp in claude_all_parsed_responses_formal]
top_three_responses_formal = [x for lst in top_three_tmp for x in lst]
print('Top three responses')
d = get_frequencies(top_three_responses_formal)
display(d)

sum = 0
tot = 0
for lst in top_three_tmp:
    lower_lst = [s.lower() for s in lst]
    if any('scler' in s for s in lower_lst):
        sum = sum + 1
    tot = tot +1
print(sum/tot*100)

Top three responses


{'scleroderma': 10,
 'juvenile idiopathic arthritis': 3,
 'lupus': 1,
 'viral arthritis': 2,
 'morphea (localized scleroderma)': 26,
 'eosinophilic esophagitis': 2,
 'lactose intolerance': 1,
 'systemic sclerosis (scleroderma)': 225,
 'mixed connective tissue disease': 22,
 'mixed connective tissue disease (mctd)': 214,
 'arthrogryposis multiplex congenita (amc)': 4,
 'muscular dystrophy': 5,
 'inflammatory bowel disease (ibd)': 1,
 'dermatomyositis': 57,
 'growing pains': 2,
 'cerebral palsy': 4,
 'eosinophilic fasciitis': 3,
 'systemic lupus erythematosus (sle)': 15,
 'celiac disease': 1,
 'juvenile idiopathic arthritis (jia)': 48,
 'juvenile dermatomyositis': 104}

94.0


In [53]:
claude_all_parsed_responses_informal = run_symptoms_list_anthropic_and_parse_responses(informal_symptom_list, patient_nominal_gender, patient_nominal_age, patient_nominal_weight, patient_nominal_height, max_n=None)

In [54]:
print(claude_all_parsed_responses_informal)

[[('Juvenile Idiopathic Arthritis (JIA)', '40', ' Shiny skin, tight joints, and tender muscles are common symptoms of JIA, an autoimmune disorder affecting children.'), ('Dermatomyositis', '30', ' Shiny skin (known as "Gottron\'s papules") and muscle weakness/tenderness are characteristic of this inflammatory muscle disease.'), ('Systemic Lupus Erythematosus (SLE)', '15', " Cold hands (Raynaud's phenomenon) and joint pain/stiffness can occur in SLE, an autoimmune disorder affecting multiple organs."), ('Mixed Connective Tissue Disease (MCTD)', '10', " MCTD can present with symptoms similar to lupus, dermatomyositis, and rheumatoid arthritis, including joint pain and Raynaud's phenomenon."), ('Scleroderma', '5', " Tight skin and Raynaud's phenomenon (cold hands) can be seen in scleroderma, an autoimmune disorder causing hardening and tightening of the skin and connective tissues.")], [('Juvenile Idiopathic Arthritis (JIA)', '40', ' Musculoskeletal pain and skin changes like hardening ca

In [55]:
dump_responses_to_file(informal_symptom_list, claude_all_parsed_responses_informal, f'{ground_truth}-informal-claude-3-opus.txt', patient_nominal_gender, patient_nominal_age, patient_nominal_weight, patient_nominal_height)

In [98]:
print('Top Responses')
top_responses_informal = [resp[0][0] for resp in claude_all_parsed_responses_informal]
d = get_frequencies(top_responses_informal)
display(d)
count_scler(d)

Top Responses


{'scleroderma': 154,
 'esophageal stricture': 2,
 'lipoma': 2,
 'eosinophilic esophagitis': 10,
 'juvenile rheumatoid arthritis (jra)': 1,
 'systemic scleroderma (diffuse cutaneous systemic sclerosis)': 26,
 'keratosis pilaris': 22,
 'systemic sclerosis (scleroderma)': 35,
 'diabetes mellitus': 1,
 'cystic fibrosis': 4,
 'lichen planus': 2,
 'eczema': 5,
 'atopic dermatitis (eczema)': 4,
 'dermatomyositis': 41,
 'growing pains': 7,
 'ichthyosis': 2,
 'asthma': 2,
 'systemic lupus erythematosus (sle)': 2,
 'celiac disease': 11,
 'juvenile idiopathic arthritis (jia)': 169,
 'juvenile dermatomyositis': 46,
 'neurofibromatosis type 1 (nf1)': 4,
 'achalasia': 9,
 "raynaud's phenomenon": 388,
 'hypothyroidism': 47,
 'viral myositis': 1,
 'congenital heart disease': 1,
 'anemia': 2}

scleroderma
systemic scleroderma (diffuse cutaneous systemic sclerosis)
systemic sclerosis (scleroderma)
21.5


21.5

In [99]:
top_three_tmp = [[resp[0][0], resp[1][0], resp[2][0]] for resp in claude_all_parsed_responses_informal]
top_three_responses_informal = [x for lst in top_three_tmp for x in lst]
print('Top three responses')
d= get_frequencies(top_three_responses_informal)
display(d)
sum = 0
tot = 0
for lst in top_three_tmp:
    lower_lst = [s.lower() for s in lst]
    if any('scler' in s for s in lower_lst):
        sum = sum + 1
    tot = tot +1
print(sum/tot*100)

Top three responses


{'juvenile idiopathic arthritis': 188,
 'polymyositis': 3,
 'esophageal stricture': 35,
 'connective tissue disorder (e.g., lupus, scleroderma)': 2,
 'eosinophilic esophagitis': 30,
 'molluscum contagiosum': 2,
 'systemic scleroderma (diffuse cutaneous systemic sclerosis)': 26,
 'lactose intolerance': 11,
 'diabetes mellitus': 15,
 'warts': 2,
 'inflammatory bowel disease (ibd)': 11,
 'hypermobility syndrome': 1,
 'gastroesophageal reflux disease (gerd)': 6,
 'epidermolysis bullosa': 2,
 'esophageal stricture or achalasia': 2,
 'juvenile rheumatoid arthritis': 2,
 'heart condition': 1,
 'anemia': 45,
 'scleroderma': 558,
 'esophageal dysmotility': 6,
 'lipoma': 2,
 "buerger's disease": 7,
 'systemic sclerosis (scleroderma)': 41,
 'cystic fibrosis': 6,
 'mixed connective tissue disease (mctd)': 105,
 'esophageal achalasia': 1,
 'fibromyalgia': 8,
 'folliculitis': 1,
 'growing pains': 18,
 'asthma': 21,
 'systemic lupus erythematosus (sle)': 119,
 'cyst': 2,
 'juvenile dermatomyositis': 

62.7
