In [1]:
from openai import OpenAI


In [2]:
# (symptom name, frequency) pairs. The frequency is used by
# generate_random_prompt_str as the probability of including the symptom
# in a simulated patient.
# NOTE(review): 'Periungal' looks like a misspelling of 'Periungual'; the same
# spelling is used as a key in symptom_medical_to_descriptive, so the two must
# be changed together if it is corrected.
most_common_symptoms = [
    ('Progressive proximal muscles weakness', 0.91),
    ('Fatigue', 0.91),
    ('Gottron Papule', 0.74),
    ('Heliotrope Rash', 0.76),
    ('Malar Rash', 0.71),
    ('Periungal Erythema', 0.63),
    ('Muscle Pain and Tenderness', 0.54),
]


In [71]:
def prompt_open_ai(prompt_str, model="gpt-3.5-turbo"):
    """Send one patient description to the chat-completions endpoint and return the reply text.

    A fixed system message asks for up to five candidate conditions, one per
    line, in the form ``<n>. <name>, <probability>%, <rationale>``;
    ``prompt_str`` is sent as the user message.

    Args:
        prompt_str: Patient/symptom description used as the user message.
        model: Model identifier passed through to the API.

    Returns:
        The ``content`` of the first choice's message.
        NOTE(review): the SDK appears to type this field as optional; callers
        that immediately call string methods on it should confirm it cannot be
        ``None`` here.
    """
    # A new client is built on every call. No credentials are passed, so they
    # presumably come from the environment -- TODO confirm.
    client = OpenAI()
    response = client.chat.completions.create(
        model=model,
        messages=[
        {
        "role": "system",
        "content": """ I am taking care of a patient and would like your help generating a differential diagnosis.  I will provide details 
        about the patient and the symptoms below. Please output upto 5 conditions that are consistent with the symptoms sorted from the most likely to the least. Your 
        output must be in the format wherein each of the conditions must be in a separate line.
                        1. <name of condition/disease>, <probability of this disease as a percentage>, <brief rationale as to why the patient may have this disease>
                        2. <name of condition/disease>, <probability of this disease as a percentage>, <brief rationale as to why the patient may have this disease>
                        ...
                        5. <name of condition/disease>, <probability of this disease as a percentage>, <brief rationale as to why the patient may have this disease>
                        """
        },
        {
        "role": "user",
        "content": prompt_str
        }
        ],
        # Sampling settings are fixed for every request made through this helper.
        temperature=0.7,
        max_tokens=1024,
        top_p=1
    )
    # Only the text of the first choice is returned; the rest of the response
    # object is discarded.
    return response.choices[0].message.content

In [156]:
def generate_prompt_str(symptom_list, age=10, gender='female', weight=80, height=54):
    """Build the user prompt describing a patient and a numbered list of symptoms.

    For two or more symptoms the text is identical to what this function has
    always produced, e.g.
    ``'... The list of symptoms include  (1) A (2) B and (3) C.'``
    (the double space after "include" is kept on purpose so previously
    generated prompts stay reproducible).

    Edge cases:
        * one symptom  -> ``'... include  (1) A.'`` (no dangling "and")
        * no symptoms  -> ``'... The patient has no reported symptoms.'``

    Args:
        symptom_list: Iterable of symptom descriptions, in display order.
        age: Patient age in years.
        gender: Patient gender, inserted verbatim.
        weight: Patient weight in lbs.
        height: Patient height in inches.

    Returns:
        A single string: the patient sentence, a space, then the symptom sentence.
    """
    patient_descr = f'The patient is a {age} year old {gender} with a weight of {weight} lbs and height of {height} inches.'

    symptoms = list(symptom_list)
    if not symptoms:
        # Random sampling can legitimately select nothing; emit a complete
        # sentence instead of a truncated "The list of symptoms include ".
        return patient_descr + ' ' + 'The patient has no reported symptoms.'

    numbered = [f'({j}) {s}' for j, s in enumerate(symptoms, start=1)]
    if len(numbered) > 1:
        # "and" only makes sense when something precedes the last item.
        numbered[-1] = 'and ' + numbered[-1]

    # Every item is preceded by a single space, matching the historical format.
    symptoms_str = 'The list of symptoms include ' + ''.join(f' {item}' for item in numbered) + '.'
    return patient_descr + ' ' + symptoms_str

In [157]:
generate_prompt_str(['Progressive proximal muscles weakness', 'Fatigue', 'Gottron Papule'])

'The patient is a 10 year old female with a weight of 80 lbs and height of 54 inches. The list of symptoms include  (1) Progressive proximal muscles weakness (2) Fatigue and (3) Gottron Papule.'

In [158]:
from random import random, shuffle
def generate_random_prompt_str(symptom_freq_list, age=10, gender='female', weight=80, height=54):
    """Sample a random symptom set and build the matching prompt.

    Each ``(symptom, frequency)`` pair is kept when a fresh ``random()`` draw
    is less than or equal to its frequency; the kept symptoms are then
    shuffled in place before the prompt is built.

    Returns:
        ``(symptoms, prompt)`` -- the shuffled list of selected symptom names
        and the prompt string produced by ``generate_prompt_str`` for them.
    """
    # One draw per entry, in list order.
    chosen = [name for name, freq in symptom_freq_list if random() <= freq]
    shuffle(chosen)
    prompt_text = generate_prompt_str(chosen, age, gender, weight, height)
    return chosen, prompt_text

In [159]:
symptoms_list, prompt = generate_random_prompt_str(most_common_symptoms)
print(prompt)

The patient is a 10 year old female with a weight of 80 lbs and height of 54 inches. The list of symptoms include  (1) Progressive proximal muscles weakness (2) Periungal Erythema and (3) Heliotrope Rash.


In [160]:
response = prompt_open_ai(prompt)

In [161]:
print(response)

1. Dermatomyositis, 80%, Dermatomyositis is a systemic autoimmune disease characterized by inflammation of the muscles and skin. The patient's symptoms of progressive proximal muscle weakness, periungal erythema, and heliotrope rash are classic features of dermatomyositis.
2. Juvenile Idiopathic Arthritis, 15%, Juvenile Idiopathic Arthritis (JIA) is a common autoimmune disease in children that can cause muscle weakness and skin manifestations like rash. However, the specific symptoms of periungal erythema and heliotrope rash are more indicative of dermatomyositis.
3. Lupus, 5%, Lupus is another autoimmune disease that can present with muscle weakness and skin rashes, including a heliotrope rash. However, the periungal erythema is less commonly associated with lupus compared to dermatomyositis.
4. Polymyositis, 3%, Polymyositis is a similar condition to dermatomyositis but typically lacks the characteristic skin findings such as periungal erythema and heliotrope rash.
5. Systemic Sclero

In [69]:
import re
# One numbered answer line: "<n>. <name>, <pct>%, <rationale>".
#  * leading indentation is tolerated;
#  * the name is matched lazily so the FIRST ", <pct>%," on the line is the
#    split point (names may contain commas; rationales may contain percentages);
#  * the percentage may carry a decimal part.
_RESPONSE_LINE_RE = re.compile(
    r"\s*(\d+)[.)]\s*(.+?)\s*,\s*(\d+(?:\.\d+)?)\s*%\s*,(.*)"
)


def parse_response(response):
    """Extract ``(name, percentage, rationale)`` tuples from a model reply.

    Args:
        response: Reply text with one numbered condition per line. ``None``
            or an empty string yields an empty list.

    Returns:
        A list of 3-tuples of strings, in line order. ``percentage`` is the
        digits without the ``%`` sign. ``rationale`` is everything after the
        comma that follows the percentage, so it usually keeps its leading
        space. Lines that do not follow the expected format are skipped.
    """
    if not response:
        return []

    results = []
    for line in response.splitlines():
        m = _RESPONSE_LINE_RE.match(line)
        if m is not None:
            results.append((m.group(2), m.group(3), m.group(4)))
    return results

In [70]:
parse_response(response)

[('Juvenile Dermatomyositis',
  '70',
  ' Fatigue, malar rash, and progressive proximal muscle weakness are classic symptoms of juvenile dermatomyositis, which is an autoimmune condition that affects the skin and muscles.'),
 ('Systemic Lupus Erythematosus (SLE)',
  '20',
  ' Fatigue and malar rash can be seen in SLE, although progressive proximal muscle weakness is less common in this condition.'),
 ('Hypothyroidism',
  '5',
  ' Fatigue can be a symptom of hypothyroidism, although the malar rash and progressive proximal muscle weakness are less typical.'),
 ('Polymyositis',
  '3',
  ' Progressive proximal muscle weakness is a hallmark symptom of polymyositis, but the presence of fatigue and malar rash makes juvenile dermatomyositis a more likely diagnosis.'),
 ('Chronic Fatigue Syndrome',
  '2',
  ' Fatigue is a prominent symptom in chronic fatigue syndrome, but the presence of malar rash and progressive proximal muscle weakness makes autoimmune conditions like juvenile dermatomyositi

In [104]:
def run_symptoms_and_parse_responses(symptoms_freq_list, nreps=250, debug=False):
    """Run ``nreps`` random trials: sample symptoms, query the model, parse the reply.

    Args:
        symptoms_freq_list: ``(symptom, frequency)`` pairs passed to
            ``generate_random_prompt_str``.
        nreps: Number of trials (one model call per trial).
        debug: When true, print the prompt, the raw reply and the parsed
            result of every trial.

    Returns:
        ``(all_symptoms, all_responses)`` -- two lists of length ``nreps``
        holding, per trial, the sampled symptom list and the parsed reply.
    """
    def _trace(text):
        # Debug output is opt-in; nothing is printed otherwise.
        if debug:
            print(text)

    sampled_symptoms = []
    parsed_responses = []
    for _ in range(nreps):
        symptoms_list, prompt = generate_random_prompt_str(symptoms_freq_list)
        _trace(f'\t {prompt}')
        response = prompt_open_ai(prompt)
        _trace(f'\t {response}')
        result = parse_response(response)
        _trace(f'\t\t {result}')
        parsed_responses.append(result)
        sampled_symptoms.append(symptoms_list)
    return sampled_symptoms, parsed_responses
    

In [84]:
all_symptoms_list, all_responses = run_symptoms_and_parse_responses(most_common_symptoms, 250)

In [86]:
print(all_symptoms_list)

[['Fatigue', 'Progressive proximal muscles weakness', 'Gottron Papule', 'Periungal Erythema', 'Malar Rash', 'Muscle Pain and Tenderness'], ['Fatigue', 'Muscle Pain and Tenderness', 'Periungal Erythema', 'Progressive proximal muscles weakness', 'Heliotrope Rash', 'Malar Rash'], ['Fatigue', 'Progressive proximal muscles weakness', 'Periungal Erythema', 'Muscle Pain and Tenderness', 'Malar Rash'], ['Fatigue', 'Periungal Erythema', 'Gottron Papule'], ['Fatigue', 'Periungal Erythema', 'Muscle Pain and Tenderness', 'Progressive proximal muscles weakness', 'Heliotrope Rash', 'Gottron Papule'], ['Periungal Erythema', 'Fatigue', 'Muscle Pain and Tenderness', 'Heliotrope Rash', 'Progressive proximal muscles weakness'], ['Progressive proximal muscles weakness', 'Gottron Papule', 'Heliotrope Rash', 'Fatigue', 'Periungal Erythema'], ['Fatigue', 'Malar Rash', 'Gottron Papule', 'Progressive proximal muscles weakness'], ['Malar Rash', 'Progressive proximal muscles weakness', 'Muscle Pain and Tendernes

In [85]:
print(all_responses)

[[('Juvenile Dermatomyositis', '70', " Juvenile Dermatomyositis is a rare autoimmune disease that affects the skin and muscles. The patient's symptoms of progressive proximal muscle weakness, Gottron papules, periungal erythema, malar rash, muscle pain, and tenderness are all consistent with this condition."), ('Systemic Lupus Erythematosus (SLE)', '20', " Systemic Lupus Erythematosus is a chronic autoimmune disease that can affect various parts of the body, including the skin and muscles. The patient's symptoms of fatigue, malar rash, and muscle pain and tenderness are common in SLE."), ('Polymyositis', '5', " Polymyositis is an inflammatory muscle disease that causes muscle weakness. The patient's symptoms of progressive proximal muscle weakness, muscle pain, and tenderness are typical of this condition."), ('Hypothyroidism', '3', " Hypothyroidism can cause fatigue and muscle weakness. While the patient's other symptoms are not characteristic of hypothyroidism, it is still a possibil

In [89]:
print([lst[0][0] for lst in all_responses])

['Juvenile Dermatomyositis', 'Juvenile Dermatomyositis', 'Juvenile Dermatomyositis', 'Juvenile Dermatomyositis', 'Juvenile Dermatomyositis', 'Juvenile Dermatomyositis', 'Dermatomyositis', 'Juvenile Dermatomyositis', 'Juvenile Dermatomyositis', 'Dermatomyositis', 'Dermatomyositis', 'Juvenile Dermatomyositis', 'Juvenile Dermatomyositis', 'Dermatomyositis', 'Juvenile Dermatomyositis', 'Juvenile Dermatomyositis', 'Juvenile Dermatomyositis', 'Juvenile Dermatomyositis', 'Juvenile Dermatomyositis', 'Juvenile Dermatomyositis', 'Dermatomyositis', 'Juvenile Dermatomyositis', 'Juvenile Dermatomyositis', 'Juvenile Dermatomyositis', 'Dermatomyositis', 'Juvenile Dermatomyositis', 'Juvenile Dermatomyositis', 'Juvenile Dermatomyositis', 'Juvenile Dermatomyositis', 'Juvenile Dermatomyositis', 'Juvenile Dermatomyositis', 'Juvenile Dermatomyositis', 'Dermatomyositis', 'Juvenile Dermatomyositis', 'Dermatomyositis', 'Juvenile Dermatomyositis', 'Juvenile Dermatomyositis', 'Juvenile Dermatomyositis', 'Juveni

In [96]:
def dump_data_to_file(filename, all_symptoms_list, all_responses_list):
    """Write every trial (symptoms and parsed responses) to a text file.

    The file is overwritten. Each trial is written as::

        Trial # <n>
        Symptoms:
        <TAB> <symptom>
        Responses:
        <TAB> <rank>, <name>, <pct>, <rationale>

    Args:
        filename: Destination path.
        all_symptoms_list: Per-trial lists of symptom names.
        all_responses_list: Per-trial lists of ``(name, pct, rationale)``
            tuples. Trials are paired with ``zip``, so extra entries in the
            longer list are ignored.
    """
    # `with` guarantees the handle is closed even if a write fails part-way;
    # UTF-8 is explicit because model output may contain non-ASCII characters.
    with open(filename, 'w', encoding='utf-8') as file_handle:
        trials = zip(all_symptoms_list, all_responses_list)
        for count, (symptoms, responses) in enumerate(trials, start=1):
            print(f'Trial # {count}', file=file_handle)
            print('Symptoms:', file=file_handle)
            for s in symptoms:
                print(f'\t {s}', file=file_handle)
            print('Responses:', file=file_handle)
            for rank, (name, pct, rationale) in enumerate(responses, start=1):
                print(f'\t {rank}, {name}, {pct}, {rationale}', file=file_handle)

In [97]:
dump_data_to_file('most_common_chatgpt_3.5_250.txt', all_symptoms_list, all_responses)

In [92]:
# Extended symptom table: the most common symptoms followed by the less
# frequent ones, in the same (symptom name, frequency) format.
full_symptoms = [
    *most_common_symptoms,
    ('Weight Loss', 0.35),
    ('Falling Episodes', 0.4),
    ('Arthritis', 0.38),
    ('Fever', 0.4),
    ('Lymphadenopathy', 0.4),
    ('Dysphagia', 0.3),
    ('Joint Contracture', 0.3),
    ('Shawl Sign', 0.25),
    ('Dyspnea on Exertion', 0.24),
    ('Photosensitive Rashes', 0.28),
    ("Raynaud's Syndrome", 0.19),
    ('Edema', 0.22),
    ('Gingivitis', 0.18),
    ('Cutaneous Ulceration', 0.18),
    ('Interstitial Lung Disease', 0.05),
]

In [103]:
print(full_symptoms)

[('Progressive proximal muscles weakness', 0.91), ('Fatigue', 0.91), ('Gottron Papule', 0.74), ('Heliotrope Rash', 0.76), ('Malar Rash', 0.71), ('Periungal Erythema', 0.63), ('Muscle Pain and Tenderness', 0.54), ('Weight Loss', 0.35), ('Falling Episodes', 0.4), ('Arthritis', 0.38), ('Fever', 0.4), ('Lymphadenopathy', 0.4), ('Dysphagia', 0.3), ('Joint Contracture', 0.3), ('Shawl Sign', 0.25), ('Dyspnea on Exertion', 0.24), ('Photosensitive Rashes', 0.28), ("Raynaud's Syndrome", 0.19), ('Edema', 0.22), ('Gingivitis', 0.18), ('Cutaneous Ulceration', 0.18), ('Interstitial Lung Disease', 0.05)]


In [106]:
all_symptoms_list_2, all_responses_2 = run_symptoms_and_parse_responses(full_symptoms, 1000)

In [107]:
dump_data_to_file('full_chatgpt_3.5_1000.txt', all_symptoms_list_2, all_responses_2)

In [108]:
print([lst[0][0] for lst in all_responses_2])

['Juvenile Dermatomyositis', 'Juvenile Dermatomyositis', 'Juvenile Dermatomyositis', 'Juvenile Dermatomyositis', 'Juvenile Dermatomyositis', 'Juvenile Dermatomyositis', 'Juvenile Dermatomyositis', 'Juvenile Dermatomyositis', 'Juvenile Dermatomyositis', 'Juvenile Dermatomyositis', 'Juvenile Dermatomyositis', 'Juvenile Dermatomyositis', 'Juvenile Dermatomyositis', 'Juvenile Dermatomyositis', 'Juvenile Dermatomyositis', 'Juvenile Dermatomyositis', 'Juvenile Dermatomyositis', 'Juvenile Dermatomyositis', 'Juvenile Dermatomyositis', 'Juvenile Idiopathic Arthritis', 'Juvenile Dermatomyositis', 'Juvenile Dermatomyositis', 'Juvenile Dermatomyositis', 'Juvenile Dermatomyositis', 'Juvenile Dermatomyositis', 'Juvenile Dermatomyositis', 'Juvenile Dermatomyositis', 'Juvenile Dermatomyositis', 'Juvenile Dermatomyositis', 'Juvenile Dermatomyositis', 'Juvenile Dermatomyositis', 'Juvenile Dermatomyositis', 'Juvenile Dermatomyositis', 'Juvenile Dermatomyositis', 'Juvenile Dermatomyositis', 'Juvenile Derm

In [181]:
def get_frequencies(lst_of_top_responses):
    """Count how often each response name occurs, ignoring case.

    Args:
        lst_of_top_responses: Iterable of response-name strings.

    Returns:
        A plain ``dict`` mapping each lower-cased name to its number of
        occurrences. An empty input yields an empty dict.
    """
    # Local import keeps this cell self-contained.
    from collections import Counter

    # Single pass instead of one list.count() scan per distinct name.
    return dict(Counter(s.lower() for s in lst_of_top_responses))

In [182]:
get_frequencies([lst[0][0] for lst in all_responses_2])

{'systemic lupus erythematosus (sle)': 1,
 'juvenile idiopathic arthritis': 20,
 'juvenile dermatomyositis': 948,
 'dermatomyositis': 30,
 'systemic lupus erythematosus': 1}

In [119]:
def get_frequency_in_top_k_responses(lst_of_responses, k=5,
                                     targets=('Dermatomyositis', 'Juvenile Dermatomyositis')):
    """Count the trials whose top-``k`` responses contain a target condition.

    Names are compared case-insensitively and with surrounding whitespace
    ignored, consistent with ``get_frequencies``, which lower-cases names
    before counting.

    Args:
        lst_of_responses: Per-trial lists of ``(name, pct, rationale)`` tuples,
            ordered from most to least likely.
        k: How many leading responses of each trial to inspect.
        targets: Condition names that count as a hit. Defaults to the two
            dermatomyositis spellings this function has always looked for.

    Returns:
        The number of trials with at least one hit; each trial counts once.
    """
    wanted = {t.strip().lower() for t in targets}
    return sum(
        1
        for ranked in lst_of_responses
        if any(name.strip().lower() in wanted for (name, _, _) in ranked[:k])
    )

In [120]:
get_frequency_in_top_k_responses(all_responses_2)

998

In [121]:
get_frequency_in_top_k_responses(all_responses_2,3)

997

In [169]:
# Maps each medical symptom term to the lay descriptions that may replace it
# in a prompt (translate_symptoms picks one at random).
# Keys must match the symptom names used in the frequency tables exactly.
symptom_medical_to_descriptive = {
    'Progressive proximal muscles weakness': [
        'Muscle Weakness', 'Activity intolerance', 'Exercise intolerance',
        'Cannot sit up', 'Cannot stand up', 'Cannot walk up stairs',
        'Cannot raise arms', 'Cannot wash hair',
    ],
    'Fatigue': ['Fatigue', 'Tiredness'],
    'Gottron Papule': ['Hand rash', 'Rash on hands', 'Rash on knuckles'],
    'Heliotrope Rash': ['Rash on eyelids', 'Purple eyelids', 'Dark eyelids'],
    'Malar Rash': ['Rash on face', 'Rash on cheeks', 'Butterfly rash', 'Rosy cheeks'],
    'Periungal Erythema': ['Red Cuticles'],
    'Muscle Pain and Tenderness': [
        'Muscle Pain', 'Muscle Tenderness', 'Musculoskeletal pain',
        'Leg pain', 'Arm pain', 'Sore muscles', 'Flu like ache',
    ],
    'Weight Loss': ['Loss of Weight', 'Weight Loss'],
    'Falling Episodes': ['Frequent Falls', 'Loss of Coordination', 'Coordination Issues'],
    'Arthritis': ['Joint swelling', 'Joint pain', 'Joint tenderness', 'Hard to move joints'],
    'Fever': ['Fever', 'Elevated Body Temperature'],
    'Lymphadenopathy': ['Big lymph nodes', 'Bumps on neck', 'Bumps under arms'],
    'Dysphagia': [
        'Trouble swallowing', 'Trouble swallowing liquids', 'Trouble swallowing solids',
        'Food gets stuck in throat', 'Pills get stuck in throat',
    ],
    'Joint Contracture': ['Contracture', 'Can\'t move joint', 'Stuck joint', 'Joint stuck'],
    'Shawl Sign': ['Rash on Chest', 'Rash on Back', 'Rash on Chest and Back'],
    'Dyspnea on Exertion': ['Trouble breathing', 'Shortness of breath', 'Trouble breathing with activity'],
    'Photosensitive Rashes': ['Can\'t tolerate sunlight exposure', 'Rash that comes on in sun'],
    'Raynaud\'s Syndrome': ['Fingers turning white', 'Finger turning blue', 'Cold hands'],
    'Edema': ['Hand swelling', 'Arm swelling', 'Leg swelling'],
    'Gingivitis': ['Red gums', 'Painful gums', 'Tender gums'],
    'Cutaneous Ulceration': ['Sores on hands', 'Cuts on hands'],
    # Spelling fixed ("Shorness" -> "Shortness"); this text is sent to the model.
    'Interstitial Lung Disease': ['Shortness of breath'],
}

In [170]:
from random import choice

def translate_symptoms(lst, symptom_medical_to_descriptive):
    """Replace each medical term with a randomly chosen lay description.

    Args:
        lst: Symptom names to translate.
        symptom_medical_to_descriptive: Mapping from a symptom name to the
            list of descriptions it may be replaced with.

    Returns:
        A new list, same length and order as ``lst``. Terms without a mapping
        entry are passed through unchanged after a warning is printed.
    """
    translated = []
    for term in lst:
        if term not in symptom_medical_to_descriptive:
            print(f'Warning: no translation for {term}')
            translated.append(term)
            continue
        options = symptom_medical_to_descriptive[term]
        translated.append(choice(options))
    return translated

In [171]:
from random import random, shuffle
def generate_random_prompt_str_with_translation(symptom_freq_list, symptom_medical_to_descriptive, age=10, gender='female', weight=80, height=54):
    """Sample symptoms, translate them to lay descriptions, and build the prompt.

    Each ``(symptom, frequency)`` pair is kept when a fresh ``random()`` draw
    is less than or equal to its frequency. The kept symptoms are shuffled,
    translated with ``translate_symptoms``, and the prompt is built from the
    translated descriptions.

    Returns:
        ``(original, translated, prompt)`` -- the shuffled medical terms, the
        descriptions substituted for them (same order), and the prompt text.
    """
    # One draw per entry, in list order.
    selected = [name for name, freq in symptom_freq_list if random() <= freq]
    shuffle(selected)
    described = translate_symptoms(selected, symptom_medical_to_descriptive)
    prompt_text = generate_prompt_str(described, age, gender, weight, height)
    return selected, described, prompt_text

In [172]:
lst, new_lst, prompt = generate_random_prompt_str_with_translation(full_symptoms,symptom_medical_to_descriptive )
print(prompt)

The patient is a 10 year old female with a weight of 80 lbs and height of 54 inches. The list of symptoms include  (1) Fatigue (2) Hand rash (3) Big lymph nodes (4) Cannot wash hair (5) Musculoskeletal pain (6) Rash on face (7) Loss of Weight (8) Trouble swallowing (9) Rash on eyelids and (10) Hard to move joints.


In [173]:
resp = prompt_open_ai(prompt)

In [174]:
print(resp)

1. Juvenile Idiopathic Arthritis, 70%, The patient presents with musculoskeletal pain, difficulty moving joints, hand rash, and fatigue, which are consistent with juvenile idiopathic arthritis. 
2. Lupus, 60%, The patient has a rash on the face and eyelids, musculoskeletal pain, fatigue, and weight loss, which are common symptoms of lupus.
3. Juvenile Dermatomyositis, 50%, The patient's symptoms of rash on face, eyelids, hand rash, muscle weakness (cannot wash hair), fatigue, and weight loss are indicative of juvenile dermatomyositis.
4. Lymphoma, 40%, Enlarged lymph nodes, weight loss, fatigue, and rash on the face could be suggestive of lymphoma.
5. Systemic Sclerosis, 30%, The patient's symptoms of musculoskeletal pain, difficulty moving joints, hand rash, and trouble swallowing are consistent with systemic sclerosis.


In [175]:
def run_symptoms_and_parse_responses(symptoms_freq_list,symptom_medical_to_descriptive, nreps=1000, debug=False):
    """Run ``nreps`` random trials using lay-language symptom descriptions.

    NOTE: this redefinition replaces the earlier two-parameter function of the
    same name in this notebook; it takes an extra mapping argument and returns
    three lists instead of two.

    Args:
        symptoms_freq_list: ``(symptom, frequency)`` pairs to sample from.
        symptom_medical_to_descriptive: Mapping used to translate each sampled
            symptom into a lay description.
        nreps: Number of trials (one model call per trial).
        debug: When true, print the prompt, the raw reply and the parsed
            result of every trial.

    Returns:
        ``(orig_symptoms, translated_symptoms, all_responses)`` -- three lists
        of length ``nreps`` holding, per trial, the sampled medical terms, the
        descriptions sent to the model, and the parsed reply.
    """
    def _trace(text):
        # Debug output is opt-in; nothing is printed otherwise.
        if debug:
            print(text)

    originals = []
    translations = []
    parsed_responses = []
    for _ in range(nreps):
        orig_symptoms, translated_symptoms, prompt = generate_random_prompt_str_with_translation(
            symptoms_freq_list, symptom_medical_to_descriptive
        )
        _trace(f'\t {prompt}')
        response = prompt_open_ai(prompt)
        _trace(f'\t {response}')
        result = parse_response(response)
        _trace(f'\t\t {result}')
        parsed_responses.append(result)
        originals.append(orig_symptoms)
        translations.append(translated_symptoms)
    return originals, translations, parsed_responses



In [184]:
orig_symptoms_list_3,translated_symptoms_list_3, all_responses_3 = run_symptoms_and_parse_responses( full_symptoms, symptom_medical_to_descriptive, nreps=2500) 

In [185]:
def dump_translated_data_to_file(filename, orig_symptoms_list, translated_list, all_responses_list):
    """Write every trial (original terms, translated terms, responses) to a text file.

    The file is overwritten. Each trial is written as::

        Trial # <n>
        Original Symptoms:
        <TAB> <symptom>
        Translated Symptoms:
        <TAB> <description>
        Responses:
        <TAB> <rank>, <name>, <pct>, <rationale>

    Args:
        filename: Destination path.
        orig_symptoms_list: Per-trial lists of medical symptom names.
        translated_list: Per-trial lists of the descriptions sent to the model.
        all_responses_list: Per-trial lists of ``(name, pct, rationale)``
            tuples. Trials are paired with ``zip``, so entries beyond the
            shortest list are ignored.
    """
    # `with` guarantees the handle is closed even if a write fails part-way;
    # UTF-8 is explicit because model output may contain non-ASCII characters.
    with open(filename, 'w', encoding='utf-8') as file_handle:
        trials = zip(orig_symptoms_list, translated_list, all_responses_list)
        for count, (symptoms, translated, responses) in enumerate(trials, start=1):
            print(f'Trial # {count}', file=file_handle)
            print('Original Symptoms:', file=file_handle)
            for s in symptoms:
                print(f'\t {s}', file=file_handle)
            print('Translated Symptoms:', file=file_handle)
            for s in translated:
                print(f'\t {s}', file=file_handle)
            print('Responses:', file=file_handle)
            for rank, (name, pct, rationale) in enumerate(responses, start=1):
                print(f'\t {rank}, {name}, {pct}, {rationale}', file=file_handle)

In [186]:
# NOTE(review): the "_1000" suffix in this filename does not match the run that
# produced all_responses_3 (nreps=2500) -- consider renaming the output file.
dump_translated_data_to_file('full_translated_chatgpt3.5_1000.txt', orig_symptoms_list_3,translated_symptoms_list_3, all_responses_3)

In [187]:
get_frequencies([lst[0][0] for lst in all_responses_3])

{'vitamin c deficiency (scurvy)': 1,
 'guillain-barre syndrome': 1,
 'juvenile diabetes': 1,
 'streptococcal pharyngitis': 1,
 'eosinophilic esophagitis': 12,
 'juvenile systemic lupus erythematosus': 1,
 'kawasaki disease': 19,
 "raynaud's phenomenon": 6,
 'dermatomyositis': 6,
 'lyme disease': 1,
 'vitamin d deficiency': 1,
 'asthma': 1,
 'atopic dermatitis': 2,
 'scarlet fever': 7,
 'lupus': 10,
 'systemic lupus erythematosus': 305,
 'esophageal stricture': 1,
 'juvenile idiopathic arthritis': 1143,
 'pulmonary embolism': 1,
 'hand, foot, and mouth disease': 1,
 'epiglottitis': 1,
 'fifth disease (erythema infectiosum)': 4,
 'rheumatic fever': 1,
 "raynaud's disease": 1,
 'myasthenia gravis': 1,
 'juvenile rheumatoid arthritis': 7,
 'henoch-schönlein purpura': 1,
 'systemic lupus erythematosus (sle)': 77,
 'iron deficiency anemia': 6,
 'duchenne muscular dystrophy': 3,
 'juvenile dermatomyositis': 877}

In [188]:
get_frequency_in_top_k_responses(all_responses_3)

1894

In [189]:
get_frequency_in_top_k_responses(all_responses_3, k=3)

1582