# Handcrafted

Each condition produced by the diagnostic task pipeline is classified into one of the following categories:

- Correct condition
- Related condition
- Unrelated condition

In [93]:
import json
import os
import pandas as pd
import matplotlib.pyplot as plt
from helper_metrics import *

# JSON example

Assume that a JSON file is output for each condition (meaning that if several conditions are found, multiple cases are generated).

In [85]:
# Inputs shared by the example cells and the pipeline cell.
THRESHOLD = 80  # forwarded to classify_condition as its last argument
json_file = "diagnostic_task_pipeline_example.json"  # file read by the JSON example
ground_truth = "cholera"  # reference value every condition is compared against

In [89]:
# Load the example file; the helper's result is unpacked into the condition
# and its related conditions (helper presumably comes from the
# `helper_metrics` star import — confirm).
condition, related_conditions = load_and_retreive(json_file)

# Classify the condition against the ground truth. THRESHOLD is forwarded
# as-is; presumably a match-score cutoff — confirm in the helper.
classification = classify_condition(condition, ground_truth, related_conditions, THRESHOLD)

print("Classification :", classification)

Classification : correct


# Prompt example

If the output is a string that is not valid JSON, we use a regex to locate the information we want.

In [74]:
# Example output held as a Python string instead of being read from a file.
# The literal is not a raw string, so every `\n` inside the "prompt" value
# becomes a real line break in the resulting text.
prompt = '''{
    "description" : "Extract a diagnosis from a guideline",
    "type" : "guideline",
    "prompt" : "Fill the following structure accordingly. Don't change it.\nIf the information is not given, don't write anything, leave it as an empty section.\nIf an information is true for multiple symptoms, repeat the information.\nOnly answer with the filled structure.\n",
    "document_structure": {
      "Condition": "cholera",
      "Symptoms list": [],
      "Contextual info": {
        "Factors affecting diagnostic": []
      },
      "Additional test to decide between diagnosis": "",
      "Related diagnosis": ["disease1", "disease2"]
    }
  }
'''

In [92]:
# Same flow as the JSON example, but the condition and related conditions are
# taken from the `prompt` string rather than from a file on disk.
condition, related_conditions = retreive_from_string(prompt)

# Classify against the same ground truth and THRESHOLD as the JSON example.
classification = classify_condition(condition, ground_truth, related_conditions, THRESHOLD)

print("Classification :", classification)

['disease1', 'disease2']
Classification : correct


# Pipeline

In [None]:
# Classify every pipeline output stored in `path` against `ground_truth`,
# then plot how many files fall into each classification.
path = "json/"
# Sorted so the row order of the results table does not depend on the
# arbitrary order in which os.listdir returns directory entries.
files = sorted(file for file in os.listdir(path) if file.endswith(".json"))

results = []

for file in files:
    # Hand the helper the full path, as in the single-file example: a bare
    # file name would be looked up in the working directory, not in `path`.
    # The helper is expected to read the file itself, so the notebook no
    # longer opens and parses it a second time.
    condition, related_conditions = load_and_retreive(os.path.join(path, file))
    classification = classify_condition(condition, ground_truth, related_conditions, THRESHOLD)
    results.append([file, condition, classification])

# plot the distribution of results
df = pd.DataFrame(results, columns=['file', 'condition', 'classification'])

if df.empty:
    # Nothing to plot: plotting an empty value_counts() would raise instead
    # of telling the reader that no input was found.
    print(f"No .json files found in {path!r}")
else:
    ax = df['classification'].value_counts().plot(kind='bar')
    # Label the figure so it can be read on its own when skimming.
    ax.set(
        title=f"Classification of pipeline outputs (ground truth: {ground_truth})",
        xlabel="classification",
        ylabel="number of files",
    )
    plt.show()