<a href="https://colab.research.google.com/github/thomasbeck95/CTM/blob/main/GPRankingAlgorithm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [97]:
# set up
import pandas as pd
import numpy as np

## Generate fictitious ranking data
Define a function that imitates a clinician assigning a priority score to each task. Then simulate presenting sets of tasks to the clinician, apply the scoring function to each task, and rank the tasks within each set. Each task together with its rank will act as the input to a ranking algorithm.

In [98]:
# define clinicians A opinions
# Every mapping below gives the severity weight clinician A attaches to one
# value of a task attribute; higher numbers mean higher priority.

# medication classes, grouped by severity tier
medication_type_ranking_a = {
    **dict.fromkeys(['Insulin', 'Anti-Epileptics', 'Immunosuppressants'], 3),
    **dict.fromkeys(['Anti-Hypertension', 'Statins', 'SSRIs'], 2),
    **dict.fromkeys(['Vitamins', 'Emollients', 'Mild Pain Relief'], 1),
}

# where a prescription request came from
prescription_request_source_ranking_a = {
    **dict.fromkeys(['Out-of-hours GP', 'Pharmacist'], 3),
    **dict.fromkeys(['Phone', 'Routine request via online', 'NHS app'], 2),
    'Routine scheduled repeats': 1,
}

# investigation sources carry the same weights as prescription request sources
# (kept as an independent copy so either can be edited separately)
investigation_source_ranking_a = dict(prescription_request_source_ranking_a)

# how soon the patient runs out of medication
medication_urgency_ranking_a = dict(zip(
    ['<1 day supply', '>1 and <=3 days supply', '>3 days supply'],
    [3, 1, 0.5],
))

# patient comorbidities
comorbidities_ranking_a = {
    **dict.fromkeys(['Epilepsy', 'Historical Heart Attack or Stroke'], 3),
    'Diabetes': 2,
    'Mental Health': 1,
}

# blood tests: which side of the limit is abnormal, and the limit value
blood_results_limits = {
    test: {'LimitType': limit_type, 'LimitValue': limit_value}
    for test, limit_type, limit_value in [
        ('K+', 'Upper', 6),
        ('eGFR', 'Lower', 15),
        ('PSA', 'Upper', 100),
        ('Hb', 'Lower', 70),
        ('INR', 'Upper', 8),
    ]
}

# weight added when a given blood test is outside its limit
blood_results_rankings_a = {
    **dict.fromkeys(['K+', 'eGFR'], 2),
    **dict.fromkeys(['PSA', 'Hb', 'INR'], 1),
}

# reason the ECG was requested
ecg_reason_ranking_a = dict(zip(
    ['Suspected Heart Attack', 'Suspected Coronary Heart Disease', 'Suspected Arrythmia', 'Screening for Medication'],
    [3, 2, 1, 0.5],
))

# body region that was X-rayed
xray_region_ranking_a = dict(zip(
    ['Lower Limbs', 'Upper Limbs', 'Spine and Neck', 'Torso', 'Head'],
    [0.5, 1, 2, 2, 3],
))

# administrative task types all share one low weight
other_task_ranking_a = dict.fromkeys(['SickNotes', 'ReferralLetters', 'MedicalReports'], 0.1)

In [99]:
# function to simulate a doctors priority scoring
def _is_missing(value):
  """Return True when `value` is the NaN placeholder meaning 'not provided'."""
  return isinstance(value, float) and np.isnan(value)


def _flag_weight(flag, weight):
  """Return `weight` when `flag` is set, otherwise 0 (a NaN flag counts as not set)."""
  # bool(np.nan) is True, so NaN has to be ruled out before the truth test
  if _is_missing(flag) or not flag:
    return 0
  return weight


def _blood_test_severity(blood_results, blood_results_limits, blood_results_ranking):
  """Sum the ranking of every blood result that lies outside its limit."""
  score = 0
  for test, result in blood_results.items():
    limit_value = blood_results_limits[test]['LimitValue']
    limit_type = blood_results_limits[test]['LimitType']
    # 'Lower' limits are breached from below, 'Upper' limits from above
    below_lower = limit_type == 'Lower' and result < limit_value
    above_upper = limit_type == 'Upper' and result > limit_value
    if below_lower or above_upper:
      score += blood_results_ranking[test]
  return score


def score_task(task_type,
               medication_type = np.nan, medication_type_ranking = np.nan,
               medication_urgency = np.nan, medication_urgency_ranking = np.nan,
               prescription_request_source = np.nan, prescription_request_source_ranking = np.nan,
               monitoring_required = np.nan, monitoring_required_weighting = np.nan,
               polypharmacy = np.nan, polypharmacy_weighting = np.nan,
               investigation_type = np.nan,
               investigation_source = np.nan, investigation_source_ranking = np.nan,
               blood_results = np.nan, blood_results_limits = np.nan, blood_results_ranking = np.nan,
               comorbidities = np.nan, comorbidities_ranking = np.nan,
               xray_region = np.nan, xray_ranking = np.nan,
               ecg_reason = np.nan, ecg_reason_ranking = np.nan,
               other_task_ranking = np.nan,
               time_in_pipeline = np.nan, time_in_pipeline_weight = np.nan
               ):
  """Compute a priority score for one task from a clinician's rankings.

  The score is the sum of three parts: a task-type specific severity, the
  summed ranking of the patient's comorbidities, and a timing term equal to
  ``time_in_pipeline ** 2 * time_in_pipeline_weight``.

  Task-type specific severity:
    * 'PrescriptionRequest': medication type + medication urgency + request
      source rankings, plus the monitoring / polypharmacy weightings when the
      corresponding flag is set.
    * 'ReviewResults': the X-ray region ranking, the summed rankings of
      out-of-limit blood results, or the ECG reason ranking (depending on
      ``investigation_type``), plus the investigation source ranking.
    * 'SickNotes', 'ReferralLetters', 'MedicalReports': looked up in
      ``other_task_ranking``.
    * 'PatientCommunication' and any unrecognised type: 0.

  Parameters that belong to the selected branch are required; a missing or
  unknown value there raises (e.g. ``KeyError`` from the ranking lookup).
  Parameters that are evaluated for every task are optional: a NaN/None
  ``comorbidities`` is treated as "no comorbidities", and a NaN
  ``time_in_pipeline`` or ``time_in_pipeline_weight`` contributes 0.
  NaN ``monitoring_required`` / ``polypharmacy`` flags count as False.

  Args:
    task_type: name of the task type, selects the scoring branch.
    *_ranking / *_weighting / *_weight: mappings or numbers describing the
      clinician's opinion, used as described above.
    blood_results: mapping of test name to measured value.
    blood_results_limits: mapping of test name to a dict with 'LimitType'
      ('Lower' or 'Upper') and 'LimitValue'.
    comorbidities: iterable of comorbidity names present for the patient.

  Returns:
    The numeric priority score (task severity + comorbidity + timing).
  """
  # apply different logic depending on the task type
  task_severity = 0

  if task_type == 'PrescriptionRequest':
    task_severity = (
        # which type of medication
        medication_type_ranking[medication_type]
        # how long until out of medicine
        + medication_urgency_ranking[medication_urgency]
        # source of the request
        + prescription_request_source_ranking[prescription_request_source]
        # optional extras, only counted when the flag is genuinely set
        + _flag_weight(monitoring_required, monitoring_required_weighting)
        + _flag_weight(polypharmacy, polypharmacy_weighting)
    )

  elif task_type == 'ReviewResults':
    if investigation_type == 'X-ray':
      # X-rays are scored on the body region only
      task_severity = xray_ranking[xray_region]
    elif investigation_type == 'Blood Test':
      task_severity = _blood_test_severity(blood_results, blood_results_limits, blood_results_ranking)
    elif investigation_type == 'ECG':
      task_severity = ecg_reason_ranking[ecg_reason]
    # every investigation also gets the weight of its source
    task_severity += investigation_source_ranking[investigation_source]

  elif task_type == 'PatientCommunication':
    # no task-specific severity; only comorbidities and timing count
    task_severity = 0

  elif task_type in ('SickNotes', 'ReferralLetters', 'MedicalReports'):
    task_severity = other_task_ranking[task_type]

  # assess which comorbidities are present (missing list == none present)
  comorbidity_severity = 0
  if comorbidities is not None and not _is_missing(comorbidities):
    for comorbidity in comorbidities:
      comorbidity_severity += comorbidities_ranking[comorbidity]

  # assess time in pipeline; the quadratic term makes long waits dominate
  if _is_missing(time_in_pipeline) or _is_missing(time_in_pipeline_weight):
    timing_severity = 0
  else:
    timing_severity = (time_in_pipeline ** 2) * time_in_pipeline_weight

  return task_severity + comorbidity_severity + timing_severity





In [100]:
# generate task inputs
## define available values
# Each list enumerates the values the task generator may draw for one attribute.
task_types = [
    'PrescriptionRequest',
    'ReviewResults',
    'PatientCommunication',
    'SickNotes',
    'ReferralLetters',
    'MedicalReports',
]
medication_types = [
    'Insulin', 'Anti-Epileptics', 'Immunosuppressants',
    'Anti-Hypertension', 'Statins', 'SSRIs',
    'Vitamins', 'Emollients', 'Mild Pain Relief',
]
medication_urgencies = [
    '<1 day supply',
    '>1 and <=3 days supply',
    '>3 days supply',
]
prescription_request_sources = [
    'Out-of-hours GP',
    'Pharmacist',
    'Phone',
    'Routine request via online',
    'NHS app',
    'Routine scheduled repeats',
]
investigation_types = ['X-ray', 'Blood Test', 'ECG']
# same options as prescription_request_sources, held as a separate list
investigation_sources = list(prescription_request_sources)
comorbidities = [
    'Epilepsy',
    'Historical Heart Attack or Stroke',
    'Diabetes',
    'Mental Health',
]
xray_regions = ['Lower Limbs', 'Upper Limbs', 'Spine and Neck', 'Torso', 'Head']
ecg_reasons = [
    'Suspected Heart Attack',
    'Suspected Coronary Heart Disease',
    'Suspected Arrythmia',
    'Screening for Medication',
]

def generate_task(task_types, task_probabilities = None):
  """Simulate one random task and return its attributes as a flat dict.

  The task type is drawn from `task_types` (optionally weighted by
  `task_probabilities`). Type-specific attributes are then drawn from the
  module-level value lists (medication_types, medication_urgencies,
  prescription_request_sources, investigation_types, investigation_sources,
  xray_regions, ecg_reasons). Every task also gets a 0/1 entry per name in
  the module-level `comorbidities` list and a gamma-distributed
  'time_in_pipeline'.

  Args:
    task_types: candidate task type names.
    task_probabilities: optional probabilities aligned with `task_types`,
      passed to `np.random.choice` as `p`.

  Returns:
    dict with 'task_type', the attributes of that task type, one key per
    comorbidity, and 'time_in_pipeline'.
  """
  # identify which task type
  task_type = np.random.choice(task_types, p = task_probabilities)
  task_dict = {'task_type' : task_type}

  if task_type == 'PrescriptionRequest':
    # draw and store each prescription attribute in turn
    task_dict['medication_type'] = np.random.choice(medication_types)
    task_dict['medication_urgency'] = np.random.choice(medication_urgencies)
    task_dict['prescription_request_source'] = np.random.choice(prescription_request_sources)
    # monitoring and polypharmacy are each flagged with probability 0.1
    task_dict['monitoring_required'] = np.random.choice([True, False], p = [0.1, 0.9])
    task_dict['polypharmacy'] = np.random.choice([True, False], p = [0.1, 0.9])

  elif task_type == 'ReviewResults':
    investigation_type = np.random.choice(investigation_types)
    if investigation_type == 'X-ray':
      task_dict['xray_region'] = np.random.choice(xray_regions)
    elif investigation_type == 'Blood Test':
      # (test name, mean, standard deviation) of the simulated result;
      # values are floored at 0
      simulated_tests = (
          ('K+', 3, 1.5),
          ('eGFR', 20, 5),
          ('PSA', 60, 10),
          ('Hb', 85, 5),
          ('INR', 3, 2),
      )
      task_dict['blood_results'] = {
          test : max(0, np.random.normal(mean, spread))
          for test, mean, spread in simulated_tests
      }
    elif investigation_type == 'ECG':
      task_dict['ecg_reason'] = np.random.choice(ecg_reasons)

    # the source is drawn after the investigation-specific attribute
    task_dict['investigation_type'] = investigation_type
    task_dict['investigation_source'] = np.random.choice(investigation_sources)

  # the remaining task types carry no type-specific attributes

  # identify comorbidities: each one is present with probability 0.1
  for condition in comorbidities:
    task_dict[condition] = 1 if np.random.choice([True, False], p = [0.1, 0.9]) else 0

  # specify time in pipeline
  task_dict['time_in_pipeline'] = np.random.gamma(1,2)
  return task_dict

In [101]:
# generate fictitious data
# Seed NumPy's global generator so the np.random draws below give the same
# tasks on every Restart & Run All.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

num_samples = 1000
# sampling probability of each task type, in the same order as task_types
task_type_probabilities = [0.3, 0.3, 0.1, 0.05, 0.05, 0.2]
assert len(task_type_probabilities) == len(task_types), "one probability per task type is required"

all_tasks = []
for _ in range(num_samples):
  this_task = generate_task(task_types, task_probabilities = task_type_probabilities)
  all_tasks.append(this_task)
# build the frame once from the list of records
all_tasks_df = pd.DataFrame(all_tasks)
display(all_tasks_df)

Unnamed: 0,task_type,xray_region,investigation_type,investigation_source,Epilepsy,Historical Heart Attack or Stroke,Diabetes,Mental Health,time_in_pipeline,medication_type,medication_urgency,prescription_request_source,monitoring_required,polypharmacy,ecg_reason,blood_results
0,ReviewResults,Head,X-ray,Phone,0,0,0,0,1.454815,,,,,,,
1,PrescriptionRequest,,,,0,0,0,0,1.177178,Anti-Hypertension,>1 and <=3 days supply,Phone,False,False,,
2,PrescriptionRequest,,,,0,0,0,0,3.069833,Vitamins,>1 and <=3 days supply,Routine scheduled repeats,False,False,,
3,MedicalReports,,,,0,0,0,0,1.388618,,,,,,,
4,MedicalReports,,,,0,0,0,0,1.361107,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,MedicalReports,,,,0,0,0,0,0.830575,,,,,,,
996,PrescriptionRequest,,,,0,1,0,0,0.112722,SSRIs,<1 day supply,Routine scheduled repeats,False,False,,
997,ReviewResults,,ECG,NHS app,0,0,0,0,1.312853,,,,,,Suspected Arrythmia,
998,PatientCommunication,,,,0,0,0,0,0.592058,,,,,,,


In [103]:
# apply ranking based on clinicians opinions
def _score_row_as_clinician_a(row):
  """Score a single task row with clinician A's rankings and weights."""
  # comorbidity columns hold 0/1 flags; keep the names of those equal to 1
  comorbidity_flags = row[comorbidities]
  present_comorbidities = comorbidity_flags[comorbidity_flags == 1].index.to_list()
  return score_task(
      task_type = row['task_type'],
      # prescription request inputs
      medication_type = row['medication_type'],
      medication_type_ranking = medication_type_ranking_a,
      medication_urgency = row['medication_urgency'],
      medication_urgency_ranking = medication_urgency_ranking_a,
      prescription_request_source = row['prescription_request_source'],
      prescription_request_source_ranking = prescription_request_source_ranking_a,
      monitoring_required = row['monitoring_required'],
      monitoring_required_weighting = 0.1,
      polypharmacy = row['polypharmacy'],
      polypharmacy_weighting = 0.3,
      # investigation result inputs
      investigation_type = row['investigation_type'],
      investigation_source = row['investigation_source'],
      investigation_source_ranking = investigation_source_ranking_a,
      blood_results = row['blood_results'],
      blood_results_limits = blood_results_limits,
      blood_results_ranking = blood_results_rankings_a,
      xray_region = row['xray_region'],
      xray_ranking = xray_region_ranking_a,
      ecg_reason = row['ecg_reason'],
      ecg_reason_ranking = ecg_reason_ranking_a,
      # inputs used for every task type
      comorbidities = present_comorbidities,
      comorbidities_ranking = comorbidities_ranking_a,
      other_task_ranking = other_task_ranking_a,
      time_in_pipeline = row['time_in_pipeline'],
      time_in_pipeline_weight = 0.1,
  )

all_tasks_df['task_severity_a'] = all_tasks_df.apply(_score_row_as_clinician_a, axis = 1)


In [106]:
# summary statistics of clinician A's severity scores
all_tasks_df.loc[:, 'task_severity_a'].describe()

Unnamed: 0,task_severity_a
count,1000.0
mean,4.429156
std,3.486698
min,1.062968e-07
25%,1.218052
50%,4.073267
75%,6.588958
max,23.7778


In [107]:
# set an id column
# Guarded so the cell is idempotent: running it a second time would otherwise
# insert another 'index' column and rename it into a duplicate 'task_id'.
if 'task_id' not in all_tasks_df.columns:
  all_tasks_df = all_tasks_df.reset_index().rename(columns = {'index' : 'task_id'})

In [122]:
# bootstrap dataset and rank according to this clinicians preference
rank_set_size = 5    # number of tasks ranked against each other in one group
n_bootstraps = 100   # number of groups to draw
ranking_results = []
for n in range(n_bootstraps):
  # sample without replacement so a task is never compared to itself
  subsample = all_tasks_df.sample(n = rank_set_size, replace = False)
  # highest severity first; Ranking is the 1-based position, Group the draw number
  subsample_sorted = (
      subsample
      .sort_values(by = 'task_severity_a', ascending = False)
      .reset_index()
      .assign(Ranking = lambda ranked: ranked.index + 1, Group = n)
  )
  ranking_results.append(subsample_sorted)


In [123]:
# inspect the ranked group left over from the final bootstrap iteration
subsample_sorted

Unnamed: 0,index,task_id,task_type,xray_region,investigation_type,investigation_source,Epilepsy,Historical Heart Attack or Stroke,Diabetes,Mental Health,...,medication_type,medication_urgency,prescription_request_source,monitoring_required,polypharmacy,ecg_reason,blood_results,task_severity_a,Ranking,Group
0,129,129,ReviewResults,,ECG,Out-of-hours GP,0,1,1,0,...,,,,,,Suspected Arrythmia,,12.073934,1,99
1,557,557,PrescriptionRequest,,,,0,0,0,0,...,Emollients,<1 day supply,Routine scheduled repeats,False,False,,,5.002975,2,99
2,61,61,ReviewResults,,ECG,Phone,0,0,0,0,...,,,,,,Suspected Coronary Heart Disease,,4.017214,3,99
3,895,895,ReferralLetters,,,,0,0,0,0,...,,,,,,,,1.618948,4,99
4,710,710,MedicalReports,,,,0,0,0,0,...,,,,,,,,1.47918,5,99
