In [3]:
import pandas as pd
import json

In [192]:
# Load the prepared questionnaire responses; later cells fill in the
# input_weight / total_weight and llm_data / llm_prediction columns.
df = pd.read_csv("./final_for_model.csv")
df.head()

Unnamed: 0,gender,hipFracture,spineFracture,otherAdultFracture,fractureAge,fracturedBones,priorDexaScan,lastDexaScanDetails,fragilityFractureAfter45,youngLowTraumaFracture,...,recentWeightLoss,recentHeightLoss,spineSurgery,hipSurgery,gastricSurgery,currentlyPregnant,llm_data,llm_prediction,input_weight,total_weight
0,Female,Yes,Yes,Yes,55.0,Ribs,No,,No,Yes,...,No,Yes,No,No,No,No,,,,
1,Male,Yes,No,No,55.0,Pelvis,No,,Yes,No,...,No,No,No,No,No,No,,,,
2,Male,Yes,No,Yes,51.0,"Pelvis, Hip (Proximal Femur)",No,,Yes,No,...,Yes,No,No,No,No,No,,,,
3,Male,Yes,No,No,49.0,"Spine (Vertebrae), Ankle and Foot Bones, Wrist...",No,,No,No,...,No,No,No,No,No,No,,,,
4,Female,No,No,No,,,No,,No,Yes,...,No,Yes,No,No,No,No,,,,


In [193]:
# Load the questionnaire definition (one dict per question: name, weight,
# type, ...). JSON is decoded explicitly as UTF-8 so the result does not
# depend on the platform's default locale encoding.
with open("app/static/data.json", "r", encoding="utf-8") as f:
    form_meta = json.load(f)
form_meta

[{'question': 'What is your gender?',
  'weight': 1,
  'type': 'select',
  'name': 'gender',
  'options': ['Male', 'Female']},
 {'question': 'Have you ever had a hip fracture?',
  'weight': 1,
  'type': 'boolean',
  'name': 'hipFracture'},
 {'question': 'Have you ever had a spine fracture?',
  'weight': 1,
  'type': 'boolean',
  'name': 'spineFracture'},
 {'question': 'Have you had a fracture other than spine or hip as an adult?',
  'weight': 0.5,
  'type': 'boolean',
  'name': 'otherAdultFracture'},
 {'question': 'At what age did your fracture(s) occur?',
  'weight': 0.5,
  'type': 'number',
  'name': 'fractureAge'},
 {'question': 'Which bone(s) were fractured?',
  'weight': 0.5,
  'type': 'multiselect',
  'options': ['Hip (Proximal Femur)',
   'Spine (Vertebrae)',
   'Wrist (Distal Radius)',
   'Humerus (Upper Arm Bone)',
   'Pelvis',
   'Ribs',
   'Ankle and Foot Bones'],
  'name': 'fracturedBones'},
 {'question': 'Have you had a prior bone density (DEXA) scan?',
  'weight': 0.5,
  

In [198]:
def evaluate_weights(row, meta=None):
    """Score one questionnaire response against the form metadata.

    Parameters
    ----------
    row : pandas.Series or dict-like
        A single response; only ``row.get(column_name)`` is used.
    meta : list of dict, optional
        Question definitions. Each entry needs ``name`` and ``weight`` and may
        carry ``type``, ``negate`` and ``femaleOnly``. When omitted, the
        notebook-level ``form_meta`` is used, so ``df.apply(evaluate_weights,
        axis=1)`` keeps working unchanged.

    Returns
    -------
    pandas.Series
        ``[input_weight, total_weight]`` where ``total_weight`` is the sum of
        the weights of every applicable question and ``input_weight`` is the
        sum of the weights of the questions whose answer counts.
    """
    questions = form_meta if meta is None else meta
    is_male = row.get("gender") == "Male"

    input_weight = 0
    total_weight = 0

    for item in questions:
        col = item["name"]
        weight = item["weight"]

        # Female-only questions are excluded from both sums for male rows.
        if item.get("femaleOnly") and is_male:
            continue

        total_weight += weight

        raw = row.get(col)
        # Detect missing answers *before* stringifying: str(None) is 'None'
        # and str(pd.NA) is '<NA>', which would otherwise be scored as a real
        # answer by the select / multiselect / free-value branches below.
        if pd.api.types.is_scalar(raw) and pd.isna(raw):
            continue
        value = str(raw)
        if value in ("", "nan"):
            continue

        # NOTE(review): when the gender question is a 'select', "Male" is also
        # credited by the select branch below, so this special case only
        # matters if gender has another type - confirm the intended scoring.
        if col == "gender" and value == "Female":
            input_weight += weight
            continue

        kind = item.get("type")
        if kind == "boolean":
            # Questions flagged 'negate' count when the answer is "No".
            expected = "No" if item.get("negate") else "Yes"
            if value == expected:
                input_weight += weight
        elif kind == "select":
            # "--None--" is the placeholder option and does not count.
            if value != "--None--":
                input_weight += weight
        else:
            # multiselect, number and any other type: any answer counts.
            input_weight += weight

    return pd.Series([input_weight, total_weight])

In [199]:
# Score every row; evaluate_weights returns a two-element Series
# (input_weight, total_weight) that is assigned to the two columns in order.
df[["input_weight", "total_weight"]] = df.apply(evaluate_weights, axis=1)
df[["gender", "input_weight", "total_weight"]].head()

Unnamed: 0,gender,input_weight,total_weight
0,Female,10.0,35.5
1,Male,7.5,31.0
2,Male,10.0,31.0
3,Male,7.0,31.0
4,Female,6.5,35.5


In [None]:
# Sentence template per questionnaire field; "{}" is replaced by the answer.
_LLM_SENTENCE_TEMPLATES = {
    "gender": "The patient is {}",
    "hipFracture": "The patient has reported a history of hip fracture {}",
    "spineFracture": "Spine fracture {}",
    "otherAdultFracture": "And other adult fractures {}",
    "fractureAge": "With the fracture occurring at the age of {}",
    "fracturedBones": "The specific bone that was fractured was the {}",
    "priorDexaScan": "A DEXA scan has previously been performed {}",
    "lastDexaScanDetails": "With the most recent scan details noted as {}",
    "fragilityFractureAfter45": "The patient has experienced a fragility fracture after age 45 {}",
    "youngLowTraumaFracture": "And a low-trauma fracture at a younger age {}",
    "currentlyInMenopause": "The patient is currently in menopause {}",
    "ageAtMenopause": "Which began at the age of {}",
    "menopauseType": "And is classified as {} menopause",
    "menopauseStatus": "The patient had been through menopause {}",
    "menopausalStage": "The patient is at {} stage.",
    "premenopausalAmenorrhea": "The patient had  premenopausal Amenorrhea {}",
    "parentHipFracture": "They have parent hip fracture {}",
    "delayedPuberty": "They had delayed puberty {}",
    "familyHistoryOsteoporosis": "There is a family history of osteoporosis {}",
    "ovariesRemoved": "The ovaries have been removed {}",
    "familyHistoryHipFracture": "And a parental history of hip fracture {}",
    "currentSmoker": "The patient's smoking status is currently {}",
    "everSmoked": "With a history of smoking described as {}",
    "excessiveAlcoholIntake": "Alcohol intake is noted as excessive: {}",
    "historyOfFalls": "There is a history of falls {}",
    "eatingDisorder": "The patient also has a history of eating disorder {}",
    "highCalciumDiet": "Nutritional intake includes a high-calcium diet {}",
    "calciumSupplements": "Along with the use of calcium supplements {}",
    "vitaminDSupplements": "And vitamin D supplements {}",
    "longTermSteroids": "The patient has a history of long-term steroid use {}",
    "takenEstrogen": "Estrogen therapy {}",
    "glucocorticoidUse": "They have taken glucocorticoid {}",
    "hormoneTherapy": "They have taken hormon therapy {}",
    "osteoporosisMedications": "And has taken osteoporosis medications {}",
    "ssriUse": "The SSRI usage for the patient is {}",
    "ppiUse": "And PPI usage is {}",
    "rheumatoidArthritis": "Relevant medical conditions include rheumatoid arthritis {}",
    # Fixed label: this field is hyperPARAthyroidism, not hyperthyroidism.
    "primaryHyperparathyroidism": "Primary Hyperparathyroidism {}",
    "hyperthyroidism": "Hyperthyroidism {}",
    "crohnsOrCeliac": "Crohn’s or celiac disease {}",
    "kidneyDiseaseOrDialysis": "Kidney disease or dialysis {}",
    "copd": "COPD {}",
    "hivAids": "HIV/AIDS {}",
    "depression": "Depression {}",
    "diabetes": "And diabetes {}",
    "recentWeightLoss": "The patient has recently experienced weight loss {}",
    "recentHeightLoss": "And height loss {}",
    "spineSurgery": "And has undergone spine surgery {}",
    "hipSurgery": "Or hip surgery {}",
    "gastricSurgery": "Or gastric surgery {}",
    "currentlyPregnant": "Lastly, it is noted whether the patient is currently pregnant {}",
}


def generate_llm_data(data, meta=None):
  """Render one scored response as narrative text plus a risk statement.

  Parameters
  ----------
  data : pandas.Series or dict-like
      One response row. Must contain ``input_weight`` and ``total_weight``;
      questionnaire answers are read with ``data.get(name)``.
  meta : list of dict, optional
      Question definitions (``name`` and optional ``femaleOnly``) that fix
      which sentences are emitted and in what order. Defaults to the
      notebook-level ``form_meta``.

  Returns
  -------
  pandas.Series
      ``[llm_data, llm_prediction]``: the concatenated sentences (each
      followed by ``". "``) and the risk sentence with the percentage
      ``input_weight / total_weight * 100`` formatted to two decimals.

  Raises
  ------
  KeyError
      If a question name has no sentence template, or the weight columns
      are absent.
  """
  questions = form_meta if meta is None else meta
  is_male = data.get("gender") == "Male"

  sentences = []
  for item in questions:
    # Female-only questions are not described for male patients.
    if is_male and item.get('femaleOnly'):
      continue

    name = item['name']
    template = _LLM_SENTENCE_TEMPLATES[name]

    # Only the fields actually rendered are read, and unanswered ones are
    # skipped so the text never contains a literal "nan"/"None" answer.
    answer = data.get(name)
    if pd.api.types.is_scalar(answer) and pd.isna(answer):
      continue
    if str(answer) in ('', 'nan'):
      continue

    sentences.append(template.format(answer) + ". ")

  llm_data = ''.join(sentences)

  risk_percentage = data['input_weight'] / data['total_weight'] * 100
  llm_prediction = f"The patient is at {risk_percentage:.2f}% risk of osteoporosis"

  return pd.Series([llm_data, llm_prediction])

# Build the narrative text and risk sentence for every row; generate_llm_data
# returns a two-element Series (llm_data, llm_prediction). It reads
# input_weight / total_weight, so the scoring cell must have been run first.
df[["llm_data", "llm_prediction"]] = df.apply(generate_llm_data, axis=1)
df[["llm_data", "llm_prediction"]].head()

Unnamed: 0,llm_data,llm_prediction
0,The patient is Female. The patient has reporte...,The patient is at 28.17% risk of osteoporosis
1,The patient is Male. The patient has reported ...,The patient is at 24.19% risk of osteoporosis
2,The patient is Male. The patient has reported ...,The patient is at 32.26% risk of osteoporosis
3,The patient is Male. The patient has reported ...,The patient is at 22.58% risk of osteoporosis
4,The patient is Female. The patient has reporte...,The patient is at 18.31% risk of osteoporosis


In [205]:
# Persist the enriched frame; index=False keeps the row index out of the file.
df.to_csv('final_llm_data.csv', index=False)