In [None]:
# source
# https://github.com/AldoF95/bart-chat-summarizer-finetuning/blob/main/Bart_large_xsum_fine_tuned_samsum.ipynb

In [None]:
# Run the following in Google Colab
# 1. install the required python libraries
# !pip install accelerate -U
# !pip install datasets
# !pip install rouge.score nltk py7zr

# check version
# import accelerate
# import transformers

# transformers.__version__, accelerate.__version__

# mount google drive
# from google.colab import drive
# drive.mount('/content/drive')


# clone ACI-BENCH repository and navigate to directory

# NOTE run this line at least once
# !git clone https://github.com/wyim/aci-bench.git
# import pandas as pd
# %cd aci-bench/data/src_experiment_data   (use %cd: `!cd` runs in a subshell and does not change the notebook's working directory)



In [None]:
# initialize and train the model
# fine tuning BART for summarization
# https://medium.com/@ferlatti.aldo/fine-tuning-a-chat-summarizer-c18625bc817d

# abstractive summarization: generates new words using a generative model.

import nltk
import numpy as np
import torch
import transformers
from datasets import load_dataset, load_from_disk,load_metric
from transformers import AutoTokenizer,AutoModelForSeq2SeqLM,Seq2SeqTrainingArguments,Seq2SeqTrainer
# #
# from transformers import DataCollatorForSeq2Seq as data_collator
# from transformers import rouge
# ##
nltk.download('punkt')

# --- configuration -----------------------------------------------------------
max_input = 512     # dialogues are truncated/padded to this many tokens
max_target = 128    # reference notes are truncated/padded to this many tokens
batch_size = 3      # NOTE(review): not referenced by the cells visible here; the
                    # training arguments set their own per-device batch size
model_checkpoints = "facebook/bart-large-xsum"

# ROUGE scorer used by the evaluation callback
metric = load_metric('rouge')

# dataset
data_dir = '/content/aci-bench/data/src_experiment_data'
train_data = data_dir + '/train_aci_asrcorr.csv'
test1_data = data_dir + '/test1_aci_asr.csv'
test2_data = data_dir + '/test2_aci_asr.csv'
test3_data = data_dir + '/test3_aci_asr.csv'

# The three test files are concatenated into a single 'test' split.
dataset = load_dataset(
    'csv',
    data_files={'train': train_data, 'test': [test1_data, test2_data, test3_data]},
    column_names=['dataset','id','dialogue','note'],
)

# Supplying explicit column names makes the CSV reader treat the FIRST line of
# every file as data. If the files carry a header line, it would otherwise be
# used as a (nonsense) training / evaluation example, so drop any row that
# merely echoes the column headers. Files without a header are unaffected.
dataset = dataset.filter(
    lambda row: not (row['dialogue'] == 'dialogue' and row['note'] == 'note')
)
print(dataset)

# tokenize data
tokenizer = AutoTokenizer.from_pretrained(model_checkpoints)

# Label value that the loss function skips; padded label positions are set to
# this so that padding does not contribute to the training loss.
LABEL_IGNORE_INDEX = -100


def preprocess_data(data_to_process):
  """Tokenize one batch of dialogues and reference notes for seq2seq training.

  Args:
    data_to_process: a batch (dict of column name -> list) containing the
      'dialogue' and 'note' columns.

  Returns:
    The tokenized dialogues (padded/truncated to ``max_input`` tokens) with an
    extra 'labels' entry holding the tokenized notes (padded/truncated to
    ``max_target`` tokens). Padding positions inside 'labels' are replaced by
    ``LABEL_IGNORE_INDEX`` so they are ignored when the loss is computed.
  """
  print('Preprocessing data')
  inputs = list(data_to_process['dialogue'])
  # tokenize dialogues
  model_inputs = tokenizer(inputs, max_length=max_input, padding='max_length',truncation=True)
  # tokenize the summaries
  with tokenizer.as_target_tokenizer():
    targets = tokenizer(data_to_process['note'],max_length=max_target, padding='max_length',truncation=True)
  # set labels: keep real tokens, mask out the padding (attention_mask == 0)
  model_inputs['labels'] = [
      [token if keep else LABEL_IGNORE_INDEX for token, keep in zip(ids, mask)]
      for ids, mask in zip(targets['input_ids'], targets['attention_mask'])
  ]
  # return the tokenized data
  return model_inputs

tokenize_data = dataset.map(preprocess_data, batched = True)

# load model
model = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoints)

collator = transformers.DataCollatorForSeq2Seq(tokenizer, model=model)

def compute_rouge(pred):
  """Compute ROUGE F-measures (in %) and the mean generated length.

  Args:
    pred: a ``(predictions, labels)`` pair of token-id arrays as handed to the
      trainer's ``compute_metrics`` callback.

  Returns:
    dict mapping each ROUGE key to its mid F-measure * 100, plus 'gen_len'
    (mean number of non-padding tokens per prediction); all rounded to 4
    decimals.
  """
  predictions, labels = pred
  # Some model outputs arrive as a tuple whose first element is the token ids.
  if isinstance(predictions, tuple):
    predictions = predictions[0]

  # -100 marks ignored / filler positions and is not a valid vocabulary id, so
  # swap it for the pad token before decoding.
  pad_id = tokenizer.pad_token_id
  predictions = np.where(predictions != LABEL_IGNORE_INDEX, predictions, pad_id)
  labels = np.where(labels != LABEL_IGNORE_INDEX, labels, pad_id)

  #decode the predictions
  decode_predictions = tokenizer.batch_decode(predictions, skip_special_tokens=True)
  #decode labels
  decode_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

  #compute results
  res = metric.compute(predictions=decode_predictions, references=decode_labels, use_stemmer=True)
  #get %
  res = {key: value.mid.fmeasure * 100 for key, value in res.items()}

  # average number of real (non-padding) tokens generated per example
  pred_lens = [np.count_nonzero(row != pad_id) for row in predictions]
  res['gen_len'] = np.mean(pred_lens)

  return {k: round(v, 4) for k, v in res.items()}


# define training / evaluation parameters
args = Seq2SeqTrainingArguments(
    'medical_note_hpi',                # output directory for checkpoints
    evaluation_strategy='epoch',       # evaluate on the test split after every epoch
    learning_rate=2e-5,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    gradient_accumulation_steps=2,     # effective train batch = 2 * 2 per device
    weight_decay=0.01,
    save_total_limit=2,                # keep only the two most recent checkpoints
    num_train_epochs=3,
    predict_with_generate=True,        # metrics are computed on generated text
    # Let evaluation generate up to the same length the reference notes are
    # truncated to; otherwise generation falls back to the checkpoint's own
    # default length, which may be shorter than the references.
    generation_max_length=max_target,
    eval_accumulation_steps=3,
    # Mixed precision is only available with CUDA, so enable it only when a
    # GPU is actually present instead of failing on CPU-only runtimes.
    fp16=torch.cuda.is_available(),
    )

trainer = Seq2SeqTrainer(
    model,
    args,
    train_dataset=tokenize_data['train'],
    eval_dataset=tokenize_data['test'],
    data_collator=collator,
    tokenizer=tokenizer,
    compute_metrics= compute_rouge
)

trainer.train()

# give model a new interview and see how it performs.



In [None]:
# test code using portion of aci-bench dataset
validate = """[doctor] hey diana it's good to see you in here so i see that you injured your wrist could you tell me a bit about what happened [patient] yeah i was walking up and down the stairs i was doing my laundry and i slipped and i tried to catch myself and i put my arms out to catch myself and then all of a sudden i just my wrist started to hurt real bad and it got real swollen [doctor] wow okay so which wrist are we talking about left or right [patient] it's my right one of course [doctor] okay and then have you ever injured this arm before [patient] no i have not [doctor] okay alright so on a scale of one to ten how severe is the pain [patient] gosh it's like a nine [doctor] wow okay have you done anything to ease it [patient] yeah i did the ice thing i put ice on it and then i you know i even i have a ace wrap at home i try to do that [doctor] mm-hmm [patient] and then i took some ibuprofen but it helps a little bit but it's just it's it's just not right [doctor] okay [patient] really [doctor] yeah okay have you sorry i'm trying to think how long ago did this injury happen [patient] this happened yesterday morning [doctor] okay [patient] maybe just you know i just bumped it but [doctor] okay [patient] it's just not it's really bad [doctor] okay no i understand okay so i'm going so you said you were doing laundry [patient] yes i had my back hit my basket and for some reason this cold started to kinda fall out a little bit i was trying to catch it i missed a step and i just totally [doctor] okay alright any does the pain extend anywhere [patient] no not really [doctor] okay [patient] it's just really along my wrist [doctor] okay any numbness any tingling [patient] a little one and one ca n't tell if it's just because of the swelling in my wrist but just i can like i can feel it my fingers still [doctor] mm-hmm [patient] but just maybe a little bit of tingling [doctor] okay alright and are you so so okay i'm gon na think on this but in the meantime i'm gon 
na do my physical exam alright [patient] okay [doctor] okay so you know looking at your looking at your head and your neck i do n't appreciate any like adenopathy no thyromegaly no no carotid bruit looking at your listening to your heart i do n't appreciate any murmur no rub no gallop your lungs are clear to auscultation bilaterally your lower legs you have palpable pulses no lower edema your shoulders every like your upper extremities i see normal range of movement with your right wrist let's go ahead and focus on it so when i push on the inside here does it hurt [patient] yes [doctor] okay [patient] it does [doctor] and what about the outside does that hurt as well [patient] yeah it does [doctor] are you able to move your wrist towards your arm like [patient] not without extreme pain [doctor] okay so pain on flexion what about extension when you pick your wrist up [patient] yeah i have a hard time doing that actually [doctor] alright what about we're gon na go ahead and hold your arm like straight like flat and then try and move it sideways does radial deviation hurt [patient] yeah [doctor] alright and then lateral as well [patient] yeah it's really hard to move any direction of this hand for some reason [doctor] alright so wrist abduction adduction positive for pain on movement are you able to make a fist [patient] hmmm yeah a little bit but i ca n't do it really tight [doctor] okay alright okay so i'm just gon na go ahead and feel on your fingers really quickly alright metacarpals intact noticed some obvious swelling ecchymosis obvious swelling and bruising tenderness on palpation throughout there is evidence of potential fracture feeling some bony crepitus alright so this pain is it like chronic i wanted to ask you [patient] yeah i would say it kinda goes away when i take that ibuprofen but for the most part i feel it i feel it there and it it's just really really bad when i move it all [doctor] okay so when you like is there a position either hurts less or 
hurts more like say if your arm is raised and elevated over your head does it hurt more or is it just best to keep it like down [patient] it's good if i keep it a little bit above my like a little i guess a little bit like around my like just a regular level like if you're typing or something and then i just put it on a pillow and i just let it stay straight like i feel better [doctor] okay yeah no i do n't think i understand completely okay so i took a look at your vitals and your blood pressure is a little elevated but honestly that's probably to do with the pain right our body can respond to pain in that way we are looking at like a hundred and forty over over seventy it's not anything crazy but something to mention i see that your heart rate is also a little elevated at like about like eighty beats a minute you are not running a fever so that's great look at ninety ninety seven . two your respiratory rate is pretty normal at like twenty so before we came in i i know that we had you do an x-ray and i'm sure that that was a bit more painful because we had to do so many manipulations but i do wan na note that you are positive for what we call a colles' fracture what that means is that the joints between your wrist like the bones between your wrist that there there is evidence of a a fracture and we are gon na have to treat it a little conservatively at first and then consider some of the options options that are available to us so for your primary diagnosis of a colles' fracture we are going to give you a thumb spica for today and that's going to [patient] i'm sorry [doctor] pardon what [patient] a what [doctor] we're gon na brace you we're gon na give you a brace [patient] okay thank you [doctor] sorry no problem sorry yeah not a thumb spica we're gon na brace your arm and you're gon na have that we we have a couple of options but i think the best course of action is gon na be for surgery we will in the meantime give you pain medication i wan na put you on fifty 
milligrams of ultram every six hours and then i also wan na get you on get you into physical therapy a few weeks after surgery this is gon na be just a normal procedure you will be in for an overnight stay but after that once we assess and make sure that everything is good you'll be able to go home okay [patient] when do i have to have the surgery [doctor] we would like it to happen as quickly as possible you know your body is a wonderful miracle and it's going to start trying to heal on it's own what we need to do is get your wrist straight and then like put screws in to make sure that we hold it in place or else it could like heal and malform [patient] okay [doctor] alright so what [patient] how how long do i have to wear that brace [doctor] you're gon na be wearing the brace for about six weeks [patient] six weeks [doctor] yeah so you're gon na you're gon na come in for your surgery we're gon na perform it you're gon na stay overnight and then you'll be bracing it for six weeks in the meantime you'll also then go to physical therapy i want you there like we're gon na they're gon na do an assessment and determine how much but i'm thinking probably three times a week just to make sure that you can get your wrist as strong as possible to prevent like future injury now the cool thing about getting any kind of a bone break is that your your body comes out even stronger so this should n't happen again but unfortunately like it's these situations that oof that just kind of [patient] oof [doctor] these these deform these deformities that really that really kind of hurt is the short version alright no problem any other questions [patient] no well i am going on vacation do i need to cancel it like can i still go even with the i mean after the surgery [doctor] yeah [patient] do it as soon as possible i'm going a vacation in a month so [doctor] okay how long is the vacation [patient] it's only for like a couple weeks [doctor] okay well so you might have to postpone it just 
because depending on what physical therapy says right if they feel that you can sustain if you can like sustain the exercises while you're gone that if there's something that you can do by yourself then you should be fine but we do wan na give it you said that it's gon na happen in a couple of weeks [patient] no vacation in a month [doctor] okay okay yeah so how about in a month we come you come back let's do a checkup again see where we are at and then we can assess whether or not this is something that i would recommend you do [patient] that sounds good thank you [doctor] no problem bye [patient] bye [doctor] the fracture appears extra-articular and usually proximal to the radial ulnar joint dorsal angulation of the distal fracture fragment is present to a variable degree if dorsal angulation is severe presenting with a dinner fork deformity ulnar styloid fracture is present"""

#tokenize the conversation (same length / padding settings as the training inputs)
model_inputs = tokenizer(validate,  max_length=max_input, padding='max_length', truncation=True)
#make prediction: the single example is wrapped in a list so it can be batched
#like a dataset; read the generated ids by attribute instead of unpacking into
#`_`, which would overwrite IPython's last-output variable
raw_pred = trainer.predict([model_inputs]).predictions
#-100 is a filler value, not a vocabulary id; map it to the pad token before decoding
raw_pred = np.where(raw_pred != -100, raw_pred, tokenizer.pad_token_id)
#decode the output without <s>, </s> and <pad> markers
print(tokenizer.decode(raw_pred[0], skip_special_tokens=True))
