In [1]:
import torch
import nlp

from transformers import T5ForConditionalGeneration, T5Tokenizer

from tqdm.auto import tqdm

from sklearn import metrics

#### Evaluate the model fine-tuned on commonsense_qa for 3 epochs

In [3]:
# Load the fine-tuned checkpoint; the model weights and the tokenizer are
# both read from the same directory.
checkpoint_dir = './models/commonsense_qa/3_epochs'
model = T5ForConditionalGeneration.from_pretrained(checkpoint_dir)
tokenizer = T5Tokenizer.from_pretrained(checkpoint_dir)

In [6]:
# Read the saved validation split from disk and wrap it in a batched loader.
# torch.load unpickles the file, so only point it at data you trust.
valid_dataset = torch.load('./data/commonsense_qa/valid_data.pt')
dataloader = torch.utils.data.DataLoader(
    valid_dataset,
    batch_size=32,
)

In [8]:
# Generate predictions
# For every batch, generate an answer with the model and collect the decoded
# prediction and target strings in the order the loader yields them.
predictions = []
targets = []
for batch in tqdm(dataloader):
    # max_length caps each generated sequence at 16 tokens.
    generated_ids = model.generate(input_ids = batch['input_ids'],
                                   attention_mask = batch['attention_mask'],
                                   max_length = 16,
                                   early_stopping = True)

    # Skip special tokens (padding, end-of-sequence, decoder start) while
    # decoding: the accuracy below is an exact string match, so it must not
    # depend on how the tokenizer renders those tokens.
    predictions.extend(tokenizer.decode(ids, skip_special_tokens = True)
                       for ids in generated_ids)
    targets.extend(tokenizer.decode(ids, skip_special_tokens = True)
                   for ids in batch['target_ids'])

HBox(children=(FloatProgress(value=0.0, max=39.0), HTML(value='')))

  return function(data_struct)





In [9]:
# Exact-match accuracy: share of decoded predictions equal to their decoded target.
metrics.accuracy_score(y_true=targets, y_pred=predictions)

0.6240786240786241

In [10]:
# Error analysis: for every example whose decoded prediction differs from its
# decoded target, print the decoded model input, the expected answer and the
# answer the model generated.
incorrect_idxs = [
    i
    for i, (prediction, target) in enumerate(zip(predictions, targets))
    if prediction != target
]
for incorrect_idx in incorrect_idxs:
    example = valid_dataset[incorrect_idx]
    # Skip special tokens so padding / end-of-sequence markers do not clutter
    # the printed text.
    print(tokenizer.decode(example['input_ids'], skip_special_tokens = True))
    print("Target Answer: {}".format(tokenizer.decode(example['target_ids'], skip_special_tokens = True)))
    print("Predicted Answer: {}".format(predictions[incorrect_idx]))

question: A revolving door is convenient for two direction travel, but it also serves as a security measure at a what? options: A: bank B: library C: department store D: mall E: new york
Target Answer: A: bank
Predicted Answer: D: mall
question: Where would you find magazines along side many other printed works? options: A: doctor B: bookstore C: market D: train station E: mortuary
Target Answer: B: bookstore
Predicted Answer: C: market
question: James was looking for a good place to buy farmland. Where might he look? options: A: midwest B: countryside C: estate D: farming areas E: illinois
Target Answer: A: midwest
Predicted Answer: C: estate
question: What island country is ferret popular? options: A: own home B: north carolina C: great britain D: hutch E: outdoors
Target Answer: C: great 
Predicted Answer: B: north car
question: What would vinyl be an odd thing to replace? options: A: pants B: record albums C: record store D: cheese E: wallpaper
Target Answer: E: wallpaper
Predicted

Target Answer: C: lazi
Predicted Answer: E: wasting
question: If chewing food is difficult for you, what is a possible reason? options: A: broken jaw B: sore mouth C: eating D: good digestion E: avoiding choking
Target Answer: B: sore
Predicted Answer: A: broken jaw
question: Where do you find the most amount of leafs? options: A: floral arrangement B: ground C: forrest D: field E: compost pile
Target Answer: C: forrest
Predicted Answer: B: ground
question: If you take the risk buying a used car, you still hope it can what? options: A: go fast B: start running C: going too fast D: look good E: last several years
Target Answer: E: last several
Predicted Answer: D: look good
question: Dan was ditting quietly on the couch with a book in his hand. Laurie thought that he was just focused on what he was doing, but he actually did what? options: A: eat B: think C: reading D: meditate E: fall asleep
Target Answer: E: fall asleep
Predicted Answer: B: think
question: What do airplanes do as they

question: What do you ask a child to do when you first meet her? options: A: ask questions B: count to ten C: costume D: state name E: dress herself
Target Answer: D: state name
Predicted Answer: E: dress herself
question: Where can you store your dishes in your dwelling? options: A: drawer B: shelf C: pantry D: apartment E: cabinet
Target Answer: B: shelf
Predicted Answer: C: pantry
question: The man laid on the soft moss and looked up at the trees, where was the man? options: A: niagra falls B: forest C: waterfall D: ground E: tree
Target Answer: B: forest
Predicted Answer: E: tree
question: Where can I find a stapler in many places? options: A: desk drawer B: office building C: manual D: office supply store E: desktop
Target Answer: B: office building
Predicted Answer: D: office supply
question: Where would you find a toy soldier that is being played with? options: A: toy box B: movies C: child's hand D: toybos E: child park
Target Answer: C: child'
Predicted Answer: B: movies
quest

question: Name a location where you would not want to find mice. options: A: loft B: attic C: bell cat D: countryside E: laboratory
Target Answer: B: attic
Predicted Answer: D: countryside
question: Where do most people turn to get information on their phones? options: A: internet B: book C: online D: google E: manual
Target Answer: D: google
Predicted Answer: A: internet
question: They had a theory of what they could do in t he big game, so over and over they would what? options: A: park B: practice C: fact D: practical E: practise
Target Answer: B: practice
Predicted Answer: E: practise
question: When you see something rise, you are where in relation to it? options: A: sun set B: near C: fall D: below E: lower
Target Answer: D: below
Predicted Answer: B: near
question: What do you do when you need to get food? options: A: table B: disneyland C: refrigerators D: pantry E: shop
Target Answer: E: shop
Predicted Answer: D: pantry
question: What has someone who had finished their undergra

Target Answer: E: i
Predicted Answer: D: ken
question: Where is a well used toy car likely to be found? options: A: child's room B: boy's bedroom C: own home D: toy store E: house
Target Answer: A: child'
Predicted Answer: C: own home
question: Where would you find an office worker gossiping with their colleagues? options: A: water cooler B: space shuttle C: baby shower D: bus stop E: family
Target Answer: A: water cooler
Predicted Answer: D: bus stop
question: Where would you put nails if they are already packaged? options: A: pocket B: container C: cabinet D: jar E: store
Target Answer: C: cabinet
Predicted Answer: B: container
question: The man acted ridiculous at the funeral, what attitude should he have taken? options: A: straightforward B: serious C: solemn D: somber E: funny
Target Answer: C: solem
Predicted Answer: B: serious
question: He was trying to procreate with many individuals, this led to a what? options: A: moaning B: die C: kiss D: std E: sanity
Target Answer: D: s
Pr

question: The rats were hiding in the house, where were they? options: A: sewers B: laboratory C: basement D: clinic E: cellar
Target Answer: E: cellar
Predicted Answer: C: basement
question: James saw a kite flying in the sky. He traced the string back to its origin and found it. Where did the string begin? options: A: end of line B: hobby shop C: his hand D: toy store E: child's hand
Target Answer: E: child'
Predicted Answer: D: toy
question: Where is a likely place for an ivy plant? options: A: flower pot B: shelf C: windowsill D: outside E: sill
Target Answer: D: outside
Predicted Answer: C: windowsill
question: Where has the newest baseball stadium? options: A: phoenix B: chicago C: antarctica D: san francisco E: urban areas
Target Answer: A: pho
Predicted Answer: D: s
question: What type of residence has a ground floor with a stoop? options: A: brownstone B: hotel C: condominium D: entering building E: office building
Target Answer: A: brownstone
Predicted Answer: C: condominium


question: What will happen to animals after eating food? options: A: bite B: digestion C: feel pleasure D: pass water E: listen to each other
Target Answer: C: feel pleasure
Predicted Answer: B: digestion
question: James need to use a toilet but there were no public ones in sight. Eventually he broke down and did something very expensive so that he could get a toilet. Where might he have gone? options: A: motel room B: apartment C: bathroom D: games E: house
Target Answer: A: motel
Predicted Answer: E: house
question: The trucker plopped on the bench with a sense of relief, where did he arrive? options: A: bordello B: rest area C: garden D: bus stop E: state park
Target Answer: B: rest area
Predicted Answer: D: bus stop
question: There was no shade for Jenny. She was forced to lie there exposed to what? options: A: full sunlight B: bright sunshine C: sunny place D: eat cake E: direct sunlight
Target Answer: A: full sunlight
Predicted Answer: E: direct sunlight
question: What do people 

Target Answer: A: babies
Predicted Answer: D: rapport
question: Where could you find a shark before it was caught? options: A: pool hall B: tomales bay C: marine museum D: business E: desert
Target Answer: B: tomale
Predicted Answer: C: marine museum
question: Where is one likely to find poker chips? options: A: supermarket B: pantry C: motherboard D: bar E: bar
Target Answer: D: bar
Predicted Answer: E: bar
question: Dance can be elegant and specific, or you can just have fun and what? options: A: falling down B: trip C: fall down D: move around E: celebrate
Target Answer: D: move around
Predicted Answer: E: celebrate
question: People played a variety of games in the soccer field. It was the closest thing they had to what? options: A: town B: beach C: park D: near E: outside
Target Answer: C: park
Predicted Answer: D: near
question: What is likely to have a better school cafeteria? options: A: high school B: canteen C: polytechnic D: large room E: all kinds of schools
Target Answer: C

#### Evaluate the model fine-tuned on social_i_qa for 2 epochs, then commonsense_qa for 3 epochs

In [19]:
# Load the pretrained model
# Model weights and tokenizer are both read from the same checkpoint directory.
checkpoint_dir = './models/social_i_qa_commonsense_qa'
model = T5ForConditionalGeneration.from_pretrained(checkpoint_dir)
tokenizer = T5Tokenizer.from_pretrained(checkpoint_dir)

In [20]:
# Generate predictions
# For every batch, generate an answer with the model and collect the decoded
# prediction and target strings in the order the loader yields them.
predictions = []
targets = []
for batch in tqdm(dataloader):
    # max_length caps each generated sequence at 16 tokens.
    generated_ids = model.generate(input_ids = batch['input_ids'],
                                   attention_mask = batch['attention_mask'],
                                   max_length = 16,
                                   early_stopping = True)

    # Skip special tokens (padding, end-of-sequence, decoder start) while
    # decoding: the accuracy below is an exact string match, so it must not
    # depend on how the tokenizer renders those tokens.
    predictions.extend(tokenizer.decode(ids, skip_special_tokens = True)
                       for ids in generated_ids)
    targets.extend(tokenizer.decode(ids, skip_special_tokens = True)
                   for ids in batch['target_ids'])

HBox(children=(FloatProgress(value=0.0, max=39.0), HTML(value='')))




In [21]:
# Exact-match accuracy: share of decoded predictions equal to their decoded target.
metrics.accuracy_score(y_true=targets, y_pred=predictions)

0.6224406224406225