In [1]:
import torch
import nlp

from transformers import T5ForConditionalGeneration, T5Tokenizer

from tqdm.auto import tqdm

from sklearn import metrics

In [2]:
# Pick the GPU when CUDA is actually usable, otherwise fall back to the CPU.
# NOTE: `is_available` has to be *called*; the bare function object is always
# truthy, which would select 'cuda' even on machines without a GPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

#### Evaluate the model fine-tuned on commonsense_qa for 10 epochs

In [5]:
# Restore the fine-tuned commonsense_qa checkpoint.
# The tokenizer and the model weights are read from the same directory.
commonsense_qa_checkpoint_dir = './models/commonsense_qa/10_epochs'
commonsense_qa_model = T5ForConditionalGeneration.from_pretrained(commonsense_qa_checkpoint_dir)
commonsense_qa_tokenizer = T5Tokenizer.from_pretrained(commonsense_qa_checkpoint_dir)

In [7]:
# Read the serialized validation split from disk and wrap it in a
# DataLoader that yields batches of 32 examples.
commonsense_qa_valid_dataset = torch.load('./data/commonsense_qa/valid_data.pt')
commonsense_qa_dataloader = torch.utils.data.DataLoader(
    dataset=commonsense_qa_valid_dataset,
    batch_size=32,
)

In [13]:
# Generate predictions
# Both the generated ids and the reference ids are decoded with
# skip_special_tokens=True so that special tokens (e.g. padding or
# end-of-sequence markers) never end up in the strings that are later
# compared for exact-match accuracy. This mirrors how the common_gen
# predictions are decoded further down in this notebook.
commonsense_qa_predictions = []
commonsense_qa_targets = []
commonsense_qa_model.to(device)
commonsense_qa_model.eval()  # evaluation mode for inference
with torch.no_grad():  # gradients are not needed when only generating
    for batch in tqdm(commonsense_qa_dataloader):
        generated_ids = commonsense_qa_model.generate(input_ids = batch['input_ids'].to(device),
                          attention_mask = batch['attention_mask'].to(device),
                          max_length = 16)
        prediction = [commonsense_qa_tokenizer.decode(ids, skip_special_tokens=True) for ids in generated_ids]
        target = [commonsense_qa_tokenizer.decode(ids, skip_special_tokens=True) for ids in batch['target_ids']]

        # Predictions and targets are appended in lockstep, so index i in one
        # list always corresponds to index i in the other.
        commonsense_qa_predictions.extend(prediction)
        commonsense_qa_targets.extend(target)

HBox(children=(FloatProgress(value=0.0, max=39.0), HTML(value='')))




In [15]:
# Exact-match accuracy: fraction of decoded predictions equal to the decoded targets
metrics.accuracy_score(y_true=commonsense_qa_targets, y_pred=commonsense_qa_predictions)

0.6404586404586404

In [16]:
# Collect the positions where the decoded prediction differs from the decoded target
incorrect_idxs = []
for idx, predicted_answer in enumerate(commonsense_qa_predictions):
    if predicted_answer != commonsense_qa_targets[idx]:
        incorrect_idxs.append(idx)

# For every miss, show the prompt, the reference answer and the model's answer
for incorrect_idx in incorrect_idxs:
    example = commonsense_qa_valid_dataset[incorrect_idx]
    question_text = commonsense_qa_tokenizer.decode(example['input_ids'])
    target_text = commonsense_qa_tokenizer.decode(example['target_ids'])
    print(question_text)
    print("Target Answer: {}".format(target_text))
    print("Predicted Answer: {}".format(commonsense_qa_predictions[incorrect_idx]))

question: A revolving door is convenient for two direction travel, but it also serves as a security measure at a what? options: A: bank B: library C: department store D: mall E: new york
Target Answer: A: bank
Predicted Answer: D: mall
question: Where would you find magazines along side many other printed works? options: A: doctor B: bookstore C: market D: train station E: mortuary
Target Answer: B: bookstore
Predicted Answer: C: market
question: James was looking for a good place to buy farmland. Where might he look? options: A: midwest B: countryside C: estate D: farming areas E: illinois
Target Answer: A: midwest
Predicted Answer: C: estate
question: What island country is ferret popular? options: A: own home B: north carolina C: great britain D: hutch E: outdoors
Target Answer: C: great britain
Predicted Answer: B: north carolina
question: What would vinyl be an odd thing to replace? options: A: pants B: record albums C: record store D: cheese E: wallpaper
Target Answer: E: wallpap

question: Why would you be watching tv instead of doing something else? options: A: headache B: laughter C: laziness D: erections E: wasting time
Target Answer: C: laziness
Predicted Answer: E: wasting time
question: He had to wear a tuxedo while playing the keyboard instrument, so did the other hundred members of the what? options: A: music store B: band C: medium D: orchestra E: piano store
Target Answer: D: orchestra
Predicted Answer: B: band
question: Where do you find the most amount of leafs? options: A: floral arrangement B: ground C: forrest D: field E: compost pile
Target Answer: C: forrest
Predicted Answer: B: ground
question: Where would you find a monkey in the wild? options: A: zoo B: barrel C: research laboratory D: captivity E: thailand
Target Answer: E: thailand
Predicted Answer: D: captivity
question: If you take the risk buying a used car, you still hope it can what? options: A: go fast B: start running C: going too fast D: look good E: last several years
Target Answe

question: What happens at soon as a living being is born? options: A: expiration B: growing older C: sometimes bad D: death E: start reproduction
Target Answer: B: growing older
Predicted Answer: A: expiration
question: The cancer patient was expecting to die, so he made out his what? options: A: not to live B: write will C: never want D: seek help E: go to hell
Target Answer: B: write will
Predicted Answer: A: not to live
question: There was a toll road that meandered from Maine to New Hampshire, where was it? options: A: massachusetts B: new england C: my house D: new jersey E: connecticut
Target Answer: B: new england
Predicted Answer: A: massachusetts
question: The cat carefully navigated the area, they do everything they can to avoid what? options: A: get wet B: eat vegetables C: falling D: wool sweater E: sharp claws
Target Answer: A: get wet
Predicted Answer: C: falling
question: What does the sky do before a rain? options: A: appear beautiful B: appear blue C: shows a rainbow D

Target Answer: A: hen house
Predicted Answer: C: mountains
question: What do humans do to other humans after death? options: A: celebrate B: burial C: life D: rebirth E: decomposition
Target Answer: B: burial
Predicted Answer: A: celebrate
question: Where can you go to use a piano in your neighborhood if you don't have one? options: A: music school B: music store C: neighbor's house D: lunch E: drawing room
Target Answer: C: neighbor's house
Predicted Answer: B: music store
question: Where do most people turn to get information on their phones? options: A: internet B: book C: online D: google E: manual
Target Answer: D: google
Predicted Answer: A: internet
question: What type of non-vegetarian soup is one likely to find a potato? options: A: beef stew B: own kitchen C: clam chowder D: kitchen cabinet E: pantry
Target Answer: C: clam chowder
Predicted Answer: A: beef stew
question: They had a theory of what they could do in t he big game, so over and over they would what? options: A: pa

Target Answer: B: front door
Predicted Answer: D: street corner
question: The architect thought that a mezzanine would look good, but the planning committee rejected it. They told the architect that they felt it was a potential hazard given the ages of the people who would be using it. What might they be designing? options: A: actors B: theater C: concert hall D: floors E: school
Target Answer: E: school
Predicted Answer: C: concert hall
question: A person would join a trade school for finding information related to what? options: A: ulcers B: degree C: understanding of D: gaining knowledge E: happiness
Target Answer: D: gaining knowledge
Predicted Answer: C: understanding of
question: Joan was a baby, so there were many things she couldn't do, which caused problems for her parents. Name one thing that makes raising a baby difficult. options: A: arrive early B: learn to walk C: boy or girl D: bring joy E: talk nonsense
Target Answer: E: talk nonsense
Predicted Answer: B: learn to walk


Target Answer: D: dishwasher
Predicted Answer: C: table
question: The rats were hiding in the house, where were they? options: A: sewers B: laboratory C: basement D: clinic E: cellar
Target Answer: E: cellar
Predicted Answer: C: basement
question: Where is a likely place for an ivy plant? options: A: flower pot B: shelf C: windowsill D: outside E: sill
Target Answer: D: outside
Predicted Answer: A: flower pot
question: Where has the newest baseball stadium? options: A: phoenix B: chicago C: antarctica D: san francisco E: urban areas
Target Answer: A: phoenix
Predicted Answer: D: san francisco
question: What type of residence has a ground floor with a stoop? options: A: brownstone B: hotel C: condominium D: entering building E: office building
Target Answer: A: brownstone
Predicted Answer: C: condominium
question: What might happen if someone is not losing weight? options: A: loose skin B: beauty C: miss universe D: death E: healthier
Target Answer: D: death
Predicted Answer: A: loose s

question: What do people feel after having sex that requires them to shower? options: A: bedroom B: pleasant C: obesity D: painful E: dirty
Target Answer: E: dirty
Predicted Answer: D: painful
question: What is the thing that is agitated in your head when kissing? options: A: sexual stimulation B: herpes C: headache D: catch cold E: happiness
Target Answer: E: happiness
Predicted Answer: A: sexual stimulation
question: Billy was reading the newspaper as he commuted to work, but once he got to his destination he balled it up and put it somewhere. Where did it put it? options: A: trash B: floor C: subway D: ground E: lawn
Target Answer: A: trash
Predicted Answer: D: ground
question: Where do you keep a pail in your house? options: A: garage B: pool C: utility room D: hardware store E: wishing well
Target Answer: C: utility room
Predicted Answer: A: garage
question: The man was giving assistance to a pan handler in the streets, how did he give assistance? options: A: feeling good B: killi

question: The child felt like it was all pretend, he didn't understand what? options: A: people believe B: daydreams C: transcendentalism D: laughter E: religion
Target Answer: E: religion
Predicted Answer: A: people believe
question: Where do apples form on an apple tree? options: A: south africa B: sunshine C: new york D: bloom E: trunk
Target Answer: D: bloom
Predicted Answer: E: trunk
question: What can machines do that humans cannot? options: A: fail to work B: perform work C: answering questions D: see work E: fly
Target Answer: E: fly
Predicted Answer: B: perform work
question: The place where my linen closet is really needs repainting a light color as it only has one overhead light. options: A: house B: home C: pool house D: hallway E: bedroom
Target Answer: D: hallway
Predicted Answer: E: bedroom
question: Where is a bird likely to make it's home? options: A: forest B: nest C: roof D: leaves E: sky
Target Answer: A: forest
Predicted Answer: B: nest
question: How would you expr

#### Evaluate the model fine-tuned on social_i_qa for 2 epochs

In [17]:
# Restore the fine-tuned social_i_qa checkpoint.
# The tokenizer and the model weights are read from the same directory.
social_i_qa_checkpoint_dir = './models/social_i_qa'
social_i_qa_model = T5ForConditionalGeneration.from_pretrained(social_i_qa_checkpoint_dir)
social_i_qa_tokenizer = T5Tokenizer.from_pretrained(social_i_qa_checkpoint_dir)

In [18]:
# Read the serialized validation split from disk and wrap it in a
# DataLoader that yields batches of 32 examples.
social_i_qa_valid_dataset = torch.load('./data/social_i_qa/valid_data.pt')
social_i_qa_dataloader = torch.utils.data.DataLoader(
    dataset=social_i_qa_valid_dataset,
    batch_size=32,
)

In [20]:
# Generate predictions
# Both the generated ids and the reference ids are decoded with
# skip_special_tokens=True so that special tokens (e.g. padding or
# end-of-sequence markers) never end up in the strings that are later
# compared for exact-match accuracy. This mirrors how the common_gen
# predictions are decoded further down in this notebook.
social_i_qa_predictions = []
social_i_qa_targets = []
social_i_qa_model.to(device)
social_i_qa_model.eval()  # evaluation mode for inference
with torch.no_grad():  # gradients are not needed when only generating
    for batch in tqdm(social_i_qa_dataloader):
        generated_ids = social_i_qa_model.generate(input_ids = batch['input_ids'].to(device),
                          attention_mask = batch['attention_mask'].to(device),
                          max_length = 16)
        prediction = [social_i_qa_tokenizer.decode(ids, skip_special_tokens=True) for ids in generated_ids]
        target = [social_i_qa_tokenizer.decode(ids, skip_special_tokens=True) for ids in batch['target_ids']]

        # Predictions and targets are appended in lockstep, so index i in one
        # list always corresponds to index i in the other.
        social_i_qa_predictions.extend(prediction)
        social_i_qa_targets.extend(target)

HBox(children=(FloatProgress(value=0.0, max=62.0), HTML(value='')))




In [22]:
# Exact-match accuracy: fraction of decoded predictions equal to the decoded targets
metrics.accuracy_score(y_true=social_i_qa_targets, y_pred=social_i_qa_predictions)

0.6371545547594678

In [25]:
# Collect the positions where the decoded prediction differs from the decoded target
incorrect_idxs = []
for idx, predicted_answer in enumerate(social_i_qa_predictions):
    if predicted_answer != social_i_qa_targets[idx]:
        incorrect_idxs.append(idx)

# For every miss, show the prompt, the reference answer and the model's answer
for incorrect_idx in incorrect_idxs:
    example = social_i_qa_valid_dataset[incorrect_idx]
    question_text = social_i_qa_tokenizer.decode(example['input_ids'])
    target_text = social_i_qa_tokenizer.decode(example['target_ids'])
    print(question_text)
    print("Target Answer: {}".format(target_text))
    print("Predicted Answer: {}".format(social_i_qa_predictions[incorrect_idx]))

question: How would you describe Sydney? context: Sydney walked past a homeless woman asking for change but did not have any money they could give to her. Sydney felt bad afterwards. options: A: sympathetic B: like a person who was unable to help C: incredulous
Target Answer: A: sympathetic
Predicted Answer: B: like a person who was unable to help
question: How would Jordan feel afterwards? context: Jordan was in charge of taking the food on the camping trip and left all the food at home. options: A: horrible that he let his friends down on the camping trip B: happy that he doesn't need to do the cooking on the trip C: very proud and accomplished about the camping trip
Target Answer: A: horrible that he let his friends down on the camping trip
Predicted Answer: C: very proud and accomplished about the camping trip
question: How would you describe Aubrey? context: Aubrey never told Riley the answer and Riley was angry. options: A: rude B: smug at knowing the answer C: annoyed at Riley's

question: What will happen to Riley? context: Riley had a lot of friends. options: A: they will play with Riley B: they will like Riley C: interacted outside the home a lot
Target Answer: A: they will play with Riley
Predicted Answer: C: interacted outside the home a lot
question: Why did Sasha do this? context: Sasha grabbed Robins head and gave it a good shake. options: A: joke around B: bully C: better
Target Answer: A: joke around
Predicted Answer: C: better
question: What does Jordan need to do before this? context: Jordan paid his debt to society by completing several hours of community service. options: A: volunteer to do community service B: commit a crime against society C: be a law-abiding citizen all the time
Target Answer: B: commit a crime against society
Predicted Answer: A: volunteer to do community service
question: What will happen to Others? context: Aubrey met a creepy stranger at the park who was trying to get Aubrey to go home with them. options: A: be kidnapped B:

question: What will happen to Alex? context: Alex started going into labor, so she went to the hospital where she gave birth to Sasha. options: A: she will live a good life B: be happy C: she will be a baby
Target Answer: B: be happy
Predicted Answer: C: she will be a baby
question: What will Jordan want to do next? context: Jordan was mentioned in Casey's chapter but the description of her wasn't very flattering. options: A: tell Casey what she thought of her B: Ask Casey to delete the chapter C: Ask Casey to autograph the book
Target Answer: B: Ask Casey to delete the chapter
Predicted Answer: A: tell Casey what she thought of her
question: What will Remy want to do next? context: Remy was not supposed to do it, but he gave you Skylar's netflix account and pretended it was his own. options: A: get his own Netflix account B: cancel his Netflix account C: know a username
Target Answer: A: get his own Netflix account
Predicted Answer: B: cancel his Netflix account
question: What will ha

Target Answer: B: Carson's mother will scold them
Predicted Answer: C: Others will be punished
question: How would you describe Kai? context: Kai had some choice words to say but she decided to keep the language in check. options: A: intelligent B: ignorant C: like the better person for not engaging in the petty fight
Target Answer: C: like the better person for not engaging in the petty fight
Predicted Answer: A: intelligent
question: What will Tracy want to do next? context: Tracy gave birth to Addison and was very proud of her new child that day. options: A: have a baby of her own B: proud C: experience motherhood for the first time
Target Answer: C: experience motherhood for the first time
Predicted Answer: A: have a baby of her own
question: What will Addison want to do next? context: Addison and their friends were playing hide and seek at recess. Addison ran away to go find a hiding place. options: A: win the game of tag B: Come outside C: win the game of hide and seek
Target Ans

question: How would Carson feel as a result? context: kai was an outrageous fool so he stole carson's tools. options: A: as calm B: out of control C: as angry
Target Answer: C: as angry
Predicted Answer: B: out of control
question: What will Skylar want to do next? context: Kendall increased to Skylar's rate and felt that Skylar was too expensive and wanted to charge less themselves. options: A: convince Kendall to stay at their rate B: berate skylar C: find somewhere else
Target Answer: A: convince Kendall to stay at their rate
Predicted Answer: C: find somewhere else
question: How would Sasha feel as a result? context: Skylar has been working every day at getting her blackbelt and finally joined Sasha as having one. options: A: happy she finally earned her blackbelt B: glad Skylar got a blackbelt C: glad Skylar got a green belt
Target Answer: B: glad Skylar got a blackbelt
Predicted Answer: A: happy she finally earned her blackbelt
question: How would you describe Addison? context: A

question: Why did Carson do this? context: Carson was trying to study for a big test, so Carson turned Sydney's music down. options: A: listen to loud music B: make Sydney upset C: make things quiet
Target Answer: C: make things quiet
Predicted Answer: B: make Sydney upset
question: Why did Jesse do this? context: Jesse got the results for the test and got an A as a grade. options: A: keep it up B: celebrate C: studied hard
Target Answer: C: studied hard
Predicted Answer: A: keep it up
question: What will Robin want to do next? context: Robin took the test quickly because they wanted to pass quickly with flying colors. options: A: pass B: do well on the test C: fail
Target Answer: B: do well on the test
Predicted Answer: A: pass
question: How would Remy feel as a result? context: Quinn murders Remy's wife after Remy stole a lot of money from the family. options: A: had felt passive B: like the got away with it C: had felt angry
Target Answer: C: had felt angry
Predicted Answer: B: like

Target Answer: A: cruel
Predicted Answer: C: someone who likes to pull pranks
question: What will Sydney want to do next? context: Sydney took the trash to the burn barrel then set the trash on fire. options: A: Go inside B: Put out the fire C: Gather the trash needed for disposal
Target Answer: A: Go inside
Predicted Answer: B: Put out the fire
question: What will Jordan want to do next? context: Jordan mentioned Casey in chapter one, because he is planning on bending the knee with a ring in hand very soon. options: A: go to a theme park with Casey B: propose to Casey for marriage C: buy the ring
Target Answer: B: propose to Casey for marriage
Predicted Answer: C: buy the ring
question: How would Others feel as a result? context: Casey decided to learn about cooking so she took a Saturday evening cooking class instead of spending time with her boyfriend. options: A: excited to date a good cook B: like they are cooking too C: inattentive to the needs of her boyfriend at time
Target Ans

question: How would you describe Skylar? context: Skylar could not hang the moon but they could turn silver into gold. options: A: Talented B: Hated C: like a wizard
Target Answer: A: Talented
Predicted Answer: C: like a wizard
question: How would the man feel after? context: sasha had enough, she gave the man her two cents and let him have it. options: A: sorry for their actions B: big and proud C: frustrated
Target Answer: A: sorry for their actions
Predicted Answer: B: big and proud
question: Why did Quinn do this? context: quinn became friends with a cool guy who had a house so quinn moved into the house. options: A: be nice to the cool guy B: be able to be next to the guy whenever she wanted C: go against her parents
Target Answer: B: be able to be next to the guy whenever she wanted
Predicted Answer: A: be nice to the cool guy
question: What will Austin want to do next? context: Sasha spent Austin's money trying to win a prize even when the odds were stacked against her. options:

question: How would you describe Kendall? context: Kendall watched baseball every week to cheer for their favorite team. options: A: Indifferent to watch B: Loyal to others C: excited
Target Answer: B: Loyal to others
Predicted Answer: C: excited
question: What will happen to Others? context: Skylar played some soccer with their friend after school that day and had fun. options: A: join the game B: go to a bakery C: be tired from playing soccer
Target Answer: C: be tired from playing soccer
Predicted Answer: A: join the game
question: How would you describe Remy? context: Remy was playing a video game in their spare time. They prevented the enemies from engaging. options: A: a technical person B: a hard worker C: a skilled laborer
Target Answer: A: a technical person
Predicted Answer: B: a hard worker
question: What will happen to Riley? context: Riley grew very suspicious of Jesse because they snuck out every night at 11 PM. options: A: lock all the doors and windows at night B: be pu

Target Answer: B: go to sleep
Predicted Answer: C: hear the snoring
question: What does Lee need to do before this? context: Lee made copies of the their poster before starting his mayoral campaign. options: A: needed to design the poster for his campaign B: has the funds C: needed to drop out of the campaign
Target Answer: A: needed to design the poster for his campaign
Predicted Answer: B: has the funds
question: What does Taylor need to do before this? context: Their friend asked them to take a look at the engine, so Taylor studied the car carefully. options: A: open the hood B: be a mechanic C: buy a car
Target Answer: B: be a mechanic
Predicted Answer: C: buy a car
question: What will Jesse want to do next? context: Jesse was out sick for the day so Tracy was covering his class. She took his student's to the playground for recess. options: A: get better B: review their homework C: cover the next lesson
Target Answer: A: get better
Predicted Answer: C: cover the next lesson
questio

question: What will Cameron want to do next? context: Cameron got to the yard and stopped the dog fight before someone else. options: A: wash his hands good B: laugh at the dogs C: save on vet bills
Target Answer: A: wash his hands good
Predicted Answer: C: save on vet bills
question: What will Others want to do next? context: jan went to the same school for years so she was best friends with a girl. options: A: appreciate jan B: make sure they kept up their relationship C: hate jan
Target Answer: B: make sure they kept up their relationship
Predicted Answer: A: appreciate jan
question: Why did Alex do this? context: Alex walked Robin towards the execution chamber for her last meal. options: A: work at the jail B: So Robin can eat C: release her
Target Answer: B: So Robin can eat
Predicted Answer: C: release her
question: How would you describe Kendall? context: Kendall took Skylar's schedule into account when planning the trip for their summer vacation. options: A: supported B: includ

Predicted Answer: C: lose money
question: What will happen to Others? context: Riley and their friend were best friends. options: A: share happiness with their friend B: share sadness with their friend C: have fun doing things with Riley
Target Answer: A: share happiness with their friend
Predicted Answer: C: have fun doing things with Riley
question: What does Cameron need to do before this? context: Cameron took Jan's motorcycle for a spin without permission. options: A: Check the gas B: Grab his jacket C: Check the oil
Target Answer: A: Check the gas
Predicted Answer: B: Grab his jacket
question: How would you describe Sasha? context: Sasha was playing a game but they got tired of it. options: A: fed up B: Someone that want's something else to do C: bored
Target Answer: C: bored
Predicted Answer: A: fed up
question: What will Bailey want to do next? context: Cameron had hired Bailey to help with an important job and felt satisfied with Bailey's work. options: A: Apply for unemployme

Target Answer: A: make Sydney take charge
Predicted Answer: C: relax
question: What will Carson want to do next? context: Carson ordered a large pizza for dinner. options: A: cut the grass B: washes his hands before dinner C: take out the dog
Target Answer: B: washes his hands before dinner
Predicted Answer: C: take out the dog
question: What will Kai want to do next? context: Kai wanted everything on their list for Christmas. options: A: go Christmas shopping B: joy C: tell their list
Target Answer: C: tell their list
Predicted Answer: A: go Christmas shopping
question: What will happen to Alex? context: Alex set Riley on fire in the game when Riley had low health left. options: A: win the game B: will get sick after the game C: will quit playing
Target Answer: A: win the game
Predicted Answer: B: will get sick after the game
question: What will their step dad want to do next? context: Riley left home with their sister today, because they were grounded by their step dad. options: A: r

question: What will Others want to do next? context: Bailey had been feeling angry for a quite a while, one day she had enough and expressed her anger to the others. options: A: leave Bailey alone B: release pressure C: Apologize to Bailey
Target Answer: A: leave Bailey alone
Predicted Answer: C: Apologize to Bailey
question: How would you describe Quinn? context: Quinn saw their dog get hit by a car. They cried their eyes out. options: A: terrible B: sentimental C: vengeful
Target Answer: B: sentimental
Predicted Answer: C: vengeful
question: How would you describe Jesse? context: Jesse went to the zoo with people from church. They saw all the animals in the zoo. options: A: inquisitive B: excited C: wandering
Target Answer: B: excited
Predicted Answer: A: inquisitive
question: How would Jan feel afterwards? context: Jan went to work on the morning of the party because she was called in. options: A: tired B: happy C: loyal to work
Target Answer: A: tired
Predicted Answer: C: loyal to 

Target Answer: B: solve problems
Predicted Answer: A: take care of it
question: What will Bailey want to do next? context: Bailey got a promotion at work after working there for only a week. options: A: fire their boss B: find a job with better promotions C: do a good job for more promotions
Target Answer: C: do a good job for more promotions
Predicted Answer: B: find a job with better promotions
question: How would Tracy feel as a result? context: Quinn held onto Tracy's forms because Tracy didn't want Quinn to lose them on the bus. options: A: be mad about being the form guide B: dutiful C: be relieved to not have to locate the forms
Target Answer: B: dutiful
Predicted Answer: C: be relieved to not have to locate the forms
question: What will Kendall want to do next? context: Kendall got ready and snatched their journal away to keep it private. options: A: lock their journal away B: hide information C: hide their journal
Target Answer: C: hide their journal
Predicted Answer: A: lock 

Target Answer: B: hopeful
Predicted Answer: A: A hardworking person
question: How would Riley feel as a result? context: Riley talked to their friends at the party and had a good time. options: A: connected to friends B: social C: one with friends
Target Answer: A: connected to friends
Predicted Answer: B: social
question: How would you describe Carson? context: Carson dug around in the bag and ate Bailey's candy once he had found the stash of it. options: A: Someone who hates candy and sweets B: Someone who steals from people C: angry
Target Answer: B: Someone who steals from people
Predicted Answer: A: Someone who hates candy and sweets
question: What will Jesse want to do next? context: Bailey passed Jesse an examination booklet and they both grabbed their pencils and waited for the go ahead to start the test. options: A: do well on the test B: finish the test C: fill in the test
Target Answer: A: do well on the test
Predicted Answer: B: finish the test
question: How would the other

question: How would you describe Jordan? context: Jordan affected children's happiness by always yelling at them and telling them no. options: A: mad B: authoritative C: regretful
Target Answer: B: authoritative
Predicted Answer: A: mad
question: What does Aubrey need to do before this? context: Aubrey took tennis lessons as a method to get in shape. options: A: stay healthy B: go to the game C: get tennis clothes
Target Answer: C: get tennis clothes
Predicted Answer: B: go to the game
question: How would you describe Addison? context: Addison turned their music down because they were making noise. options: A: bored B: annoyed C: thoughtful
Target Answer: C: thoughtful
Predicted Answer: B: annoyed
question: Why did Cameron do this? context: Cameron moved Kendall's body into a trash bag. options: A: was nice B: killed Kendall C: needed to buy trash bags
Target Answer: B: killed Kendall
Predicted Answer: A: was nice
question: What will Casey want to do next? context: Casey finds a baby k

question: What will Bailey want to do next? context: Bailey passed Jesse an examination booklet after she decided not to study that subject anymore. options: A: give the booklet back to Bailey B: leave the class C: throw away the examination booklet
Target Answer: B: leave the class
Predicted Answer: A: give the booklet back to Bailey
question: How would you describe Skylar? context: Skylar was outdoors in the wintertime and pulled the wool over their eyes and face. options: A: feeling cold B: wearing wool C: feeling warm
Target Answer: A: feeling cold
Predicted Answer: C: feeling warm
question: What does Skylar need to do before this? context: Skylar distributed the data in several forms without double checking the figures. options: A: gather the data B: hand out the forms C: rush things
Target Answer: C: rush things
Predicted Answer: A: gather the data
question: How would Aubrey feel as a result? context: Aubrey sounded good today considering they had been sick last week. options: A:

question: What will Others want to do next? context: Alex and his cohorts, the former criminal masterminds for the Lupino family, made their escape from jail. options: A: Bribe the guards B: contact the FBI C: Plan the escape
Target Answer: B: contact the FBI
Predicted Answer: C: Plan the escape
question: How would you describe Kai? context: Kai was swinging the bat without paying attention and swung through the posts. options: A: Angry B: Cautious C: Think about that swing
Target Answer: C: Think about that swing
Predicted Answer: A: Angry
question: How would you describe Riley? context: Riley was competing in a gymnastics meet. Riley felt worse after she lost. options: A: competitive B: upset C: regretful
Target Answer: B: upset
Predicted Answer: A: competitive
question: How would Others feel as a result? context: Jordan found out that they were very ill, but they made light of the ailment. options: A: feeling sad B: Humorous C: Uncomfortable
Target Answer: C: Uncomfortable
Predicted

#### Evaluate the model fine-tuned on common_gen for 1 epoch

In [3]:
# Restore the fine-tuned common_gen checkpoint.
# The tokenizer and the model weights are read from the same directory.
common_gen_checkpoint_dir = './models/common_gen'
common_gen_model = T5ForConditionalGeneration.from_pretrained(common_gen_checkpoint_dir)
common_gen_tokenizer = T5Tokenizer.from_pretrained(common_gen_checkpoint_dir)

In [13]:
# Read the serialized validation split from disk and wrap it in a
# DataLoader that yields batches of 32 examples.
common_gen_valid_dataset = torch.load('./data/common_gen/valid_data.pt')
common_gen_dataloader = torch.utils.data.DataLoader(
    dataset=common_gen_valid_dataset,
    batch_size=32,
)

In [21]:
# Generate predictions with beam search and collect the reference sentences.
# Predictions and targets are decoded with the *same* settings so that both
# sides are normalised identically (special tokens stripped, no extra
# whitespace clean-up) before they are inspected or compared.
decode_kwargs = dict(skip_special_tokens=True, clean_up_tokenization_spaces=False)

common_gen_predictions = []
common_gen_targets = []
common_gen_model.to(device)
common_gen_model.eval()  # evaluation mode for inference
with torch.no_grad():  # gradients are not needed when only generating
    for batch in tqdm(common_gen_dataloader):
        generated_ids = common_gen_model.generate(
            input_ids = batch["input_ids"].to(device),
            attention_mask = batch["attention_mask"].to(device),
            num_beams = 5,
            length_penalty = 0.6,
            max_length = 32 + 2,  # +2 from original because we start at step=1 and stop before max_length
            #min_length = 1 + 1,  # +1 from original because we start at step=1
            no_repeat_ngram_size = 2,
            early_stopping = True
        )
        prediction = [common_gen_tokenizer.decode(ids, **decode_kwargs) for ids in generated_ids]
        target = [common_gen_tokenizer.decode(ids, **decode_kwargs) for ids in batch['target_ids']]

        # Predictions and targets are appended in lockstep, so index i in one
        # list always corresponds to index i in the other.
        common_gen_predictions.extend(prediction)
        common_gen_targets.extend(target)

HBox(children=(FloatProgress(value=0.0, max=13.0), HTML(value='')))




In [24]:
# Preview the first 100 generated sentences
common_gen_predictions[:100]

['A man stands in a field looking up.',
 'A man stands in a field looking up.',
 'A man stands in a field looking up.',
 'A man stands in a field looking up.',
 'A kid is dancing in a room.',
 'A kid is dancing in a room.',
 'A kid is dancing in a room.',
 'A kid is dancing in a room.',
 'A cat is petting on a couch.',
 'A cat is petting on a couch.',
 'A cat is petting on a couch.',
 'A cat is petting on a couch.',
 'A man climbs the side of a building.',
 'A man climbs the side of a building.',
 'A man climbs the side of a building.',
 'A man climbs the side of a building.',
 'A man climbs a wall and talks to someone.',
 'A man climbs a wall and talks to someone.',
 'A man climbs a wall and talks to someone.',
 'A man climbs a wall and talks to someone.',
 'A car driving in the snow.',
 'A car driving in the snow.',
 'A car driving in the snow.',
 'A car driving in the snow.',
 'A woman is wearing a phone to talk to someone.',
 'A woman is wearing a phone to talk to someone.',
 'A wo

In [25]:
# Preview the first 100 reference sentences
common_gen_targets[:100]

['The player stood in the field looking at the batter.',
 'The coach stands along the field, looking at the goalkeeper.',
 'I stood and looked across the field, peacefully.',
 'Someone stands, looking around the empty field.',
 'The silly kid loves to dance in her room.',
 'the dance kid room is full of kids',
 'A kid is dancing in the room.',
 'A group of kids are dancing around a living room.',
 'A pet cat likes to sleep on a couch.',
 'My pet cat love to sleep on the couch.',
 'The woman pet the cat that was sat on the couch.',
 'actor petting long haired gray cat on couch',
 'The mouse climbed the side of the building.',
 'I climbed the side of the building.',
 'A girl climbs up the side of a building.',
 'ivy climbs the side of a building.',
 'The woman thats talking teaches her students how to climb the wall.',
 'The person climbed up the wall to talk to the man.',
 'The man talked about how to climb walls.',
 'A man is talking in front of a climbing wall.',
 'The car drove throu

In [23]:
# Decode the input prompts of the first 100 validation examples so they can
# be read next to the predictions and targets shown above.
first_hundred = slice(0, 100)
input_ids = common_gen_valid_dataset[first_hundred]['input_ids']
list(map(common_gen_tokenizer.decode, input_ids))

['generate sentence: field look stand',
 'generate sentence: field look stand',
 'generate sentence: field look stand',
 'generate sentence: field look stand',
 'generate sentence: kid room dance',
 'generate sentence: kid room dance',
 'generate sentence: kid room dance',
 'generate sentence: kid room dance',
 'generate sentence: cat pet couch',
 'generate sentence: cat pet couch',
 'generate sentence: cat pet couch',
 'generate sentence: cat pet couch',
 'generate sentence: climb building side',
 'generate sentence: climb building side',
 'generate sentence: climb building side',
 'generate sentence: climb building side',
 'generate sentence: climb wall talk',
 'generate sentence: climb wall talk',
 'generate sentence: climb wall talk',
 'generate sentence: climb wall talk',
 'generate sentence: drive snow car',
 'generate sentence: drive snow car',
 'generate sentence: drive snow car',
 'generate sentence: drive snow car',
 'generate sentence: talk wear phone',
 'generate sentence: 