In [1]:
import torch
import nlp

from transformers import T5ForConditionalGeneration, T5Tokenizer

from tqdm.auto import tqdm

from sklearn import metrics

In [2]:
# Pick the GPU when one is usable, otherwise fall back to the CPU.
# `torch.cuda.is_available` must be *called*: the bare function object is always
# truthy, which would select 'cuda' even on a machine without a GPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

#### Evaluate the model fine-tuned on commonsense_qa for 10 epochs

In [4]:
# Load the pretrained model
# The model weights and the tokenizer are both read from the same local
# checkpoint directory, so the path is named once and reused.
commonsense_qa_checkpoint = './models/commonsense_qa/10_epochs'
commonsense_qa_model = T5ForConditionalGeneration.from_pretrained(commonsense_qa_checkpoint)
commonsense_qa_tokenizer = T5Tokenizer.from_pretrained(commonsense_qa_checkpoint)

In [7]:
# Load the validation dataset
# NOTE: torch.load unpickles the file, so only point it at data you trust.
commonsense_qa_valid_dataset = torch.load('./data/commonsense_qa/valid_data.pt')
# Shuffling is left at its default (off), so batches follow dataset order; the
# error-analysis cell relies on that to map prediction positions back to examples.
commonsense_qa_dataloader = torch.utils.data.DataLoader(
    dataset=commonsense_qa_valid_dataset,
    batch_size=16,
)

In [8]:
# Number of validation examples (displayed as the cell output).
len(commonsense_qa_valid_dataset)

1221

In [11]:
# Generate predictions
# Both lists hold decoded answer strings, one per validation example, appended
# batch by batch in dataloader order.
commonsense_qa_predictions = []
commonsense_qa_targets = []
commonsense_qa_model.to(device)
commonsense_qa_model.eval()  # inference mode for the module (e.g. dropout off)
with torch.no_grad():  # no gradients are needed while generating
    for batch in tqdm(commonsense_qa_dataloader):
        # NOTE(review): max_length caps the generated answer; confirm it covers
        # the longest target, otherwise long answers can never match exactly.
        generated_ids = commonsense_qa_model.generate(
            input_ids=batch['input_ids'].to(device),
            attention_mask=batch['attention_mask'].to(device),
            max_length=10,
        )
        # skip_special_tokens=True removes padding / end-of-sequence markers from
        # the decoded text. Without it the exact-match comparison below depends on
        # how the tokenizer version renders those tokens, and padded targets may
        # never equal unpadded generations.
        prediction = [
            commonsense_qa_tokenizer.decode(ids, skip_special_tokens=True)
            for ids in generated_ids
        ]
        target = [
            commonsense_qa_tokenizer.decode(ids, skip_special_tokens=True)
            for ids in batch['target_ids']
        ]

        commonsense_qa_predictions.extend(prediction)
        commonsense_qa_targets.extend(target)

HBox(children=(FloatProgress(value=0.0, max=77.0), HTML(value='')))




In [12]:
# Exact-match accuracy: the share of examples whose decoded prediction string
# equals the decoded target string.
metrics.accuracy_score(
    y_true=commonsense_qa_targets,
    y_pred=commonsense_qa_predictions,
)

0.6257166257166257

In [13]:
# Error analysis: print every validation example whose prediction differs from
# its target. Positions in the prediction list are used as dataset indices,
# which assumes the dataloader iterated the dataset in order.
incorrect_idxs = [
    idx
    for idx, (predicted, expected) in enumerate(
        zip(commonsense_qa_predictions, commonsense_qa_targets)
    )
    if predicted != expected
]
for incorrect_idx in incorrect_idxs:
    # Fetch the example once instead of indexing the dataset per field.
    example = commonsense_qa_valid_dataset[incorrect_idx]
    # skip_special_tokens=True keeps padding / end-of-sequence markers out of the
    # printed prompt and target on tokenizer versions that would render them.
    print(commonsense_qa_tokenizer.decode(example['input_ids'], skip_special_tokens=True))
    print("Target Answer: {}".format(
        commonsense_qa_tokenizer.decode(example['target_ids'], skip_special_tokens=True)))
    print("Predicted Answer: {}".format(commonsense_qa_predictions[incorrect_idx]))

question: A revolving door is convenient for two direction travel, but it also serves as a security measure at a what? options: A: bank B: library C: department store D: mall E: new york
Target Answer: A: bank
Predicted Answer: D: mall
question: Where would you find magazines along side many other printed works? options: A: doctor B: bookstore C: market D: train station E: mortuary
Target Answer: B: bookstore
Predicted Answer: C: market
question: James was looking for a good place to buy farmland. Where might he look? options: A: midwest B: countryside C: estate D: farming areas E: illinois
Target Answer: A: midwest
Predicted Answer: C: estate
question: What island country is ferret popular? options: A: own home B: north carolina C: great britain D: hutch E: outdoors
Target Answer: C: great britain
Predicted Answer: B: north carolina
question: What do animals do when an enemy is approaching? options: A: feel pleasure B: procreate C: pass water D: listen to each other E: sing
Target Ans

question: Bob's only light source was a small bulb. There were four walls, if there was a door he couldn't see it. What was Bob in? options: A: closed room B: sky C: dard D: his grave E: house
Target Answer: A: closed room
Predicted Answer: E: house
question: James thought of criminal justice like a computer program. It need to work right. What ideas might James not like? options: A: manual B: process information C: power down D: control model E: reason exists
Target Answer: D: control model
Predicted Answer: A: manual
question: With the card slot lit up he knew how to get started finding his balance with what? options: A: slot machine B: ticket machine C: bank machine D: telephone E: automated teller
Target Answer: E: automated teller
Predicted Answer: C: bank machine
question: The child pretended he was reading newspaper, he couldn't actually do it without what? options: A: patience B: falling down C: literacy D: buying E: knowing how to read
Target Answer: E: knowing how to read
Pre

question: What could a driving car do to a pedestrian? options: A: say hello B: wreak C: pollution D: smoke E: relaxation
Target Answer: B: wreak
Predicted Answer: A: say hello
question: What might a couple have a lot of when they are deciding on stopping being married to each other? options: A: pleasure B: detachment C: exercise D: bankruptcy E: fights
Target Answer: E: fights
Predicted Answer: B: detachment
question: Billy hated using other people's toilets. He was only comfortable on his own. So whenever he needed to poop, he would go back to his what? options: A: bathroom B: motel room C: nearest public restroom D: house E: apartment
Target Answer: D: house
Predicted Answer: A: bathroom
question: The forest experienced a cold winter, where is it located? options: A: earth B: south america C: amazon basin D: temperate zone E: national park
Target Answer: D: temperate zone
Predicted Answer: C: amazon basin
question: Where is one likely to find a brownstone? options: A: new york city 

Target Answer: C: mammals
Predicted Answer: B: males
question: John was an aristocratic fox hunter. Where might he live? options: A: england B: new hampshire C: street D: arkansas E: north dakota
Target Answer: B: new hampshire
Predicted Answer: A: england
question: Where is a grape likely to be being fed to someone else? options: A: field B: bathroom C: michigan D: minnesota E: painting
Target Answer: E: painting
Predicted Answer: B: bathroom
question: How is riding a bike getting it to move? options: A: practice B: sense of balance C: driving D: good balance E: pedalling
Target Answer: E: pedalling
Predicted Answer: A: practice
question: WHat type of keyboard is made up of one or more pipe divisions? options: A: killing B: typewriter C: office D: terminal E: organ
Target Answer: E: organ
Predicted Answer: B: typewriter
question: If I want to open a steakhouse, what should I get first? options: A: michigan B: florida C: wine D: texas E: building
Target Answer: E: building
Predicted An

question: Where would a person live if they wanted no neighbors? options: A: housing estate B: neighborhood C: mars D: woods E: suburbs
Target Answer: D: woods
Predicted Answer: E: suburbs
question: What happens to a dog before someone puts up posters of them? options: A: get lost B: require water C: trained D: bark E: roll over
Target Answer: A: get lost
Predicted Answer: D: bark
question: Where are the most famous BBQ steakhouses in america? options: A: texas B: building C: kansas city D: maine E: falling down
Target Answer: A: texas
Predicted Answer: D: maine
question: The end of the barrel of what primitive firearm is bell shaped? options: A: barbell B: funnel C: blunderbuss D: wind instrument E: kettlebell
Target Answer: C: blunderbuss
Predicted Answer: E: kettlebell
question: Where would you get some maps that you own? options: A: bookstore B: library C: electrical circuit D: cabinet E: important when traveling
Target Answer: D: cabinet
Predicted Answer: B: library
question: A st

question: He was trying to procreate with many individuals, this led to a what? options: A: moaning B: die C: kiss D: std E: sanity
Target Answer: D: std
Predicted Answer: B: die
question: The juror was quite bored and zoning out but wanted to convey he was hearing testimony, so he just sat there doing what? options: A: take notes B: nodding C: change of heart D: writing down E: listening
Target Answer: B: nodding
Predicted Answer: E: listening
question: After working on the car, what did it end up doing? options: A: going too fast B: last several years C: honk the horn D: go fast E: start running
Target Answer: E: start running
Predicted Answer: A: going too fast
question: Where is there a telephone book in almost every room? options: A: at hotel B: house C: library D: bedsit E: closet
Target Answer: A: at hotel
Predicted Answer: C: library
question: Some people got escorted out of the library, they were probably what? options: A: state facts B: talking loudly C: making money D: amoun

question: What might happen if someone is not losing weight? options: A: loose skin B: beauty C: miss universe D: death E: healthier
Target Answer: D: death
Predicted Answer: A: loose skin
question: When getting in shape, this is something that does wonders? options: A: eat more B: starve C: give up D: period of recovery E: jogging
Target Answer: E: jogging
Predicted Answer: D: period of recovery
question: What could prevent a driving car from continuing to drive? options: A: automobile accidents B: backache C: pollution D: smoke E: low fuel tank
Target Answer: A: automobile accidents
Predicted Answer: E: low fuel tank
question: What is a person likely to experience after they stop being married to a mean person? options: A: isolation B: grief C: happiness D: relief E: angry
Target Answer: C: happiness
Predicted Answer: B: grief
question: Despite the large crowds, how did the depressed man feel? options: A: small group B: alone C: solitary D: solitude E: panic
Target Answer: C: solitar

question: There was no shade for Jenny. She was forced to lie there exposed to what? options: A: full sunlight B: bright sunshine C: sunny place D: eat cake E: direct sunlight
Target Answer: A: full sunlight
Predicted Answer: E: direct sunlight
question: What do people feel after having sex that requires them to shower? options: A: bedroom B: pleasant C: obesity D: painful E: dirty
Target Answer: E: dirty
Predicted Answer: D: painful
question: What is the thing that is agitated in your head when kissing? options: A: sexual stimulation B: herpes C: headache D: catch cold E: happiness
Target Answer: E: happiness
Predicted Answer: C: headache
question: Billy was reading the newspaper as he commuted to work, but once he got to his destination he balled it up and put it somewhere. Where did it put it? options: A: trash B: floor C: subway D: ground E: lawn
Target Answer: A: trash
Predicted Answer: D: ground
question: Where do you keep a pail in your house? options: A: garage B: pool C: utili

Target Answer: D: cast shadow
Predicted Answer: C: open
question: What kind of food makes someone sick? options: A: boat B: necessary to live C: edible D: unhealthy E: kitchen
Target Answer: D: unhealthy
Predicted Answer: C: edible
question: Where would you find the sharpest parts of a triangle? options: A: three vertices B: point C: 3 sides D: three sides E: math book
Target Answer: A: three vertices
Predicted Answer: B: point
question: What does going to bed with your spouse for sex lead to? options: A: bad dreams B: a good nights sleep C: rest D: sleepiness E: get pregnant
Target Answer: E: get pregnant
Predicted Answer: D: sleepiness
question: What would it be if they get a surprising show over and over? options: A: surprise B: fight C: annoyance D: might scare E: irritated
Target Answer: C: annoyance
Predicted Answer: A: surprise
question: The newlyweds began copulating their marriage, they wanted many what? options: A: babies B: odors C: sadness D: rapport E: ejaculation
Target A

#### Evaluate the model fine-tuned on social_i_qa for 2 epochs

In [17]:
# Load the pretrained model
# The model weights and the tokenizer are both read from the same local
# checkpoint directory, so the path is named once and reused.
social_i_qa_checkpoint = './models/social_i_qa'
social_i_qa_model = T5ForConditionalGeneration.from_pretrained(social_i_qa_checkpoint)
social_i_qa_tokenizer = T5Tokenizer.from_pretrained(social_i_qa_checkpoint)

In [18]:
# Load the validation dataset
# NOTE: torch.load unpickles the file, so only point it at data you trust.
social_i_qa_valid_dataset = torch.load('./data/social_i_qa/valid_data.pt')
# Shuffling is left at its default (off), so batches follow dataset order; the
# error-analysis cell relies on that to map prediction positions back to examples.
social_i_qa_dataloader = torch.utils.data.DataLoader(
    dataset=social_i_qa_valid_dataset,
    batch_size=32,
)

In [20]:
# Generate predictions
# Both lists hold decoded answer strings, one per validation example, appended
# batch by batch in dataloader order.
social_i_qa_predictions = []
social_i_qa_targets = []
social_i_qa_model.to(device)
social_i_qa_model.eval()  # inference mode for the module (e.g. dropout off)
with torch.no_grad():  # no gradients are needed while generating
    for batch in tqdm(social_i_qa_dataloader):
        # NOTE(review): max_length caps the generated answer; confirm it covers
        # the longest target, otherwise long answers can never match exactly.
        generated_ids = social_i_qa_model.generate(
            input_ids=batch['input_ids'].to(device),
            attention_mask=batch['attention_mask'].to(device),
            max_length=16,
        )
        # skip_special_tokens=True removes padding / end-of-sequence markers from
        # the decoded text. Without it the exact-match comparison below depends on
        # how the tokenizer version renders those tokens, and padded targets may
        # never equal unpadded generations.
        prediction = [
            social_i_qa_tokenizer.decode(ids, skip_special_tokens=True)
            for ids in generated_ids
        ]
        target = [
            social_i_qa_tokenizer.decode(ids, skip_special_tokens=True)
            for ids in batch['target_ids']
        ]

        social_i_qa_predictions.extend(prediction)
        social_i_qa_targets.extend(target)

HBox(children=(FloatProgress(value=0.0, max=62.0), HTML(value='')))




In [22]:
# Exact-match accuracy: the share of examples whose decoded prediction string
# equals the decoded target string.
metrics.accuracy_score(
    y_true=social_i_qa_targets,
    y_pred=social_i_qa_predictions,
)

0.6371545547594678

In [25]:
# Error analysis: print every validation example whose prediction differs from
# its target. Positions in the prediction list are used as dataset indices,
# which assumes the dataloader iterated the dataset in order.
incorrect_idxs = [
    idx
    for idx, (predicted, expected) in enumerate(
        zip(social_i_qa_predictions, social_i_qa_targets)
    )
    if predicted != expected
]
for incorrect_idx in incorrect_idxs:
    # Fetch the example once instead of indexing the dataset per field.
    example = social_i_qa_valid_dataset[incorrect_idx]
    # skip_special_tokens=True keeps padding / end-of-sequence markers out of the
    # printed prompt and target on tokenizer versions that would render them.
    print(social_i_qa_tokenizer.decode(example['input_ids'], skip_special_tokens=True))
    print("Target Answer: {}".format(
        social_i_qa_tokenizer.decode(example['target_ids'], skip_special_tokens=True)))
    print("Predicted Answer: {}".format(social_i_qa_predictions[incorrect_idx]))

question: How would you describe Sydney? context: Sydney walked past a homeless woman asking for change but did not have any money they could give to her. Sydney felt bad afterwards. options: A: sympathetic B: like a person who was unable to help C: incredulous
Target Answer: A: sympathetic
Predicted Answer: B: like a person who was unable to help
question: How would Jordan feel afterwards? context: Jordan was in charge of taking the food on the camping trip and left all the food at home. options: A: horrible that he let his friends down on the camping trip B: happy that he doesn't need to do the cooking on the trip C: very proud and accomplished about the camping trip
Target Answer: A: horrible that he let his friends down on the camping trip
Predicted Answer: C: very proud and accomplished about the camping trip
question: How would you describe Aubrey? context: Aubrey never told Riley the answer and Riley was angry. options: A: rude B: smug at knowing the answer C: annoyed at Riley's

question: What will happen to Riley? context: Riley had a lot of friends. options: A: they will play with Riley B: they will like Riley C: interacted outside the home a lot
Target Answer: A: they will play with Riley
Predicted Answer: C: interacted outside the home a lot
question: Why did Sasha do this? context: Sasha grabbed Robins head and gave it a good shake. options: A: joke around B: bully C: better
Target Answer: A: joke around
Predicted Answer: C: better
question: What does Jordan need to do before this? context: Jordan paid his debt to society by completing several hours of community service. options: A: volunteer to do community service B: commit a crime against society C: be a law-abiding citizen all the time
Target Answer: B: commit a crime against society
Predicted Answer: A: volunteer to do community service
question: What will happen to Others? context: Aubrey met a creepy stranger at the park who was trying to get Aubrey to go home with them. options: A: be kidnapped B:

question: What will happen to Alex? context: Alex started going into labor, so she went to the hospital where she gave birth to Sasha. options: A: she will live a good life B: be happy C: she will be a baby
Target Answer: B: be happy
Predicted Answer: C: she will be a baby
question: What will Jordan want to do next? context: Jordan was mentioned in Casey's chapter but the description of her wasn't very flattering. options: A: tell Casey what she thought of her B: Ask Casey to delete the chapter C: Ask Casey to autograph the book
Target Answer: B: Ask Casey to delete the chapter
Predicted Answer: A: tell Casey what she thought of her
question: What will Remy want to do next? context: Remy was not supposed to do it, but he gave you Skylar's netflix account and pretended it was his own. options: A: get his own Netflix account B: cancel his Netflix account C: know a username
Target Answer: A: get his own Netflix account
Predicted Answer: B: cancel his Netflix account
question: What will ha

Target Answer: B: Carson's mother will scold them
Predicted Answer: C: Others will be punished
question: How would you describe Kai? context: Kai had some choice words to say but she decided to keep the language in check. options: A: intelligent B: ignorant C: like the better person for not engaging in the petty fight
Target Answer: C: like the better person for not engaging in the petty fight
Predicted Answer: A: intelligent
question: What will Tracy want to do next? context: Tracy gave birth to Addison and was very proud of her new child that day. options: A: have a baby of her own B: proud C: experience motherhood for the first time
Target Answer: C: experience motherhood for the first time
Predicted Answer: A: have a baby of her own
question: What will Addison want to do next? context: Addison and their friends were playing hide and seek at recess. Addison ran away to go find a hiding place. options: A: win the game of tag B: Come outside C: win the game of hide and seek
Target Ans

question: How would Carson feel as a result? context: kai was an outrageous fool so he stole carson's tools. options: A: as calm B: out of control C: as angry
Target Answer: C: as angry
Predicted Answer: B: out of control
question: What will Skylar want to do next? context: Kendall increased to Skylar's rate and felt that Skylar was too expensive and wanted to charge less themselves. options: A: convince Kendall to stay at their rate B: berate skylar C: find somewhere else
Target Answer: A: convince Kendall to stay at their rate
Predicted Answer: C: find somewhere else
question: How would Sasha feel as a result? context: Skylar has been working every day at getting her blackbelt and finally joined Sasha as having one. options: A: happy she finally earned her blackbelt B: glad Skylar got a blackbelt C: glad Skylar got a green belt
Target Answer: B: glad Skylar got a blackbelt
Predicted Answer: A: happy she finally earned her blackbelt
question: How would you describe Addison? context: A

question: Why did Carson do this? context: Carson was trying to study for a big test, so Carson turned Sydney's music down. options: A: listen to loud music B: make Sydney upset C: make things quiet
Target Answer: C: make things quiet
Predicted Answer: B: make Sydney upset
question: Why did Jesse do this? context: Jesse got the results for the test and got an A as a grade. options: A: keep it up B: celebrate C: studied hard
Target Answer: C: studied hard
Predicted Answer: A: keep it up
question: What will Robin want to do next? context: Robin took the test quickly because they wanted to pass quickly with flying colors. options: A: pass B: do well on the test C: fail
Target Answer: B: do well on the test
Predicted Answer: A: pass
question: How would Remy feel as a result? context: Quinn murders Remy's wife after Remy stole a lot of money from the family. options: A: had felt passive B: like the got away with it C: had felt angry
Target Answer: C: had felt angry
Predicted Answer: B: like

Target Answer: A: cruel
Predicted Answer: C: someone who likes to pull pranks
question: What will Sydney want to do next? context: Sydney took the trash to the burn barrel then set the trash on fire. options: A: Go inside B: Put out the fire C: Gather the trash needed for disposal
Target Answer: A: Go inside
Predicted Answer: B: Put out the fire
question: What will Jordan want to do next? context: Jordan mentioned Casey in chapter one, because he is planning on bending the knee with a ring in hand very soon. options: A: go to a theme park with Casey B: propose to Casey for marriage C: buy the ring
Target Answer: B: propose to Casey for marriage
Predicted Answer: C: buy the ring
question: How would Others feel as a result? context: Casey decided to learn about cooking so she took a Saturday evening cooking class instead of spending time with her boyfriend. options: A: excited to date a good cook B: like they are cooking too C: inattentive to the needs of her boyfriend at time
Target Ans

question: How would you describe Skylar? context: Skylar could not hang the moon but they could turn silver into gold. options: A: Talented B: Hated C: like a wizard
Target Answer: A: Talented
Predicted Answer: C: like a wizard
question: How would the man feel after? context: sasha had enough, she gave the man her two cents and let him have it. options: A: sorry for their actions B: big and proud C: frustrated
Target Answer: A: sorry for their actions
Predicted Answer: B: big and proud
question: Why did Quinn do this? context: quinn became friends with a cool guy who had a house so quinn moved into the house. options: A: be nice to the cool guy B: be able to be next to the guy whenever she wanted C: go against her parents
Target Answer: B: be able to be next to the guy whenever she wanted
Predicted Answer: A: be nice to the cool guy
question: What will Austin want to do next? context: Sasha spent Austin's money trying to win a prize even when the odds were stacked against her. options:

question: How would you describe Kendall? context: Kendall watched baseball every week to cheer for their favorite team. options: A: Indifferent to watch B: Loyal to others C: excited
Target Answer: B: Loyal to others
Predicted Answer: C: excited
question: What will happen to Others? context: Skylar played some soccer with their friend after school that day and had fun. options: A: join the game B: go to a bakery C: be tired from playing soccer
Target Answer: C: be tired from playing soccer
Predicted Answer: A: join the game
question: How would you describe Remy? context: Remy was playing a video game in their spare time. They prevented the enemies from engaging. options: A: a technical person B: a hard worker C: a skilled laborer
Target Answer: A: a technical person
Predicted Answer: B: a hard worker
question: What will happen to Riley? context: Riley grew very suspicious of Jesse because they snuck out every night at 11 PM. options: A: lock all the doors and windows at night B: be pu

Target Answer: B: go to sleep
Predicted Answer: C: hear the snoring
question: What does Lee need to do before this? context: Lee made copies of the their poster before starting his mayoral campaign. options: A: needed to design the poster for his campaign B: has the funds C: needed to drop out of the campaign
Target Answer: A: needed to design the poster for his campaign
Predicted Answer: B: has the funds
question: What does Taylor need to do before this? context: Their friend asked them to take a look at the engine, so Taylor studied the car carefully. options: A: open the hood B: be a mechanic C: buy a car
Target Answer: B: be a mechanic
Predicted Answer: C: buy a car
question: What will Jesse want to do next? context: Jesse was out sick for the day so Tracy was covering his class. She took his student's to the playground for recess. options: A: get better B: review their homework C: cover the next lesson
Target Answer: A: get better
Predicted Answer: C: cover the next lesson
questio

question: What will Cameron want to do next? context: Cameron got to the yard and stopped the dog fight before someone else. options: A: wash his hands good B: laugh at the dogs C: save on vet bills
Target Answer: A: wash his hands good
Predicted Answer: C: save on vet bills
question: What will Others want to do next? context: jan went to the same school for years so she was best friends with a girl. options: A: appreciate jan B: make sure they kept up their relationship C: hate jan
Target Answer: B: make sure they kept up their relationship
Predicted Answer: A: appreciate jan
question: Why did Alex do this? context: Alex walked Robin towards the execution chamber for her last meal. options: A: work at the jail B: So Robin can eat C: release her
Target Answer: B: So Robin can eat
Predicted Answer: C: release her
question: How would you describe Kendall? context: Kendall took Skylar's schedule into account when planning the trip for their summer vacation. options: A: supported B: includ

Predicted Answer: C: lose money
question: What will happen to Others? context: Riley and their friend were best friends. options: A: share happiness with their friend B: share sadness with their friend C: have fun doing things with Riley
Target Answer: A: share happiness with their friend
Predicted Answer: C: have fun doing things with Riley
question: What does Cameron need to do before this? context: Cameron took Jan's motorcycle for a spin without permission. options: A: Check the gas B: Grab his jacket C: Check the oil
Target Answer: A: Check the gas
Predicted Answer: B: Grab his jacket
question: How would you describe Sasha? context: Sasha was playing a game but they got tired of it. options: A: fed up B: Someone that want's something else to do C: bored
Target Answer: C: bored
Predicted Answer: A: fed up
question: What will Bailey want to do next? context: Cameron had hired Bailey to help with an important job and felt satisfied with Bailey's work. options: A: Apply for unemployme

Target Answer: A: make Sydney take charge
Predicted Answer: C: relax
question: What will Carson want to do next? context: Carson ordered a large pizza for dinner. options: A: cut the grass B: washes his hands before dinner C: take out the dog
Target Answer: B: washes his hands before dinner
Predicted Answer: C: take out the dog
question: What will Kai want to do next? context: Kai wanted everything on their list for Christmas. options: A: go Christmas shopping B: joy C: tell their list
Target Answer: C: tell their list
Predicted Answer: A: go Christmas shopping
question: What will happen to Alex? context: Alex set Riley on fire in the game when Riley had low health left. options: A: win the game B: will get sick after the game C: will quit playing
Target Answer: A: win the game
Predicted Answer: B: will get sick after the game
question: What will their step dad want to do next? context: Riley left home with their sister today, because they were grounded by their step dad. options: A: r

question: What will Others want to do next? context: Bailey had been feeling angry for a quite a while, one day she had enough and expressed her anger to the others. options: A: leave Bailey alone B: release pressure C: Apologize to Bailey
Target Answer: A: leave Bailey alone
Predicted Answer: C: Apologize to Bailey
question: How would you describe Quinn? context: Quinn saw their dog get hit by a car. They cried their eyes out. options: A: terrible B: sentimental C: vengeful
Target Answer: B: sentimental
Predicted Answer: C: vengeful
question: How would you describe Jesse? context: Jesse went to the zoo with people from church. They saw all the animals in the zoo. options: A: inquisitive B: excited C: wandering
Target Answer: B: excited
Predicted Answer: A: inquisitive
question: How would Jan feel afterwards? context: Jan went to work on the morning of the party because she was called in. options: A: tired B: happy C: loyal to work
Target Answer: A: tired
Predicted Answer: C: loyal to 

Target Answer: B: solve problems
Predicted Answer: A: take care of it
question: What will Bailey want to do next? context: Bailey got a promotion at work after working there for only a week. options: A: fire their boss B: find a job with better promotions C: do a good job for more promotions
Target Answer: C: do a good job for more promotions
Predicted Answer: B: find a job with better promotions
question: How would Tracy feel as a result? context: Quinn held onto Tracy's forms because Tracy didn't want Quinn to lose them on the bus. options: A: be mad about being the form guide B: dutiful C: be relieved to not have to locate the forms
Target Answer: B: dutiful
Predicted Answer: C: be relieved to not have to locate the forms
question: What will Kendall want to do next? context: Kendall got ready and snatched their journal away to keep it private. options: A: lock their journal away B: hide information C: hide their journal
Target Answer: C: hide their journal
Predicted Answer: A: lock 

Target Answer: B: hopeful
Predicted Answer: A: A hardworking person
question: How would Riley feel as a result? context: Riley talked to their friends at the party and had a good time. options: A: connected to friends B: social C: one with friends
Target Answer: A: connected to friends
Predicted Answer: B: social
question: How would you describe Carson? context: Carson dug around in the bag and ate Bailey's candy once he had found the stash of it. options: A: Someone who hates candy and sweets B: Someone who steals from people C: angry
Target Answer: B: Someone who steals from people
Predicted Answer: A: Someone who hates candy and sweets
question: What will Jesse want to do next? context: Bailey passed Jesse an examination booklet and they both grabbed their pencils and waited for the go ahead to start the test. options: A: do well on the test B: finish the test C: fill in the test
Target Answer: A: do well on the test
Predicted Answer: B: finish the test
question: How would the other

question: How would you describe Jordan? context: Jordan affected children's happiness by always yelling at them and telling them no. options: A: mad B: authoritative C: regretful
Target Answer: B: authoritative
Predicted Answer: A: mad
question: What does Aubrey need to do before this? context: Aubrey took tennis lessons as a method to get in shape. options: A: stay healthy B: go to the game C: get tennis clothes
Target Answer: C: get tennis clothes
Predicted Answer: B: go to the game
question: How would you describe Addison? context: Addison turned their music down because they were making noise. options: A: bored B: annoyed C: thoughtful
Target Answer: C: thoughtful
Predicted Answer: B: annoyed
question: Why did Cameron do this? context: Cameron moved Kendall's body into a trash bag. options: A: was nice B: killed Kendall C: needed to buy trash bags
Target Answer: B: killed Kendall
Predicted Answer: A: was nice
question: What will Casey want to do next? context: Casey finds a baby k

question: What will Bailey want to do next? context: Bailey passed Jesse an examination booklet after she decided not to study that subject anymore. options: A: give the booklet back to Bailey B: leave the class C: throw away the examination booklet
Target Answer: B: leave the class
Predicted Answer: A: give the booklet back to Bailey
question: How would you describe Skylar? context: Skylar was outdoors in the wintertime and pulled the wool over their eyes and face. options: A: feeling cold B: wearing wool C: feeling warm
Target Answer: A: feeling cold
Predicted Answer: C: feeling warm
question: What does Skylar need to do before this? context: Skylar distributed the data in several forms without double checking the figures. options: A: gather the data B: hand out the forms C: rush things
Target Answer: C: rush things
Predicted Answer: A: gather the data
question: How would Aubrey feel as a result? context: Aubrey sounded good today considering they had been sick last week. options: A:

question: What will Others want to do next? context: Alex and his cohorts, the former criminal masterminds for the Lupino family, made their escape from jail. options: A: Bribe the guards B: contact the FBI C: Plan the escape
Target Answer: B: contact the FBI
Predicted Answer: C: Plan the escape
question: How would you describe Kai? context: Kai was swinging the bat without paying attention and swung through the posts. options: A: Angry B: Cautious C: Think about that swing
Target Answer: C: Think about that swing
Predicted Answer: A: Angry
question: How would you describe Riley? context: Riley was competing in a gymnastics meet. Riley felt worse after she lost. options: A: competitive B: upset C: regretful
Target Answer: B: upset
Predicted Answer: A: competitive
question: How would Others feel as a result? context: Jordan found out that they were very ill, but they made light of the ailment. options: A: feeling sad B: Humorous C: Uncomfortable
Target Answer: C: Uncomfortable
Predicted

#### Evaluate the model fine-tuned on hellaswag for 3 epochs

In [14]:
# Restore the checkpoint fine-tuned on hellaswag; the model weights and the
# tokenizer files are read from the same directory.
hellaswag_checkpoint_dir = './models/hellaswag/3_epochs'
hellaswag_model = T5ForConditionalGeneration.from_pretrained(hellaswag_checkpoint_dir)
hellaswag_tokenizer = T5Tokenizer.from_pretrained(hellaswag_checkpoint_dir)

In [15]:
# Load the validation dataset
# NOTE(review): torch.load unpickles the file, so only point it at .pt files
# from a trusted source. The items are later indexed with 'input_ids' and
# 'target_ids' keys (see the error-listing cell).
hellaswag_valid_dataset = torch.load('./data/hellaswag/valid_data.pt')

In [30]:
# Batches have to come out in dataset order: the error-listing cell maps the
# i-th prediction back to hellaswag_valid_dataset[i]. shuffle=False is the
# DataLoader default and is spelled out here to make that dependency visible.
hellaswag_dataloader = torch.utils.data.DataLoader(
    dataset=hellaswag_valid_dataset,
    batch_size=16,
    shuffle=False,
)

In [None]:
# Generate predictions
# Runs the fine-tuned model over every validation batch and collects the decoded
# model outputs and the decoded reference answers as two parallel lists of strings.
hellaswag_predictions = []
hellaswag_targets = []
hellaswag_model.to(device)
hellaswag_model.eval()
with torch.no_grad():
    for batch in tqdm(hellaswag_dataloader):
        generated_ids = hellaswag_model.generate(
            input_ids=batch['input_ids'].to(device),
            attention_mask=batch['attention_mask'].to(device),
            max_length=128,
        )
        # The accuracy below is an exact string match, so both sides are decoded
        # with special tokens removed. Without this, whether pad/EOS markers show
        # up in the text depends on the installed tokenizer version, and a
        # correct answer could be counted as wrong purely because of padding.
        prediction = [hellaswag_tokenizer.decode(ids, skip_special_tokens=True) for ids in generated_ids]
        target = [hellaswag_tokenizer.decode(ids, skip_special_tokens=True) for ids in batch['target_ids']]

        hellaswag_predictions.extend(prediction)
        hellaswag_targets.extend(target)

HBox(children=(FloatProgress(value=0.0, max=628.0), HTML(value='')))

In [25]:
# Exact-match accuracy: share of decoded predictions equal to the decoded target string.
metrics.accuracy_score(y_true=hellaswag_targets, y_pred=hellaswag_predictions)

0.5786852589641435

In [26]:
# Positions at which the decoded prediction differs from the decoded target.
incorrect_idxs = [
    position
    for position, (predicted, expected) in enumerate(zip(hellaswag_predictions, hellaswag_targets))
    if predicted != expected
]

# Show every missed example: the prompt, the reference answer and the model's answer.
for incorrect_idx in incorrect_idxs:
    missed_example = hellaswag_valid_dataset[incorrect_idx]
    prompt_text = hellaswag_tokenizer.decode(missed_example['input_ids'])
    target_text = hellaswag_tokenizer.decode(missed_example['target_ids'])
    print(prompt_text)
    print("Target Answer: {}".format(target_text))
    print("Predicted Answer: {}".format(hellaswag_predictions[incorrect_idx]))
    print('\n')

activity: Clean and jerk context: A lady walks to a barbell. She bends down and grabs the pole. the lady options: A: swings and lands in her arms. B: pulls the barbell forward. C: pulls a rope attached to the barbell. D: stands and lifts the weight over her head.
Target Answer: D: stands and lifts the weight over her head.
Predicted Answer: B: pulls the barbell forward.


activity: High jump context: A boy is running down a track. the boy options: A: runs into a car. B: gets in a mat. C: lifts his body above the height of a pole. D: stands on his hands and springs.
Target Answer: C: lifts his body above the height of a pole.
Predicted Answer: B: gets in a mat.


activity: High jump context: The boy lifts his body above the height of a pole. The boy lands on his back on to a red mat. the boy options: A: turns his body around on the mat. B: gets up from the mat. C: continues to lift his body over the pole. D: wiggles out of the mat.
Target Answer: B: gets up from the mat.
Predicted Answe

Target Answer: A: is slowly starting to look really really nice, once he is completely finished the yard looks great, nice and smooth.
Predicted Answer: C: gets mowed quicker by the dog.


activity: Grooming dog context: There's a nbc news reporter wearing a red sweater and white shirt talking about dog grooming by scissor wizard a grooming company. the representative from the company options: A: is talking about the services they offer and how those dogs participate in dog shows. B: speaks to a woman sitting on a colorful conference room chair next to a brown dog kennel in front of them. C: tells viewers about a dog grooming company, itv with a website. D: in grey sweater and black pants talks about the procedures on grooming dogs in life and techniques.
Target Answer: A: is talking about the services they offer and how those dogs participate in dog shows.
Predicted Answer: C: tells viewers about a dog grooming company, itv with a website.


activity: Having an ice cream context: A wo

activity: Removing ice from car context: The man cleans the snow under his windshield wipers. We see the building behind the car. We do a 360 spin to see the parking lot. the man options: A: finishes and turns off the wipers. B: opens the doors on his car and gets in. C: finishes and slides out of the car. D: puts up the camera.
Target Answer: B: opens the doors on his car and gets in.
Predicted Answer: A: finishes and turns off the wipers.


activity: Clean and jerk context: A man in a blue shirt and shorts lifts a large weight over his head. Another man is filming it. The same man comes back and lifts the weight over his head again. he options: A: drops it on the ground and walks away. B: again lifts the weight over his head. C: drops it and runs towards the camera. D: drops it onto the floor where it gets stuck.
Target Answer: A: drops it on the ground and walks away.
Predicted Answer: B: again lifts the weight over his head.


activity: Throwing darts context: A man is seen throwin

Target Answer: A: attempts to walk through where he is mopping.
Predicted Answer: D: walks behind him and sets the mop down.


activity: Mooping floor context: A man is mopping the floor with a mop. Another boy attempts to walk through where he is mopping. the man options: A: takes a bucket, large hose and goes in the sink with it. B: puts his foot down and walks away. C: takes a drink of beer and complains to the camera. D: walks around the mopping area.
Target Answer: C: takes a drink of beer and complains to the camera.
Predicted Answer: D: walks around the mopping area.


activity: Mooping floor context: Another boy attempts to walk through where he is mopping. The man takes a drink of beer and complains to the camera. he options: A: gets back in the raft to go back to the store. B: continues to mop mopping the floor. C: mops through the room in between the people. D: continues scrubbing the mop with another spray and the boy walks back.
Target Answer: C: mops through the room in b

Target Answer: A: adjusts the violin under her chin as she reads the music.
Predicted Answer: D: explains the song she is about to play.


activity: Putting in contact lenses context: A woman in a suit is talking to a camera. A woman is putting contact lenses into her eyes. contact lenses options: A: are shown inside a case onto her nose, inside and outside. B: are shown on a hand. C: are being put into elongated pupil holes. D: are made and put in.
Target Answer: B: are shown on a hand.
Predicted Answer: D: are made and put in.


activity: Running a marathon context: A crowd watches as several runners in a race jog past. they all options: A: are interviewed by microphones outside. B: stop and change positions as the race starts. C: keep jogging, coming in at the end of the race. D: fall down as they attempt to make the jump.
Target Answer: C: keep jogging, coming in at the end of the race.
Predicted Answer: B: stop and change positions as the race starts.


activity: Having an ice cre

Target Answer: C: reaches the end of the yard she makes a turn and drives up in the same direction she just came from.
Predicted Answer: A: makes it to the end of the lawn, she stops, takes a deep breath, gets off her horse and walks away.


activity: Cutting the grass context: When the girl reaches the end of the yard she makes a turn and drives up in the same direction she just came from. when she options: A: reaches the end, she slows down and goes through the washer and dryer area that is lined up before she pulls a spare tire off of the trunk of the car and puts the tire back on. B: makes a turn she goes up arm high and lands inside the field. C: reaches that end, she makes a turn but it's too tight and she reverses a little. D: reaches the end of the yard she jumps off the back of the truck, completes a triple jump on the wooden stairs of the backstop and springs back down and even does a 360 flip before jumping off again.
Target Answer: C: reaches that end, she makes a turn but 

activity: Painting context: An intro leads into a close up of a painting and a paintbrush swirling around and around. the hand holding the brush options: A: dips it into paint and paints while the woman continues painting. B: begins painting while standing next to a large piece of wood and pausing to wipe the brush on the canvas. C: begins painting grass into the painting slowly down the picture. D: is then seen using it to paint on the canvas causing it to move up and down and the logo continues flipping around but stops at the end.
Target Answer: C: begins painting grass into the painting slowly down the picture.
Predicted Answer: B: begins painting while standing next to a large piece of wood and pausing to wipe the brush on the canvas.


activity: Playing badminton context: Woman is carrying the child on his back and then she is with a man and the kid riding bikes in the park. kid options: A: is in stairs ice climbing and here is a man catching trash in the woods and the boys are i

activity: Tai chi context: We see a man standing in a room. The man starts performing tai chi. The man turns and faces left. the man options: A: turns to face right. B: starts dancing for the camera. C: demonstrates tai chi in a seated position. D: does a squat and drinks, then turns and walks from the room.
Target Answer: A: turns to face right.
Predicted Answer: C: demonstrates tai chi in a seated position.


activity: Tai chi context: The man turns to face right. The man makes his way to the right side of the room slowly. the screen options: A: fades to regular color. B: then fades to black. C: flashes into black giving the man a face show. D: displays the phone number of the man with the camera.
Target Answer: B: then fades to black.
Predicted Answer: A: fades to regular color.


activity: Layup drill in basketball context: A man in a blue shirt is playing basketball on a court. he options: A: is making strides in the game. B: shoots at the hoop and misses it. C: starts running tow

Target Answer: B: jumps over the beam and stands up on the mat.
Predicted Answer: C: then throws a ball down onto the mat.


activity: Gargling mouthwash context: A woman sits in front of a bathroom mirror squishing water in her mouth, as if rinsing her mouth. the woman options: A: then spits the water into the sink and looks at her teeth very close in the mirror before the scene fades out. B: then opens a hand mirror and tubs a wet cloth, which she pours into her mouth and animates. C: moves her fingers towards her mouth and stops. D: finishes and reaches for her mouth in the sink.
Target Answer: A: then spits the water into the sink and looks at her teeth very close in the mirror before the scene fades out.
Predicted Answer: D: finishes and reaches for her mouth in the sink.


activity: Horseback riding context: In between interviews they show the duggar mom and the little girl getting instructions about their horse and then getting on their horses and riding on them in an enclosed a

activity: Arm wrestling context: Two women are on a stand opposite one another at a table beginning to arm wrestle. there options: A: are text across the screen that say " hi, what's up, matt ". B: is a brief hold up before they begin, because their thumb positing is inaccurate. C: is a dancing routine going on in the background. D: is a microphone that is continually playing on the right side of the table while two of the contestants are able to hold their fingers close to each other.
Target Answer: B: is a brief hold up before they begin, because their thumb positing is inaccurate.
Predicted Answer: D: is a microphone that is continually playing on the right side of the table while two of the contestants are able to hold their fingers close to each other.


activity: High jump context: The guy seems to be practicing how high he can jump by jumping over the pole many times, each time a person raises the pole higher and higher to see if the guy can jump over it. one jump the guy option

Target Answer: C: then moves the bags out of the yard and mows the lawn.
Predicted Answer: B: kneels down and puts what he is raking on the ground.


activity: Cheerleading context: The men flip then the ladies flip and the men lift them in the air and hold them up as the crowd claps. The people forum groups and flip three girls in the air. the people options: A: stop spinning and poles except for the right and right girl. B: put the ladies on their shoulders and flip them into the air and spin then do arm wrestling as people run running up. C: form two groups and hold up two girls who hold up one girl together. D: flip and the men stand on them.
Target Answer: C: form two groups and hold up two girls who hold up one girl together.
Predicted Answer: D: flip and the men stand on them.


activity: Tumbling context: A man is inside of a gym. He starts doing flips inside the gym. He does many tricks as he jumps in the air. he options: A: lands on his feet after the tricks. B: stops perform

activity: Sharpening knives context: The man uses stacked pennies to get the angle of the knife to be sharpened and demonstrates how to sharpen kitchen knives using a stone. the man options: A: uses an electric knife sharpener to sharpen the knife. B: slides the knife on the stone while pressing the knife with his fingers. C: then straps a loop of gold metal onto the handle of the knife and tightens it to cut the metal and leans forward to prepare the bend. D: sets down the knife and continues talking while demonstrating sharpening the knife.
Target Answer: B: slides the knife on the stone while pressing the knife with his fingers.
Predicted Answer: D: sets down the knife and continues talking while demonstrating sharpening the knife.


activity: Sharpening knives context: The man uses stacked pennies to get the angle of the knife to be sharpened and demonstrates how to sharpen kitchen knives using a stone. The man slides the knife on the stone while pressing the knife with his fingers

activity: Layup drill in basketball context: A man is seen speaking to a group of boys on a basketball court. the boys options: A: then begin shooting hoops and passing to one another. B: track a ball around the court and lead into a game of dribbling the ball. C: are then seen flipping, kicking, diving, and sitting. D: kick a ball back and fourth while others watch.
Target Answer: A: then begin shooting hoops and passing to one another.
Predicted Answer: D: kick a ball back and fourth while others watch.


activity: Camel ride context: Two women are sitting on a small camel. The woman on the back of the camel waves at the camera. the camel options: A: begins to walk and carry the women on it's back. B: is running fast and the people behind keep up with his pace. C: takes off on its side and the two women follow behind. D: shakes the woman on its back.
Target Answer: A: begins to walk and carry the women on it's back.
Predicted Answer: C: takes off on its side and the two women follow 

Target Answer: A: decides to turn off the red mower.
Predicted Answer: C: walks across the yard and opens the back door of the red riding lawnmower.


activity: Sharpening knives context: We are instructed and shown how to use the sharpening steel. We are instructed and shown how to use a bowl to sharpen a knife. we options: A: are shown the knife cutting tomatoes. B: see the shuttlecock in the image. C: see the lady in red again. D: are shown the polish and prep display.
Target Answer: A: are shown the knife cutting tomatoes.
Predicted Answer: C: see the lady in red again.


activity: Running a marathon context: A group of people are walking outdoors, and people are preparing to run in a race. they options: A: jump over one another to get to the finish line, but each run ends with one missing. B: are very much like the people on tv, looking forward, and having a good time. C: run down city streets and past numerous buildings. D: run down to the shore of the river to jump into the wate

Target Answer: B: goes out of bounds again.
Predicted Answer: C: returns and the man in green serves the ball.


activity: Playing beach volleyball context: Four men play beach volleyball while serving, passing and hitting the ball. A little girl stand behind the players. then options: A:, the girl and the two other girls continue playing. B:, the players stand as spectators, while the girls run and swim. C:, the young man play beach volleyball with another guy. D:, the men change sides and continues playing beach volleyball.
Target Answer: D:, the men change sides and continues playing beach volleyball.
Predicted Answer: A:, the girl and the two other girls continue playing.


activity: Ice fishing context: A man is standing in the snow holding a fishing pole and talking. A view of under the water is shown. the man options: A: is kneeling down in the snow reeling a fishing pole. B: grabs some fish and gets in the water and puts it in a hole. C: is laying on the ground while leading a 

activity: Sharpening knives context: A guy take a knife from a wooden block and a knife sharpener from a drawer. The guy is sharpening a knife. the guy options: A: places the knife and the elongated sharpener on the counter top. B: writes on paper using sharpener. C: holds the knife in his hand and a blue piercing outline of the knife is in the center wile the guy is sharpening it. D: bends and picks the knife sharpener up.
Target Answer: A: places the knife and the elongated sharpener on the counter top.
Predicted Answer: D: bends and picks the knife sharpener up.


activity: Sharpening knives context: The guy is sharpening a knife. The guy places the knife and the elongated sharpener on the counter top. a person options: A: starts an ironing machine. B: brings out a wooden chopping board. C: walks by from behind and gets a quick peek then keeps walking. D: dorks an ice cream cone.
Target Answer: B: brings out a wooden chopping board.
Predicted Answer: C: walks by from behind and gets

Target Answer: A: runs and now he's jumping 6'4 feet high but unfortunately he knocks the pole off and as soon as he realizes it he gets up and is very disappointed in himself.
Predicted Answer: D: smiles while he's jumping through the air, grabs a pose and goes to the girls, flips and lands and then walks away.


activity: Hand car wash context: The person kneels next to his car and shows cleaning products. The man shakes up the product before applying it to a sponge. the man options: A: waxes down the door of the car with the product. B: rubs the sponge in foam on the cars tires. C: rubs the wet area of the windshield then wipes it off with a sponge to remove the germs. D: puts it under the tire of the car.
Target Answer: A: waxes down the door of the car with the product.
Predicted Answer: C: rubs the wet area of the windshield then wipes it off with a sponge to remove the germs.


activity: Running a marathon context: Suddenly, it is morning, and there are four men running in front

Target Answer: D: see a person add a point by moving a silver bobble.
Predicted Answer: A: see the ending screen on a black background.


activity: Baking cookies context: Once the cookies are in the oven, the two girls sit in front of the oven and watch the cookies bake. when they options: A:'re done in the oven, they have another batch baked and their dad has a plate to take off next. B: are done, they are placed on the table and the two begin eating. C:'re done talking, the second girl hands the kids a cookie. D: are done, both of the girls take a break and then finish their food and put it on the plate.
Target Answer: B: are done, they are placed on the table and the two begin eating.
Predicted Answer: C:'re done talking, the second girl hands the kids a cookie.


activity: Snow tubing context: A group of friends are seated on sleds at the top of the hill. the two friends options: A: are gear shoeing down the hill with an equipment strap on. B: push their backs against a cliff. C: 

Target Answer: B: wipes ice cream from the child's face.
Predicted Answer: A: eats the ice cream cone with his hand.


activity: Calf roping context: Clips of both of men talking and clips of them riding horses and wrangling calves play and rotate between the two of them. the outro video options: A: finally comes through and it shows both men close up outside bouncing around. B: begins and it shows a hanging banner at the arena that reads " calgary stampede ", a woman riding a horse and holding a red flag that say's " 1912-2012 calgary stampede ", and a calf is running across a dirt field. C: ends with video credits as well as the change of language shown from the start of the video. D: logo appears with infographic and blue paragraph saying down the line, " viewers please find a property that would be willing to set up an interview with both of them, separately, by email, via texts and phone " and pauses to let the others catch up and continues talking into the mic.
Target Answer: B: 

activity: Spread mulch context: The man then comes in the back yard with a green wheelbarrow and dumps the dirt in the back yard. he options: A: shovels up the dirt from the side yard down. B: returns to face the camera and the man i just talked to begins doing some woman talk about gardening. C: talks for a while and the camera shows the process of dustputting. D: then takes his rake and moves the dirt back and forth around the house evening it out.
Target Answer: D: then takes his rake and moves the dirt back and forth around the house evening it out.
Predicted Answer: A: shovels up the dirt from the side yard down.


activity: Disc dog context: A man is outside with two dogs on leashes. he options: A: seems to be talking to the camera when suddenly the dogs start fighting and punching each other. B: puts on a show with the trained dogs. C: is brushing the dogs legs with the leashes. D: is using frisbees from an assortment for the dogs.
Target Answer: B: puts on a show with the train

activity: High jump context: A man is seen standing ready outside a track and begins running towards a bar. the man options: A: then throws a long pole across the distance in slow motion and watches himself jump into the sand. B: places his hands on the bar continuing to perform several tricks. C: then jumps over the bar and is shown again in slow motion. D: then takes a giant leap into the air and begins performing a gymnastic routine.
Target Answer: C: then jumps over the bar and is shown again in slow motion.
Predicted Answer: D: then takes a giant leap into the air and begins performing a gymnastic routine.


activity: Wrapping presents context: A man lays out a scarf and puts some accessories in the middle of it. he options: A: ties the end and folds it into double knots. B: ties a ribbon around the bottom edge. C: then puts it on his arm. D: wraps a jacket around the jacket with the scarf.
Target Answer: A: ties the end and folds it into double knots.
Predicted Answer: B: ties a 

activity: Running a marathon context: A woman is seen speaking to the camera and leads into a woman running and interviewing other runners. she options: A: interviews two twins while running and the news shows off other runners in the area. B: explains while talking while others do the same even though their slower pace. C: continues running in the field with the others and others hiding in the woods. D: interviews several runners and ends with her leading a world cup.
Target Answer: A: interviews two twins while running and the news shows off other runners in the area.
Predicted Answer: B: explains while talking while others do the same even though their slower pace.


activity: Longboarding context: We see kids with feet on skateboards. we options: A: see the kids riding down the street from a skater pov. B: see a boy rollerblading. C: see a woman holding part of a hat. D: see a kitchen and a doctor.
Target Answer: A: see the kids riding down the street from a skater pov.
Predicted A

activity: Wrapping presents context: A woman is in a living room in front of a fire. She cuts wrapping paper in strips, then wraps it neatly around a box. she options: A: grabs an egg and circles it with a silver marker. B: tapes the box, then lays it down gently on the table. C: lets the wrapping paper fall away from the box at the end. D: puts icing on the top, then seals it.
Target Answer: B: tapes the box, then lays it down gently on the table.
Predicted Answer: C: lets the wrapping paper fall away from the box at the end.


activity: Removing curlers context: A woman is seen curling her hair while speaking to the camera and leads into her speaking in front of judges. the judges options: A: critique her and she runs away happy and skipping out of the building. B: then ddle the woman while she records and showing off her hair style. C: then strike off balls as well as speaking to one another and wrapping up the interviews. D: speak to her and blow dry her hair while she talks that l

activity: Washing face context: A woman is standing in a room talking. She shows a bottle in her hand. she options: A: sprays something onto her face. B: holds a bottle of mouthwash and shakes it in her hand. C: pours some of the lotion onto her hands and rubs it on her face. D: then explains the correct way to wash your hands.
Target Answer: C: pours some of the lotion onto her hands and rubs it on her face.
Predicted Answer: A: sprays something onto her face.


activity: Croquet context: The boy sit on a bench outside with a bag of croquet mallets and balls and talk to each other. the boys options: A: have an animated conversation where only the boy and the walkie talkies are in his use. B: then enter the field and hurt themselves trying to untangle the croquet tools. C: went outside and played with the kids, talking about what sport they have joined. D: split logs of wood into a triangle on the table.
Target Answer: B: then enter the field and hurt themselves trying to untangle the 

Target Answer: D: then lifts the bell above his head and then lets the bell fall to the floor where it bounces a bit upon landing as the man turns and walks away from the weight.
Predicted Answer: B: then pulls himself several times up successfully.


activity: Wrapping presents context: We see a box on fabric. A person wraps the box in the fabric and ties it up. we options: A: see a man laying carpet in a bedroom. B: see the person wrap to bottles in the fabric together. C: see a person making small holes in floor. D: see the box in person from a distance.
Target Answer: B: see the person wrap to bottles in the fabric together.
Predicted Answer: D: see the box in person from a distance.


activity: Wrapping presents context: A person wraps the box in the fabric and ties it up. We see the person wrap to bottles in the fabric together. they options: A: put the shirt on and the jacket on. B: then put two bottles in a bag and tie it. C: are receiving gifts from home. D: put the package in

#### Evaluate the model fine-tuned on common_gen for 3 epochs

In [3]:
# Restore the checkpoint fine-tuned on common_gen; the model weights and the
# tokenizer files are read from the same directory.
common_gen_checkpoint_dir = './models/common_gen/3_epochs'
common_gen_model = T5ForConditionalGeneration.from_pretrained(common_gen_checkpoint_dir)
common_gen_tokenizer = T5Tokenizer.from_pretrained(common_gen_checkpoint_dir)

In [10]:
# Read the saved common_gen validation split and wrap it in a batch iterator.
common_gen_valid_dataset = torch.load('./data/common_gen/valid_data.pt')
common_gen_dataloader = torch.utils.data.DataLoader(
    dataset=common_gen_valid_dataset,
    batch_size=32,
    shuffle=False,  # the DataLoader default, stated explicitly: batches follow dataset order
)

In [11]:
# Generate a sentence for every validation example with beam search and collect
# the decoded hypotheses and the decoded reference sentences as parallel lists.
common_gen_predictions = []
common_gen_targets = []
common_gen_model.to(device)
common_gen_model.eval()
with torch.no_grad():
    for batch in tqdm(common_gen_dataloader):
        prediction = common_gen_model.generate(
            input_ids = batch["input_ids"].to(device),
            attention_mask = batch["attention_mask"].to(device),
            num_beams = 5,
            length_penalty = 0.6,
            max_length = 32 + 2,  # +2 from original because we start at step=1 and stop before max_length
            no_repeat_ngram_size = 3,
            early_stopping = True
        )
        # Hypotheses and references are decoded with the same options (special
        # tokens dropped, tokenization spaces left untouched) so that any later
        # comparison sees both sides in the same text normalization.
        prediction = [common_gen_tokenizer.decode(ids, skip_special_tokens=True, clean_up_tokenization_spaces=False) for ids in prediction]
        target = [common_gen_tokenizer.decode(ids, skip_special_tokens=True, clean_up_tokenization_spaces=False) for ids in batch['target_ids']]

        common_gen_predictions.extend(prediction)
        common_gen_targets.extend(target)

HBox(children=(FloatProgress(value=0.0, max=126.0), HTML(value='')))




#### Generate sentences for commonsense_qa concepts

In [4]:
# Load the dataset(s)
# Train and validation splits of the commonsense_qa concept inputs.
# NOTE(review): torch.load unpickles these files, so only load .pt files from a
# trusted source.
commonsense_qa_concepts_train_dataset = torch.load('./data/commonsense_qa_concepts/train_data.pt')
commonsense_qa_concepts_valid_dataset = torch.load('./data/commonsense_qa_concepts/valid_data.pt')

In [8]:
common_gen_model.to(device)
def gen_sentence(words, max_length=32):
    """Generate one sentence from a piece of text with the common_gen model.

    Args:
        words: Text to condition on; it is tokenized as a single-item batch.
        max_length: Length budget for the output; 2 is added to it before it
            is handed to ``generate``.

    Returns:
        The first sequence returned by beam search, decoded to a string with
        special tokens removed and tokenization spaces left as produced.
    """
    encoded = common_gen_tokenizer([words], return_tensors='pt')

    # Beam-search settings, gathered in one place.
    decoding_options = dict(
        max_length=max_length + 2,
        num_beams=5,
        length_penalty=0.6,
        no_repeat_ngram_size=2,
        early_stopping=True,
    )
    sequences = common_gen_model.generate(
        input_ids=encoded['input_ids'].to(device),
        attention_mask=encoded['attention_mask'].to(device),
        **decoding_options
    )

    first_sequence = sequences[0]
    return common_gen_tokenizer.decode(
        first_sequence,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )

In [9]:
# Generate and print a sentence for each of the first 50 validation concept inputs.
for i in range(50):
    # Drop special tokens while decoding so that pad/EOS markers cannot end up in
    # the printed prompt or in the text that gen_sentence tokenizes again.
    concepts = common_gen_tokenizer.decode(
        commonsense_qa_concepts_valid_dataset[i]['input_ids'],
        skip_special_tokens=True,
    )
    # Split on the first colon only: everything after the task prefix is kept,
    # even if the concept text itself contains a colon.
    # NOTE(review): the text before the colon (the task prefix) is not passed to
    # the model here - confirm this matches the input format used for fine-tuning.
    sentence = gen_sentence(concepts.split(':', 1)[1])
    print(concepts)
    print(sentence)
    print('\n')

generate sentence: door direction travel serves security bank
the direction in which a person travels through the door serves security and serves as security for the bank .


generate sentence: door direction travel serves security library
security guards travel through a locked door in the direction of the library serving as security .


generate sentence: door direction travel serves security department store
security guards travel through the doors of department stores in the direction of the department store in which they serve security.


generate sentence: door direction travel serves security mall
security guards travel through the door of a mall to serve security in the direction of travelers traveling through security.


generate sentence: door direction travel serves security new york
security at the door serves as a guide for travelers traveling in the direction of new york .


generate sentence: people aim work complete job
people aim to work hard to complete a job .


gene