# Final Project

In [None]:
import nfl_data_py as nfl
import pandas as pd
import re
import evaluate
import torch
import numpy as np
import random
from transformers import T5Tokenizer, T5ForConditionalGeneration, TrainingArguments
from transformers import Trainer, BertForSequenceClassification, AutoTokenizer
from sklearn.metrics import accuracy_score, f1_score
from sklearn.preprocessing import LabelEncoder
from datasets import Dataset, DatasetDict, load_dataset

In [None]:
## Get the spread of play type over each year

# Download each season on its own, report its size and play-type counts,
# and keep the frame so the per-season names below stay available.
season_frames = {}
for season in (2022, 2023, 2024):
    season_frames[season] = nfl.import_pbp_data([season], downcast=True, cache=False, alt_path=None)

    print(len(season_frames[season]))
    print(season_frames[season]['play_type'].value_counts())

df_2022, df_2023, df_2024 = (season_frames[season] for season in (2022, 2023, 2024))

In [None]:
# Seasons to train on: the three most recent ones
years_wanted = [2022, 2023, 2024]

# Pull the play-by-play rows for all chosen seasons in a single call
# (arguments follow the nfl_data_py documentation)
df = nfl.import_pbp_data(
    years_wanted,
    downcast=True,
    cache=False,
    alt_path=None,
)

# Data Cleaning

In [None]:
# Choose valid play types
valid_play_types = ['run', 'pass']
mask = df['play_type'].isin(valid_play_types)

# Choose the feature columns needed to construct the text for the play_type
selected_columns = ['posteam', 'defteam', 'qtr', 'down', 'play_type', 'ydstogo', 'yardline_100',
                    'offense_personnel', 'defense_personnel', 'passer_player_name',
                    'quarter_seconds_remaining', 'no_huddle', 'shotgun', 'desc', 'first_down',
                    'touchdown', 'yards_gained', 'fumble_lost', 'interception', 'incomplete_pass']

# Filter rows and columns in one step and take an explicit copy, so the
# column assignment below writes to an independent frame instead of a
# slice of the original (this is what triggers SettingWithCopyWarning).
df = df.loc[mask, selected_columns].copy()

# Plays without a recorded passer get a placeholder name so they survive
# the dropna below.
df['passer_player_name'] = df['passer_player_name'].fillna('The QB')

# Drop any play that is still missing one of the required features
df = df.dropna(subset=selected_columns).reset_index(drop=True)

In [None]:
## Function to generate input_text
def _ordinal(number):
    """Return *number* with its English ordinal suffix (1st, 2nd, 3rd, 4th, 5th)."""
    suffixes = {1: "st", 2: "nd", 3: "rd"}
    return f"{number}{suffixes.get(number, 'th')}"


def _clock_phrase(seconds_left):
    """Describe the seconds left in the quarter as whole minutes or seconds."""
    seconds_left = int(seconds_left)

    # A full minute or more is reported in whole minutes (rounded down)
    if seconds_left >= 60:
        minutes = seconds_left // 60
        return f"{minutes} minute" if minutes == 1 else f"{minutes} minutes"

    return f"{seconds_left} second" if seconds_left == 1 else f"{seconds_left} seconds"


def _field_position(yards_to_goal, defense):
    """Turn the distance to the opponent's end zone into a yardline phrase."""
    # yardline_100 counts yards to the OPPONENT's end zone, so more than 50
    # means the offense is still on its own side of the field.
    if yards_to_goal > 50:
        return f"their own {100 - yards_to_goal} yardline"
    if yards_to_goal < 50:
        return f"{defense}'s {yards_to_goal} yardline"
    return "the 50 yardline"


def generate_input_text(row) -> str:
    """Build a natural-language description of the pre-snap situation.

    Reads the team, quarterback, personnel, quarter, down, distance, field
    position, clock, shotgun and no-huddle features from ``row`` (indexed by
    column name), fills them into one randomly chosen template and appends a
    random shotgun / no-huddle sentence when those flags equal 1.

    The templates are picked with the module-level ``random`` generator, so
    seed it beforehand for reproducible text. Many templates also mention
    ``play_type``.

    Returns:
        The generated input sentence(s) as a single string.
    """
    # Get the feature columns for the current row
    offense = row['posteam']
    defense = row['defteam']
    qb = row['passer_player_name']
    off_look = row['offense_personnel']
    def_look = row['defense_personnel']
    play = row['play_type']
    ydstogo = int(row['ydstogo'])
    no_hud = row['no_huddle']
    sg = row['shotgun']

    ## Describe the clock in minutes/seconds because 500+ seconds is not
    ## how a person would describe it
    qtr_left = _clock_phrase(row['quarter_seconds_remaining'])

    ## Redefine quarter and down as strings using the conventional
    ## first, second, third... (quarter 5 is overtime and becomes "5th"
    ## instead of leaking the raw float "5.0")
    qtr = _ordinal(int(row['qtr']))
    down = _ordinal(int(row['down']))

    ## Convert the current distance to the goal line into a yardline on
    ## the correct side of the 50
    disttd = _field_position(int(row['yardline_100']), defense)

    ## Define natural language options for if the offense is in shotgun formation
    shotgun_options = ["Shotgun formation.", f"{qb} is in shotgun.", "The offense has shotgun formation.",
                       "Offense in shotgun.", "The QB is in shotgun.", "Shotgun snap.",
                       "The quarterback is in shotgun formation.", "Shotgun formation for the QB.",
                       "And the QB is in shotgun.", "The offense went with the QB in shotgun.",
                       f"{offense} had the QB in shotgun.", f"{offense} put the quarterback in shotgun.",
                       f"{qb} in shotgun formation.", f"Shotgun for {offense}.", f"Back in shotgun is {qb}.",
                       "Shotgun.", f"{qb} as QB in shotgun.", "The quarterback in shotgun."]

    ## Define natural language options for if the offense is going no huddle
    no_hud_options = ["The offense went no huddle.", "No huddle.", "No huddle play.", "No huddle offense.",
                      f"{offense} went no huddle.", f"{offense} offense didn't huddle.", "No huddle needed for the offense",
                      f"No huddle needed for {offense}.", f"And {qb} did not huddle the offense.", "Huddle was not needed.",
                      f"Huddle wasn't needed for the {offense} team", "Huddle unnecessary for the offense.", "Play with no huddle",
                      f"Still no huddle is {offense}.", "Play needed no huddle.", "The offense went with no huddle",
                      "No huddle necessary for the offense."]

    ## Define the main body of templates for the input text,
    ## this uses the remaining features given for input
    ## There is around 100 options given here
    main_options = [
        f"With about {qtr_left} remaining in the {qtr} quarter, {offense} lines up for a {play} on {down} and {ydstogo} from {disttd}. {qb} leads the offense, which is showing {off_look}, while the defense from {defense} sets up in {def_look}.",
        f"{offense} faces {down} and {ydstogo} from {disttd} with {qtr_left} to go in the {qtr} quarter. The offensive formation is {off_look}, with {qb} at quarterback. {defense} shows a {def_look} defensive look.",
        f"Early in the {qtr} quarter with {qtr_left} remaining, {offense} is preparing a {play} from {disttd}. It's {down} down and {ydstogo} to go. Offensive look: {off_look}. Defensive setup by {defense}: {def_look}.",
        f"{offense} sets up on {down} and {ydstogo}, positioned at {disttd}. The clock shows {qtr_left} in the {qtr} quarter. {qb} is in command, working behind a {off_look} formation. {defense} counters with {def_look}.",
        f"Clock winding down in the {qtr} quarter — {qtr_left} left. {offense} is lined up on {down} and {ydstogo} at {disttd}. The offense shows {off_look}, and {qb} gets ready to lead the {play}. {defense} is lined up in {def_look}.",
        f"Midway through the {qtr} quarter ({qtr_left} left), {offense} has the ball on {down} and {ydstogo} from {disttd}. {qb} prepares to execute a {play}. Formation: {off_look} vs. {defense}'s {def_look}.",
        f"In the {qtr} quarter with {qtr_left} left on the clock, {offense} is set up on {down} and {ydstogo} at {disttd}. {qb} is behind a {off_look} offensive formation, while the defense from {defense} lines up in {def_look}.",
        f"{offense} looks to convert on {down} and {ydstogo} with {qtr_left} to go in the {qtr}. They’re on {disttd}, running a {play} play with {qb} at the helm and {off_look} personnel. {defense} is showing {def_look}.",
        f"{down} and {ydstogo}, {offense} has possession at {disttd}. There's {qtr_left} left in the {qtr} quarter. {qb} leads the offense in a {off_look} look. The defense, {defense}, presents a {def_look} formation.",
        f"{offense} lines up for a {play} play on {down} and {ydstogo} from {disttd}. Time remaining in the {qtr} quarter: {qtr_left}. {off_look} is the offensive look, led by {qb}. The defense ({defense}) sets up with {def_look}.",
        f"In the {qtr} quarter with about {qtr_left} left, {offense} prepares a {play} on {down} and {ydstogo}. Ball is on {disttd}. {qb} heads the offense in {off_look}, facing {defense}'s {def_look}.",
        f"{qb} is at quarterback as {offense} prepares a {play} from {disttd}. It's {down} and {ydstogo} in the {qtr} quarter with {qtr_left} on the clock. The offense lines up in {off_look} against {defense}'s {def_look}.",
        f"With about {qtr_left} left in the {qtr} quarter, {offense} is {down} and {ydstogo} from {disttd}. The offense has {off_look} on the field, while {defense} has {def_look} on the field. {qb} is looking to run a {play}",
        f"With {qtr_left} left in the {qtr}, it's {down} and {ydstogo} for {offense} at {disttd}. {qb} leads the offense with {off_look} on the field, facing a {def_look} look from {defense}. They're lining up for a {play}.",
        f"The {qtr} quarter continues with {offense} on {down} and {ydstogo} from {disttd}. {qtr_left} on the clock. {qb} commands a {off_look} formation against {defense}'s {def_look}, preparing for a {play} play.",
        f"{offense} takes the snap on {down} and {ydstogo}, located at {disttd}. {qtr_left} remains in the {qtr}. {qb} is under center, flanked by {off_look}. The {defense} defense sets up in a {def_look} front for the upcoming {play}.",
        f"{qtr_left} left in the {qtr} quarter, and {offense} looks to gain {ydstogo} yards from {disttd}. It's {down} down. {qb} gets ready to execute a {play} behind a {off_look} formation, while {defense} prepares in {def_look}.",
        f"Approaching crunch time in the {qtr} quarter — {qtr_left} left. {offense} sets up for a {play} on {down} and {ydstogo} from {disttd}. {qb} is under center with {off_look} personnel. {defense} responds with {def_look}.",
        f"{offense} starts the play on {down} and {ydstogo} at {disttd}, with {qtr_left} left in the {qtr}. The offense is showing {off_look}, and {qb} prepares for a {play}. {defense} sets up in {def_look}.",
        f"The ball is placed at {disttd} as {offense} faces {down} and {ydstogo}. {qtr_left} remains in the {qtr} quarter. The offensive unit led by {qb} shows {off_look}; {defense} counters with {def_look}.",
        f"{offense} is ready for a {play} from {disttd}. It's {down} down and {ydstogo} with {qtr_left} left in the {qtr}. Formation: {off_look}. {qb} leads the offense. Defense ({defense}) lines up in {def_look}.",
        f"From {disttd}, {offense} is on {down} and {ydstogo}. The time reads {qtr_left} in the {qtr}. {qb} prepares for a {play} behind {off_look}. {defense} positions with a {def_look} scheme.",
        f"{offense} aligns in a {off_look} setup for {down} and {ydstogo} from {disttd}. {qtr_left} left in the {qtr}. {qb} signals the {play} while {defense} readies in {def_look}.",
        f"In the {qtr}, {offense} has the ball on {down} and {ydstogo} at {disttd}. {qb} looks over a {off_look} formation. Defense by {defense} shows {def_look}. Time left: {qtr_left}.",
        f"{qb} is set to lead a {play} for {offense} from {disttd}, facing {down} and {ydstogo}. It's the {qtr} quarter, {qtr_left} to go. Defense from {defense} sets up in {def_look}.",
        f"{down} down and {ydstogo} for {offense}, the ball rests at {disttd}. {qb} scans the defense from a {off_look} set. Clock reads {qtr_left} in the {qtr}. Defense shows {def_look}.",
        f"Just {qtr_left} on the clock in the {qtr}. {offense} lines up for {down} and {ydstogo} from {disttd}. The formation is {off_look} with {qb} calling the {play}. {defense} lines up in {def_look}.",
        f"At {disttd}, {offense} preps for a {play}. It's {down} down and {ydstogo} with {qtr_left} in the {qtr}. {qb} takes charge behind {off_look}. {defense} is in {def_look}.",
        f"{offense} gets ready for {down} and {ydstogo} from {disttd}. {qb} barks out signals from the {off_look} look. {defense} sets in a {def_look} stance. {qtr} quarter, {qtr_left} remaining.",
        f"In the {qtr}, {offense} is on {down} and {ydstogo} at {disttd}. Formation: {off_look}. {qb} at the helm. The defense ({defense}) counters with {def_look}. {qtr_left} on the clock.",
        f"{offense} comes out in {off_look} for a {play} from {disttd}. It's {down} and {ydstogo}, and the {qtr} quarter is winding down with {qtr_left} left. {defense} lines up in {def_look}.",
        f"At {disttd}, it's {down} and {ydstogo} for {offense}. {qb} lines up behind {off_look}. {qtr_left} left in the {qtr}. The defense by {defense} is set in {def_look}.",
        f"{qb} leads {offense} into a {play} attempt from {disttd}, facing {down} and {ydstogo}. {qtr} quarter, {qtr_left} left. Defense shows {def_look} against a {off_look} formation.",
        f"Ball on {disttd}, {down} and {ydstogo} for {offense}. The offense shows {off_look}. Clock reads {qtr_left} in the {qtr}. {qb} ready to run the {play} against {def_look}.",
        f"Midway in the {qtr}, {offense} lines up for {down} and {ydstogo} from {disttd}. {off_look} is the look, with {qb} ready. Defense from {defense} counters in {def_look}.",
        f"{offense} preps for a key {play} on {down} and {ydstogo} at {disttd}. The {qtr} quarter shows {qtr_left} remaining. {qb} under center. {defense} shows a {def_look}.",
        f"{offense} is at the line on {down} and {ydstogo}, ball at {disttd}. {qb} is in {off_look} formation. {qtr_left} remains in the {qtr}. {defense} shows {def_look}.",
        f"From {disttd}, {offense} is set up for {down} and {ydstogo}. {qb} stands tall behind a {off_look} look. {defense} adjusts into {def_look}. {qtr_left} left in the {qtr}.",
        f"It's {down} and {ydstogo}, {offense} lines up on {disttd}. {qb} prepares to run a {play}. Offensive look is {off_look}. {defense} counters in {def_look}. {qtr_left} on the clock.",
        f"{qb} in command for {offense} on {down} and {ydstogo}. {off_look} is the setup from {disttd}. {defense} forms in {def_look}. The {qtr} quarter has {qtr_left} left.",
        f"{offense} breaks the huddle with {qb} under center for {down} and {ydstogo}. They're at {disttd}, lined up in {off_look}. The defense ({defense}) uses a {def_look} set. {qtr_left} to go.",
        f"With {qtr_left} left in the {qtr}, {offense} takes the field at {disttd} for a {play}. It's {down} down and {ydstogo}. Formation: {off_look}. Defense responds with {def_look}.",
        f"{offense} on offense, ball spotted at {disttd}. {down} and {ydstogo}. Formation: {off_look}. {qb} behind center. {defense} showing {def_look}. {qtr_left} remains in the {qtr}.",
        f"From {disttd}, {qb} prepares for {down} and {ydstogo}. It's the {qtr} quarter, with {qtr_left} left. The offense ({offense}) lines up in {off_look}, {defense} matches with {def_look}.",
        f"The {qtr} continues as {offense} lines up at {disttd} for {down} and {ydstogo}. {qb} ready in a {off_look} formation. Defense by {defense} shows {def_look}. {qtr_left} to play.",
        f"Just {qtr_left} to go in the {qtr}. {offense} has {down} and {ydstogo} from {disttd}. {qb} in {off_look}. {defense} defense readies with {def_look}.",
        f"Snap coming soon: {offense} has {down} and {ydstogo} from {disttd}. {qtr_left} left in the {qtr}. Formation: {off_look}. {qb} calling signals. {defense} in {def_look}.",
        f"{qb} prepares for a {play} as {offense} faces {down} and {ydstogo} from {disttd}. It’s the {qtr} with {qtr_left} left. Formation: {off_look}. Defense: {def_look} by {defense}.",
        f"The play clock winds down as {offense} gets into position for {down} and {ydstogo} from {disttd}. {qb} directs from a {off_look} formation. {defense} counters with {def_look}.",
        f"On {down} and {ydstogo}, {offense} takes the snap at {disttd}. It’s late in the {qtr} with {qtr_left} left. {qb} in a {off_look} look. Defense lines up in {def_look}.",
        f"{offense} positions at {disttd} for a {play}. It's {down} and {ydstogo}, {qtr_left} to go in the {qtr}. {qb} behind {off_look}, facing {defense} in {def_look}.",
        f"{qb} leads {offense} on a {play} attempt from {disttd}. They face {down} and {ydstogo}. {off_look} is the look. {qtr} quarter, {qtr_left} left. {defense} sets up with {def_look}.",
        f"{down} and {ydstogo} at {disttd} for {offense}. {qb} barks out signals from the {off_look} formation. {qtr_left} in the {qtr}. Defense by {defense} is set in {def_look}.",
        f"{offense} is at {disttd}, running a {play} on {down} and {ydstogo}. It’s the {qtr} with {qtr_left} left. {qb} in a {off_look} formation. {defense} showing {def_look}.",
        f"{qb} sets up for {offense} on {down} and {ydstogo} from {disttd}. With {qtr_left} in the {qtr}, the formation is {off_look}. {defense} shows a {def_look} front.",
        f"Time ticking in the {qtr}, {qtr_left} left. {offense} sets up for {down} and {ydstogo} from {disttd}. {qb} commands from a {off_look} look. {defense} counters with {def_look}.",
        f"{offense} comes out in {off_look} for {down} and {ydstogo} at {disttd}. {qb} under center. The defense ({defense}) shows {def_look}. Time left: {qtr_left} in the {qtr}.",
        f"It’s {down} down and {ydstogo} for {offense}, who line up at {disttd}. {qtr_left} remains in the {qtr}. {qb} leads from a {off_look} setup. {defense} lines up in {def_look}.",
        f"{qb} prepares to snap for {offense} on {down} and {ydstogo} from {disttd}. {qtr_left} remains in the {qtr}. Formation: {off_look}. {defense} shows a {def_look} alignment.",
        f"{offense} sets for a {play} on {down} and {ydstogo} from {disttd}. In the {qtr} with {qtr_left} remaining, {qb} lines up behind a {off_look} formation. Defense: {def_look}.",
        f"The ball is at {disttd}, and {offense} is looking at {down} and {ydstogo}. {qb} prepares for a {play}. {off_look} is the formation. {qtr} quarter, {qtr_left} left. Defense: {def_look}.",
        f"{qb} gets the offense ready at {disttd} for {down} and {ydstogo}. With {qtr_left} left in the {qtr}, they show {off_look}. The {defense} defense readies in {def_look}.",
        f"{offense} breaks the huddle and lines up at {disttd} for {down} and {ydstogo}. {qb} stands in {off_look}. The clock shows {qtr_left} in the {qtr}. Defense: {def_look}.",
        f"{offense} sets up for a big play: {down} and {ydstogo} at {disttd}. {qtr_left} left in the {qtr}. Formation is {off_look} with {qb} calling signals. {defense} is in {def_look}.",
        f"{qb} has {offense} lined up at {disttd}. It’s {down} and {ydstogo} with {qtr_left} left in the {qtr}. The offense shows {off_look}. {defense} prepares with {def_look}.",
        f"The {qtr} quarter continues with {offense} on {down} and {ydstogo} at {disttd}. {qb} surveys the defense from a {off_look} formation. {defense} is in {def_look}. Time: {qtr_left}.",
        f"{offense} gets into position at {disttd} for a {play}. It’s {down} and {ydstogo} in the {qtr} with {qtr_left} left. Formation: {off_look}. Defense: {def_look}. {qb} leads the charge.",
        f"{qb} takes the field for {offense} on {down} and {ydstogo} from {disttd}. {qtr_left} in the {qtr}. Formation: {off_look}. {defense} prepares in {def_look}.",
        f"With {qtr_left} left in the {qtr}, it’s {down} and {ydstogo} for {offense} from {disttd}. {qb} is ready behind a {off_look} look. {defense} matches up in {def_look}.",
        f"Ball at {disttd}, it’s {down} and {ydstogo} for {offense}. {qb} in command from the {off_look} formation. {defense} shows a {def_look} front. {qtr} quarter, {qtr_left} left.",
        f"{qb} preps a {play} for {offense}, positioned on {down} and {ydstogo} at {disttd}. Formation is {off_look}. Time remaining in {qtr}: {qtr_left}. {defense} set in {def_look}.",
        f"The {qtr} quarter ticks on with {qtr_left} remaining. {offense} has {down} and {ydstogo} from {disttd}. {qb} leads from {off_look}. {defense} sets up with {def_look}.",
        f"With {qtr_left} left, {offense} prepares for a {play} from {disttd}. It’s {down} and {ydstogo}. {qb} stands behind a {off_look} formation. Defense by {defense} shows {def_look}.",
        f"{offense} is lined up on {down} and {ydstogo} from {disttd}. It’s the {qtr} quarter with {qtr_left} left. {qb} in {off_look}. {defense} counters with {def_look}.",
        f"{down} and {ydstogo}, {offense} is ready at {disttd}. {qb} preps a {play} behind {off_look}. {qtr_left} remains in the {qtr}. Defense: {def_look} from {defense}.",
        f"At {disttd}, {offense} gets ready for {down} and {ydstogo}. {qb} commands from {off_look}. {qtr_left} on the clock. {defense} shows a {def_look}.",
        f"In the {qtr} quarter with {qtr_left} left, {offense} is positioned for a {play}. It’s {down} and {ydstogo} at {disttd}. Formation: {off_look}. Defense: {def_look}.",
        f"{qb} leads {offense} on {down} and {ydstogo}, setting up at {disttd}. {off_look} is the formation. {qtr_left} left in the {qtr}. Defense by {defense} is showing {def_look}.",
        f"{offense} takes the field on {down} and {ydstogo}. Ball on {disttd}. {qb} aligns the team in {off_look}. {defense} sets up in {def_look}. Time remaining: {qtr_left} of the {qtr}.",
        f"{qb} commands the huddle as {offense} lines up for {down} and {ydstogo} from {disttd}. {qtr_left} remains in the {qtr}. Formation: {off_look}, Defense: {def_look}.",
        f"Play clock winding down in the {qtr}, {qtr_left} to go. {offense} at {disttd} for {down} and {ydstogo}. {qb} sets in {off_look}. Defense: {def_look}.",
        f"{offense} lines up in {off_look} for a {play} from {disttd}. It’s {down} and {ydstogo}. {qb} faces off against {defense} in a {def_look} defense. Time left: {qtr_left}.",
        f"{down} and {ydstogo} coming up for {offense}, stationed at {disttd}. {qb} surveys the field from a {off_look} look. It’s the {qtr} with {qtr_left} remaining. {defense} shows {def_look}.",
        f"{qb} leads {offense} out for a critical {play}. Ball on {disttd}, {down} and {ydstogo}. Formation: {off_look}. {qtr_left} in the {qtr}. {defense} shows {def_look}.",
        f"In the {qtr} quarter with {qtr_left} remaining, {offense} has {down} and {ydstogo} at {disttd}. {qb} steps into a {off_look} formation. {defense} prepares in {def_look}.",
        f"Late in the {qtr}, {qtr_left} remaining. {offense} faces {down} and {ydstogo} from {disttd}. {qb} sets the offense in {off_look}. {defense} counters with {def_look}.",
        f"{qb} has {offense} aligned for a {play} at {disttd}, facing {down} and {ydstogo}. Time in the {qtr}: {qtr_left}. Formation: {off_look}. {defense} lines up in {def_look}.",
        f"Crunch time in the {qtr}, {qtr_left} on the clock. {offense} with {down} and {ydstogo} from {disttd}. {qb} leads in {off_look}. {defense} answers in {def_look}.",
        f"{offense} gets in formation at {disttd}. {down} and {ydstogo}, and the clock reads {qtr_left} in the {qtr}. {qb} stands tall behind the {off_look}. {defense}: {def_look}.",
        f"{qb} and {offense} are ready for a {play}. Ball spotted at {disttd}, it's {down} and {ydstogo}. {qtr_left} to go in the {qtr}. Defense: {def_look} from {defense}.",
        f"It’s {down} and {ydstogo} at {disttd}, and {offense} is in {off_look}. {qb} eyes the {defense} defense showing {def_look}. {qtr_left} left in the {qtr}.",
        f"{qb} takes command as {offense} readies for a {play}. They’re facing {down} and {ydstogo} from {disttd}. Clock shows {qtr_left} in the {qtr}. {defense} shows {def_look}.",
        f"Time: {qtr_left} left in the {qtr}. {offense} faces {down} and {ydstogo} at {disttd}. {qb} is under center in {off_look}. {defense} shows {def_look}.",
        f"{offense} comes to the line on {down} and {ydstogo} from {disttd}. {qb} preps a {play} from the {off_look} look. {qtr_left} in the {qtr}. Defense: {def_look}.",
        f"{qb} and the offense break huddle for {down} and {ydstogo}. Ball at {disttd}. Time: {qtr_left} in the {qtr}. Formation: {off_look}, Defense: {def_look}.",
        f"Approaching the end of the {qtr}, {qtr_left} left. {offense} at {disttd} with {down} and {ydstogo}. {qb} directs traffic from the {off_look}. {defense}: {def_look}.",
        f"{qb} leads a {play} from the {off_look} look. {offense} faces {down} and {ydstogo} at {disttd}, {qtr_left} to go in the {qtr}. {defense} prepares in {def_look}.",
    ]

    # Choose the random main body for the input text
    text = random.choice(main_options)

    # Check if the offense was in shotgun formation and append a random option to the body of the input text if so
    if sg == 1.0:
        text += " " + random.choice(shotgun_options)

    # Check if the offense went no huddle and append a random option to the body of the input text if so
    if no_hud == 1.0:
        text += " " + random.choice(no_hud_options)

    return text


## Function to generate target_text and outcome_label
def generate_output_text(row):
    """Build the T5 target text and the BERT outcome label for one play.

    The target text is the play description with every "<jersey>-<name>"
    token other than the passer replaced by "a player", and with every
    parenthesised span (game clock, formation notes, tacklers) removed.
    The outcome label is a single mutually exclusive class chosen by the
    priority order below.

    Returns:
        A ``(target_text, outcome_label)`` tuple of strings.
    """
    #### For T5 output

    ## Get the features needed from the current row
    qb = row['passer_player_name']

    # Matches "<jersey>-<Initial(s)>.<Surname>", e.g. "15-P.Mahomes",
    # "33-Ja.Williams", "4-A.O'Connell" or "19-J.Smith-Schuster".
    # The captured group is the name without the jersey number, which is
    # the form used by passer_player_name.
    player_name_regex = r"\d{1,2}-([A-Z][a-z]*\.[A-Za-z]+(?:['-][A-Za-z]+)*)"

    def _anonymize(match):
        # NOTE(review): when the passer is the "The QB" placeholder no name
        # is anonymized at all; this keeps the original behaviour, confirm
        # that leaving names on plays without a passer is intended.
        if qb == "The QB" or match.group(1) == qb:
            return match.group(0)
        return 'a player'

    # Substitute match-by-match so a short token can never clobber part of
    # a longer one (as a plain str.replace on the matched text could).
    desc = re.sub(player_name_regex, _anonymize, row['desc'])

    # Remove every parenthesised span. In the play by play this includes the
    # game clock, so removing it stops the model from guessing those numbers.
    parenthesis_regex = r"\([^)]*\)"
    desc = re.sub(parenthesis_regex, "", desc)

    #### For BERT Output

    # Get all of the features needed for the current row
    fd = row['first_down']
    td = row['touchdown']
    yds = row['yards_gained']
    fumble = row['fumble_lost']
    interception = row['interception']
    down = int(row['down'])
    ic = row['incomplete_pass']

    # Assign the labels mutually exclusively, this is defined
    # in more detail in the report
    if td == 1.0:
        label = 'touchdown'
    elif fd == 1.0:
        label = 'first_down'
    elif fumble == 1.0 or interception == 1.0:
        label = 'turnover'
    elif down == 3:
        # A converted third down was already labelled first_down above
        label = 'failed_third_down_conversion'
    elif ic == 1.0:
        label = 'incomplete_pass'
    elif yds < 0:
        label = 'yardage_lost'
    elif yds == 0:
        label = 'no_gain'
    elif yds > 0:
        label = 'yardage_gained'
    else:
        # Only reachable when yards_gained is not comparable (e.g. NaN)
        label = 'no_label'

    return desc, label
    

 
# Define a function to convert each rows data inputs and outputs to text for the model
def format_as_nl(row):
    """Return the model-ready text fields for one play-by-play row.

    The result is a dict holding the generated input sentence, the cleaned
    play description used as the T5 target, and the BERT outcome label.
    """
    prompt = generate_input_text(row)
    description, outcome = generate_output_text(row)

    return dict(input_text=prompt, target_text=description, outcome_label=outcome)
    
# Seed the template sampling so the generated text (and therefore every
# downstream split and metric) is the same on each run; the shuffle below
# is already seeded, but the text generation was not.
random.seed(123)

# Apply the constructed functions to every play and collect the resulting
# dicts into a DataFrame with one column per text field
final_data = pd.DataFrame(df.apply(format_as_nl, axis=1).tolist())


## Splitting the Data for the Two Models

In [None]:
# Shuffle the rows once with a fixed seed
df_shuffled = final_data.sample(frac=1, random_state=123).reset_index(drop=True)

## Give each model half of the shuffled data
split = int(len(df_shuffled) * 0.5)

t5_data = df_shuffled.iloc[:split].reset_index(drop=True)
bert_data = df_shuffled.iloc[split:].reset_index(drop=True)

## T5: train/val/test as 80/10/10
n_t5 = len(t5_data)
train = int(n_t5 * 0.8)
val = train + int(n_t5 * 0.1)

t5_train, t5_val, t5_test = (
    t5_data.iloc[start:stop].reset_index(drop=True)
    for start, stop in ((0, train), (train, val), (val, n_t5))
)

print(n_t5, len(t5_train), len(t5_val), len(t5_test))

## BERT: train/val/test1/test2 as 80/10/5/5
n_bert = len(bert_data)
train = int(n_bert * 0.8)
val = train + int(n_bert * 0.1)
test1 = val + int(n_bert * 0.05)

bert_train, bert_val, bert_test1, bert_test2 = (
    bert_data.iloc[start:stop].reset_index(drop=True)
    for start, stop in ((0, train), (train, val), (val, test1), (test1, n_bert))
)

print(n_bert, len(bert_train), len(bert_val), len(bert_test1), len(bert_test2))

In [None]:
# Class balance of the outcome labels in the BERT half of the data
bert_data.loc[:, 'outcome_label'].value_counts()

In [None]:
# Show the three text fields of the first two BERT examples
for row_idx in range(2):
    for column in ('input_text', 'target_text', 'outcome_label'):
        print(bert_data[column][row_idx])

In [None]:
# Show the three text fields of the first two T5 examples
for row_idx in range(2):
    for column in ('input_text', 'target_text', 'outcome_label'):
        print(t5_data[column][row_idx])

## Baselines

In [None]:
## Getting baselines for the BERT model

## Baseline One: Using random class label
class_labels = ['touchdown', 'first_down', 'turnover', 'failed_third_down_conversion', 'incomplete_pass',
                'yardage_lost', 'no_gain', 'yardage_gained']

num_y = len(bert_test1)

# Fixed seed so the random baseline is reproducible
random.seed(123)
baseline_one_bert = random.choices(class_labels, k=num_y)

true_y = list(bert_test1['outcome_label'])

# zero_division=0 scores classes that are never predicted as 0 without
# emitting an UndefinedMetricWarning (the value is the same as the default)
print("Baseline One Accuracy:", accuracy_score(true_y, baseline_one_bert))
print("Baseline One F1:", f1_score(true_y, baseline_one_bert, average='macro', zero_division=0))

## Baseline Two: Using majority class label
# Derive the majority class from the training labels instead of
# hard-coding it, so the baseline stays correct if the data changes
majority_label = bert_train['outcome_label'].mode()[0]
baseline_two_bert = [majority_label] * num_y

print("Baseline Two Accuracy:", accuracy_score(true_y, baseline_two_bert))
print("Baseline Two F1:", f1_score(true_y, baseline_two_bert, average='macro', zero_division=0))

In [None]:
## Getting baselines for the T5 model

# Text-generation metrics shared by both baselines
bleu = evaluate.load("bleu")
rouge = evaluate.load("rouge")

## Baseline One: pair every input with a randomly chosen other target
true_y = t5_test['target_text'].tolist()

baseline_one_t5 = list(true_y)
random.seed(123)
random.shuffle(baseline_one_t5)

# BLEU expects a list of references per prediction, ROUGE a single one
bleu_score = bleu.compute(predictions=baseline_one_t5, references=[[reference] for reference in true_y])
rouge_scores = rouge.compute(predictions=baseline_one_t5, references=true_y)

print("Baseline One BLEU:", bleu_score['bleu'])
for title, key in (('Rouge One', 'rouge1'), ('Rouge Two', 'rouge2'), ('Rouge L', 'rougeL')):
    print(f'Baseline One {title}:', rouge_scores[key])


## Baseline Two: templated target text based on outcome
def template_maker(row):
    outcome = row['outcome_label']
    
    if outcome == 'touchdown':
        output = "The offense got a touchdown."
    elif outcome == 'first_down':
        output = "The offense got a first down."
    elif outcome == 'no_gain':
        output = "There was no gain on the play."
    elif outcome == 'failed_third_down_conversion':
        output = "The offense failed on third down."
    elif outcome == 'yardage_lost':
        output = "The offense lost some yards on the play."
    elif outcome == 'yardage_gained':
        output = "The offense gained some yards."
    elif outcome == 'incomplete_pass':
        output = "The pass was incomplete."
    elif outcome == 'turnover':
        output = "There was a turnover on the play."
    else:
        output = "The play had an unknown outcome."
    
    return output

# Build one templated sentence per test row from its outcome label
baseline_two_t5 = [template_maker(record) for _, record in t5_test.iterrows()]

# Score the templated sentences against the reference target texts
bleu_score = bleu.compute(predictions=baseline_two_t5,
                          references=[[reference] for reference in true_y])
rouge_scores = rouge.compute(predictions=baseline_two_t5, references=true_y)

print("Baseline Two BLEU:", bleu_score['bleu'])
for heading, metric_key in (('Baseline Two Rouge One:', 'rouge1'),
                            ('Baseline Two Rouge Two:', 'rouge2'),
                            ('Baseline Two Rouge L:', 'rougeL')):
    print(heading, rouge_scores[metric_key])

## Tokenize the Data for the T5 Model

In [None]:
# Load tokenizer using pretrained t5 small
tokenizer_t5 = T5Tokenizer.from_pretrained('t5-small')

# Convert dfs to datasets since the tokenizer needs them like this
t5_train_set = Dataset.from_pandas(t5_train[['input_text', 'target_text']])
t5_val_set = Dataset.from_pandas(t5_val[['input_text', 'target_text']])
t5_test_set = Dataset.from_pandas(t5_test[['input_text', 'target_text']])

# Function to tokenize the data following ref in report doc
def tokenize(data_set):
    """Tokenize one example (or one batch) for T5 fine-tuning.

    Args:
        data_set: mapping with 'input_text' and 'target_text'; each value is
            a single string (batched=False) or a list of strings
            (batched=True).

    Returns:
        dict with 'input_ids' and 'attention_mask' for the input text and
        'labels' for the target text. Padding positions in 'labels' are set
        to -100 so the loss ignores them.
    """
    # Tokenize the input text
    input_enc = tokenizer_t5(data_set['input_text'], max_length=512, padding='max_length', truncation=True)

    # Tokenize the target text
    target_enc = tokenizer_t5(data_set['target_text'], max_length=128, padding='max_length', truncation=True)

    pad_id = tokenizer_t5.pad_token_id

    def mask_padding(token_ids):
        # -100 is the index the loss skips; without this the model is also
        # trained to predict the padding that fills the targets up to 128
        return [-100 if token == pad_id else token for token in token_ids]

    labels = target_enc.input_ids
    if labels and isinstance(labels[0], list):
        # Batched call: one list of token ids per example
        labels = [mask_padding(sequence) for sequence in labels]
    else:
        labels = mask_padding(labels)

    # Place these in a dict format
    return {'input_ids': input_enc.input_ids, 'attention_mask': input_enc.attention_mask, 'labels': labels}

# Final dataset dict
t5_split_dataset = {'train': t5_train_set, 'validation': t5_val_set, 'test': t5_test_set}

# Tokenize all splits
t5_tokenized_datasets = {split: data_set.map(tokenize, batched=False) for split, data_set in t5_split_dataset.items()}

## Tune for T5 Hyperparameters

In [None]:
#### Tuning weight decay larger = 0.01

# Start this run from the pretrained t5-small weights
model = T5ForConditionalGeneration.from_pretrained('t5-small')

# Training configuration: a single epoch, because this run only compares
# hyperparameters; checkpoint and evaluate once per epoch
training_args = TrainingArguments(
    output_dir='./t5-wd-0.01',
    logging_dir='./logs',
    num_train_epochs=1,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    weight_decay=0.01,
    warmup_steps=10,
    logging_steps=20,
    evaluation_strategy='epoch',
    save_strategy='epoch',
    save_total_limit=1,
    fp16=False,
)

# Train on the tokenized training split and evaluate on the validation split
# (setup follows the ref in the report doc and HW3)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=t5_tokenized_datasets['train'],
    eval_dataset=t5_tokenized_datasets['validation'],
)

trainer.train()

In [None]:
## Evaluate for hyperparameter wd = 0.01
# Keep the model on the CPU: the tokenizer call below is not moved to any
# other device, so model and inputs must live in the same place
model = model.to('cpu')
# Switch off dropout explicitly so generation does not depend on the mode
# the Trainer happened to leave the model in
model.eval()

# Initialize empty list for the predictions and true_y
predictions = []
true_y = []

# Hyperparameters are compared on the validation split; the test split stays
# untouched so the final model's test scores are not biased by this choice
## Uses ref from report doc
for row in t5_tokenized_datasets['validation']:
    # Tokenize the input text
    input_tokenized = tokenizer_t5(row['input_text'], return_tensors="pt", truncation=True, padding=True).input_ids

    # Generate the model's text for this input
    output_tokenized = model.generate(input_tokenized, max_length=128)[0]

    # Decode the generated response to a string
    pred = tokenizer_t5.decode(output_tokenized, skip_special_tokens=True)

    # Get the corresponding true value for the current row
    y = row['target_text']

    # Add the generated prediction and true value to the lists
    predictions.append(pred)
    true_y.append(y)

bleu = evaluate.load("bleu")
bleu_score = bleu.compute(predictions=predictions, references=[[y] for y in true_y])
print("BLEU:", bleu_score['bleu'])

In [None]:
#### Tuning weight decay smaller = 0.0001

# Start this run from the pretrained t5-small weights
model = T5ForConditionalGeneration.from_pretrained('t5-small')

# Training configuration: a single epoch, because this run only compares
# hyperparameters; checkpoint and evaluate once per epoch
training_args = TrainingArguments(
    output_dir='./t5-wd-0.0001',
    logging_dir='./logs',
    num_train_epochs=1,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    weight_decay=0.0001,
    warmup_steps=10,
    logging_steps=20,
    evaluation_strategy='epoch',
    save_strategy='epoch',
    save_total_limit=1,
    fp16=False,
)

# Train on the tokenized training split and evaluate on the validation split
# (setup follows the ref in the report doc and HW3)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=t5_tokenized_datasets['train'],
    eval_dataset=t5_tokenized_datasets['validation'],
)

trainer.train()

In [None]:
## Evaluate for hyperparameter wd = 0.0001
# Keep the model on the CPU: the tokenizer call below is not moved to any
# other device, so model and inputs must live in the same place
model = model.to('cpu')
# Switch off dropout explicitly so generation does not depend on the mode
# the Trainer happened to leave the model in
model.eval()

# Initialize empty list for the predictions and true_y
predictions = []
true_y = []

# Hyperparameters are compared on the validation split; the test split stays
# untouched so the final model's test scores are not biased by this choice
## Uses ref from report doc
for row in t5_tokenized_datasets['validation']:
    # Tokenize the input text
    input_tokenized = tokenizer_t5(row['input_text'], return_tensors="pt", truncation=True, padding=True).input_ids

    # Generate the model's text for this input
    output_tokenized = model.generate(input_tokenized, max_length=128)[0]

    # Decode the generated response to a string
    pred = tokenizer_t5.decode(output_tokenized, skip_special_tokens=True)

    # Get the corresponding true value for the current row
    y = row['target_text']

    # Add the generated prediction and true value to the lists
    predictions.append(pred)
    true_y.append(y)

bleu = evaluate.load("bleu")
bleu_score = bleu.compute(predictions=predictions, references=[[y] for y in true_y])
print("BLEU:", bleu_score['bleu'])

## Training Final T5 Model

In [None]:
#### Final model

# Start the final run from the pretrained t5-small weights
model_t5 = T5ForConditionalGeneration.from_pretrained('t5-small')

# Training configuration: three epochs for the final model, weight decay 0.01;
# checkpoint and evaluate once per epoch
training_args = TrainingArguments(
    output_dir='./t5-final-model',
    logging_dir='./logs',
    num_train_epochs=3,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    weight_decay=0.01,
    warmup_steps=10,
    logging_steps=20,
    evaluation_strategy='epoch',
    save_strategy='epoch',
    save_total_limit=1,
    fp16=False,
)

# Train on the tokenized training split and evaluate on the validation split
# (setup follows the ref in the report doc and HW3)
trainer_t5 = Trainer(
    model=model_t5,
    args=training_args,
    train_dataset=t5_tokenized_datasets['train'],
    eval_dataset=t5_tokenized_datasets['validation'],
)

trainer_t5.train()

## Evaluate the Final T5 Model

In [None]:
# Run generation with the final model on the CPU
model_t5 = model_t5.to('cpu')

# Generated texts and their references, collected in test-set order
predictions = []
true_y = []

# Generate one description per test example
## Uses ref from report doc
for example in t5_tokenized_datasets['test']:
    # Encode the raw input text as a tensor of token ids
    encoded = tokenizer_t5(example['input_text'], return_tensors="pt", truncation=True, padding=True)

    # generate() returns a batch; take its first sequence
    generated_ids = model_t5.generate(encoded.input_ids, max_length=128)[0]

    # Store the decoded text next to the reference for this example
    predictions.append(tokenizer_t5.decode(generated_ids, skip_special_tokens=True))
    true_y.append(example['target_text'])

In [None]:
# Score the generated texts against the references:
# BLEU takes a list of references per prediction, ROUGE a single reference
bleu_score = bleu.compute(predictions=predictions,
                          references=[[reference] for reference in true_y])
rouge_score = rouge.compute(predictions=predictions, references=true_y)

# Print out the BLEU and ROUGE scores
print("BLEU:", bleu_score['bleu'])
for heading, metric_key in (("ROUGE-1:", 'rouge1'),
                            ("ROUGE-2:", 'rouge2'),
                            ("ROUGE-L:", 'rougeL')):
    print(heading, rouge_score[metric_key])

## Tokenize Data for BERT Model

In [None]:
## Establish an encoder that turns the outcome labels into integer class ids
# Uses ref from report doc as well as stackoverflow
# The mapping is learned from the training split only
encoder = LabelEncoder().fit(bert_train['outcome_label'])

# Add an integer 'label' column to every split using that same mapping
for split_frame in (bert_train, bert_val, bert_test1, bert_test2):
    split_frame['label'] = encoder.transform(split_frame['outcome_label'])

In [None]:
# Wrap the three frames in a DatasetDict, the format recommended for training
# by the ref in the report doc
bert_dataset = DatasetDict({
    split_name: Dataset.from_pandas(split_frame)
    for split_name, split_frame in (("train", bert_train),
                                    ("val", bert_val),
                                    ("test", bert_test1))
})

# Same tokenizer as used in an earlier homework / previous class
tokenizer_bert = AutoTokenizer.from_pretrained("bert-base-uncased")

# Tokenization function, established using the ref from the report doc
def tokenize(row):
    """Tokenize 'target_text', padded/truncated to exactly 128 tokens.

    Called through map(..., batched=True), so `row` holds a batch of rows.
    """
    texts = row['target_text']
    return tokenizer_bert(texts, max_length=128, padding="max_length", truncation=True)

# Tokenize every split in the DatasetDict: train, val and test
tokenized = bert_dataset.map(tokenize, batched=True)

# Expose only the model inputs and the label, as torch tensors
# (established with help from the ref in the report doc)
tokenized.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])

## Tune for BERT Hyperparameters

In [None]:
## Tuning for weight decay larger = 0.01

# Initialize the model from the pre trained model corresponding to the tokenizer
model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=8)

# Function to compute the F1 and accuracy for training and for evaluating later
# Described from previous homeworks, and used from other courses
def compute_metrics(p):
    """Return accuracy and macro-F1 for one evaluation pass.

    Args:
        p: pair (logits, labels); the predicted class of each row is the
            argmax of its logits along axis 1.

    Returns:
        dict with keys "accuracy" and "f1".
    """
    logits, labels = p
    predictions = np.argmax(logits, axis=1)
    accuracy = accuracy_score(labels, predictions)
    f1 = f1_score(labels, predictions, average='macro')
    return {"accuracy": accuracy, "f1": f1}

# Establish the training arguments
# Train one epoch because it is just fine tuning hyper parameters
training_args = TrainingArguments(output_dir="./BERT-wd-0.01",
                                  num_train_epochs=1,
                                  per_device_train_batch_size=16,
                                  per_device_eval_batch_size=16,
                                  warmup_steps=500,
                                  weight_decay=0.01,
                                  logging_dir="./logs",
                                  logging_steps=100)

# Apply the training arguments, following ref in report doc, as well as
# similarities to HW3
trainer = Trainer(model=model,
                  args=training_args,
                  train_dataset=tokenized["train"],
                  eval_dataset=tokenized["val"],
                  compute_metrics=compute_metrics)

trainer.train()
# Compare hyperparameters on the validation split; the test split is kept
# for the final model so its scores are not biased by this selection
eval_results = trainer.evaluate(tokenized["val"])
print(eval_results)

In [None]:
## Tuning for weight decay smaller = 0.0001

# Initialize the model from the pre trained model corresponding to the tokenizer
model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=8)

# Function to compute the F1 and accuracy for training and for evaluating later
# Described from previous homeworks, and used from other courses
def compute_metrics(p):
    """Return accuracy and macro-F1 for one evaluation pass.

    Args:
        p: pair (logits, labels); the predicted class of each row is the
            argmax of its logits along axis 1.

    Returns:
        dict with keys "accuracy" and "f1".
    """
    logits, labels = p
    predictions = np.argmax(logits, axis=1)
    accuracy = accuracy_score(labels, predictions)
    f1 = f1_score(labels, predictions, average='macro')
    return {"accuracy": accuracy, "f1": f1}

# Establish the training arguments
# Train one epoch because it is just fine tuning hyper parameters
training_args = TrainingArguments(output_dir="./BERT-wd-0.0001",
                                  num_train_epochs=1,
                                  per_device_train_batch_size=16,
                                  per_device_eval_batch_size=16,
                                  warmup_steps=500,
                                  weight_decay=0.0001,
                                  logging_dir="./logs",
                                  logging_steps=100)

# Apply the training arguments, following ref in report doc, as well as
# similarities to HW3
trainer = Trainer(model=model,
                  args=training_args,
                  train_dataset=tokenized["train"],
                  eval_dataset=tokenized["val"],
                  compute_metrics=compute_metrics)

trainer.train()
# Compare hyperparameters on the validation split; the test split is kept
# for the final model so its scores are not biased by this selection
eval_results = trainer.evaluate(tokenized["val"])
print(eval_results)

## Train Final BERT Model

In [None]:
## Training final BERT model

# Pretrained BERT matching the tokenizer, with an 8-way classification head
model_bert = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=8)

# Metric function handed to the Trainer (same approach as in previous
# homeworks and other courses)
def compute_metrics(p):
    """Return accuracy and macro-F1 from a (logits, labels) pair."""
    logits, gold = p
    # Predicted class of each row = index of its largest logit
    guessed = np.argmax(logits, axis=1)
    return {
        "accuracy": accuracy_score(gold, guessed),
        "f1": f1_score(gold, guessed, average='macro'),
    }

# Training configuration: three epochs because this is the final model
training_args = TrainingArguments(
    output_dir="./BERT-final-model",
    logging_dir="./logs",
    num_train_epochs=3,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    weight_decay=0.01,
    warmup_steps=500,
    logging_steps=100,
)

# Train on the train split, evaluate on the val split
# (setup follows the ref in the report doc and HW3)
trainer_bert = Trainer(
    model=model_bert,
    args=training_args,
    train_dataset=tokenized["train"],
    eval_dataset=tokenized["val"],
    compute_metrics=compute_metrics,
)

In [None]:
# Run the final BERT training (three epochs, per the training arguments)
trainer_bert.train()

## Evaluate the BERT Model

In [None]:
# Score the final BERT model on the "test" split (built from bert_test1);
# the metrics come from compute_metrics, i.e. accuracy and macro-F1
eval_results = trainer_bert.evaluate(tokenized["test"])
print(eval_results)

## Evaluate the T5 Model Using the BERT Model

In [None]:
# Pretrained t5-small tokenizer
tokenizer_t5 = T5Tokenizer.from_pretrained('t5-small')

# Wrap the second BERT test split as a Dataset for tokenization
t5_set = Dataset.from_pandas(bert_test2[['input_text', 'target_text']])

# Tokenization function, following the ref in the report doc
def tokenize(data_set):
    """Tokenize the input text (512 tokens) and the target text (128 tokens).

    Both are padded and truncated to their fixed length. Returns a dict with
    'input_ids', 'attention_mask' and 'labels'.
    """
    source = tokenizer_t5(data_set['input_text'], truncation=True, padding='max_length', max_length=512)
    target = tokenizer_t5(data_set['target_text'], truncation=True, padding='max_length', max_length=128)
    return dict(input_ids=source.input_ids,
                attention_mask=source.attention_mask,
                labels=target.input_ids)

# Single-split dict, so the mapping code has the same shape as for the other splits
t5_tokenized_set = {'test2': t5_set}

# Tokenize the split one row at a time
t5_tokenized_datasets_test2 = {name: subset.map(tokenize, batched=False)
                               for name, subset in t5_tokenized_set.items()}

In [None]:
# Keep the model on the CPU: the tokenizer call below is not moved to any
# other device, so model and inputs must live in the same place
model_t5 = model_t5.to('cpu')
# Switch off dropout explicitly so generation does not depend on the mode
# the Trainer happened to leave the model in
model_t5.eval()

# Initialize empty list for the generated texts
target_text = []

# Loop through each item in the test2 dataset
## Uses ref from report doc
for row in t5_tokenized_datasets_test2['test2']:
    # Tokenize the input text
    input_tokenized = tokenizer_t5(row['input_text'], return_tensors="pt", truncation=True, padding=True).input_ids

    # Generate the model's text for this input
    output_tokenized = model_t5.generate(input_tokenized, max_length=128)[0]

    # Decode the generated response to a string
    pred = tokenizer_t5.decode(output_tokenized, skip_special_tokens=True)

    # Only the generated text is kept; the reference text is not needed here
    target_text.append(pred)

# Replace the reference descriptions with the T5-generated ones so that BERT
# classifies the T5 output in the following cells
bert_test2['target_text'] = target_text

In [None]:
# Wrap the second test split in a DatasetDict, the format recommended by the
# ref in the report doc
test2_dataset = Dataset.from_pandas(bert_test2)
bert_dataset = DatasetDict({"test2": test2_dataset})

# Same tokenizer as used in an earlier homework / previous class
tokenizer_bert = AutoTokenizer.from_pretrained("bert-base-uncased")

# Tokenization function, established using the ref from the report doc
def tokenize(row):
    """Tokenize 'target_text', padded/truncated to exactly 128 tokens.

    Called through map(..., batched=True), so `row` holds a batch of rows.
    """
    texts = row['target_text']
    return tokenizer_bert(texts, max_length=128, padding="max_length", truncation=True)

# Tokenize the only split in this DatasetDict: test2
tokenized = bert_dataset.map(tokenize, batched=True)

# Expose only the model inputs and the label, as torch tensors
# (established with help from the ref in the report doc)
tokenized.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])

In [None]:
# Score BERT on test2, whose target_text now holds the T5-generated text:
# accuracy and macro-F1 of the predicted outcome against the true label
eval_results = trainer_bert.evaluate(tokenized["test2"])
print(eval_results)