In [None]:
from transformers import AutoTokenizer, AutoModelForTableQuestionAnswering

# Load the tokenizer and the table-question-answering model from the same
# pretrained checkpoint. The checkpoint id is kept in one place so the two
# from_pretrained() calls cannot drift apart.
checkpoint = "google/tapas-base-finetuned-wtq"

tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForTableQuestionAnswering.from_pretrained(checkpoint)

# Installs

In [1]:
!pip install -q transformers==4.4.2

In [3]:
import torch
# Show the installed torch version; the torch-scatter wheel index used in the
# next install step is keyed by torch version, so the two should agree.
torch.__version__

'1.13.0'

In [4]:
!pip install torch-scatter -f https://pytorch-geometric.com/whl/torch-1.8.0+cu101.html

Looking in links: https://pytorch-geometric.com/whl/torch-1.8.0+cu101.html
Collecting torch-scatter
  Using cached torch_scatter-2.1.0.tar.gz (106 kB)
  Preparing metadata (setup.py) ... done
Building wheels for collected packages: torch-scatter
  Building wheel for torch-scatter (setup.py) ... done
  Created wheel for torch-scatter: filename=torch_scatter-2.1.0-cp39-cp39-macosx_10_9_x86_64.whl size=353673 sha256=b85b803c9c465a93f208420f40ca43e055f5cdd103ffa8c163cedef79048b68e
  Stored in directory: /Users/donaldkane/Library/Caches/pip/wheels/bf/08/9d/28fef90c720e723909a15628bda60b030ebe174780c129a040
Successfully built torch-scatter
Installing collected packages: torch-scatter
Successfully installed torch-scatter-2.1.0


# Imports

In [5]:
import os

import pandas as pd
from transformers import pipeline

In [26]:
# Build the table-question-answering pipeline on top of the TAPAS checkpoint
# fine-tuned on WTQ; `tqa` is the callable used by every query cell below.
tqa = pipeline(
    "table-question-answering",
    model="google/tapas-base-finetuned-wtq",
)

# Reading Data

In [18]:
# Location of the Super Bowl box-score CSV. The default is the original
# machine-specific path; set the SUPERBOWL_CSV environment variable to run the
# notebook on another machine without editing this cell.
csv_path = os.environ.get(
    "SUPERBOWL_CSV",
    "/Users/donaldkane/Desktop/Transformers-Project-Sports-Analysis/TableQuestionAnswering/superbowl.csv",
)

# Read the CSV and cast every cell to str in one step: the TAPAS tokenizer
# expects a DataFrame whose cells are all text.
# NOTE(review): the displayed table has an "Unnamed: 0" column, which suggests
# the CSV carries a saved index; consider index_col=0 after confirming it does
# not change the model's answers.
table = pd.read_csv(csv_path).astype(str)

In [19]:
# Display the loaded table to check its columns and values before querying.
table

Unnamed: 0,Player,Team,Completions,Pass Attempts,Passing Yards,Touchdowns,Interceptions,Sacks,Yds,Lng,...,Rushing Yards,Rushing Touchdwons,Lng.1,Targets,Receptions,Receiving Yards,TDs,Lng.2,Fumbles,FL
0,Joe Burrow,CIN,22,33,263,1,0,7,43,75,...,3,0,4,0,0,0,0,0,0,0
1,Joe Mixon,CIN,1,1,6,1,0,0,0,6,...,72,0,14,6,5,1,0,4,0,0
2,Ja'Marr Chase,CIN,0,0,0,0,0,0,0,0,...,4,0,4,8,5,89,0,46,0,0
3,Tee Higgins,CIN,0,0,0,0,0,0,0,0,...,0,0,0,7,4,100,2,75,0,0
4,Tyler Boyd,CIN,0,0,0,0,0,0,0,0,...,0,0,0,6,5,48,0,16,0,0
5,Mike Thomas,CIN,0,0,0,0,0,0,0,0,...,0,0,0,1,1,17,0,17,0,0
6,C.J. Uzomah,CIN,0,0,0,0,0,0,0,0,...,0,0,0,2,2,11,0,6,0,0
7,Chris Evans,CIN,0,0,0,0,0,0,0,0,...,0,0,0,2,1,3,0,3,0,0
8,Samaje Perine,CIN,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
9,Matthew Stafford,LAR,26,40,283,3,2,2,13,35,...,6,0,7,1,0,0,0,0,0,0


# Super Bowl Queries

In [20]:
# Single question: run it through the pipeline and print only the answer text
# (the pipeline result is a dict; "answer" holds the formatted answer).
query = "How many passing yards did Stafford have?"
result = tqa(query=query, table=table)
print(result["answer"])

SUM > 283


In [21]:
# Batch of two questions about Stafford; the pipeline returns one result dict
# per question, in the same order as the questions.
query = [
    "Matthew Stafford touchdowns?",
    "Which team does Stafford play for?",
]
answer = tqa(query=query, table=table)

# Print just the answer text of each result.
for ans in answer:
    print(ans["answer"])

SUM > 3
LAR


In [22]:
# Batch of two questions about Tee Higgins; one result dict comes back per
# question, in order.
query = [
    "Tee Higgins Tds?",
    "How many receiving yards did Higgins have?",
]
answer = tqa(query=query, table=table)

# Print just the answer text of each result.
for ans in answer:
    print(ans["answer"])

SUM > 2
SUM > 100


In [27]:
# Batch of two questions about Joe Burrow.
# By default the pipeline answers batched questions independently (it is not
# run in sequential/conversational mode), so a pronoun such as "he" has no
# referent in the second question. Each question therefore names the player
# explicitly instead of relying on the previous one.
query = [
    "How many yards did Burrow throw for?",
    "Which team did Burrow play for?",
]
answer = tqa(table=table, query=query)

# Print just the answer text of each result.
for ans in answer:
    print(ans["answer"])

AVERAGE > 263
CIN


In [29]:
# Counting question: the printed answer carries the aggregation operator and
# the cells the model selected, not a computed number.
query = "How many people had over 1 passing yards?"
result = tqa(query=query, table=table)
print(result["answer"])

COUNT > Joe Burrow, Joe Mixon, Matthew Stafford


# Training

In [None]:
import sagemaker
import boto3
from sagemaker.huggingface import HuggingFace

# Look up the ARN of the IAM role the training job will run under.
# NOTE(review): the role name looks like an unfilled template placeholder;
# replace it with a real role that has SageMaker permissions before running.
iam_client = boto3.client('iam')
role_info = iam_client.get_role(RoleName='{IAM_ROLE_WITH_SAGEMAKER_PERMISSIONS}')
role = role_info['Role']['Arn']

# Hyperparameters forwarded to the training script.
# Only the base checkpoint and the output directory are set here; add the
# remaining hyperparameters the script needs. More info:
# https://github.com/huggingface/transformers/tree/v4.17.0/examples/pytorch/question-answering
hyperparameters = dict(
    model_name_or_path='google/tapas-base-finetuned-wtq',
    output_dir='/opt/ml/model',
)

# Git repository and branch from which the fine-tuning script is fetched.
git_config = dict(
    repo='https://github.com/huggingface/transformers.git',
    branch='v4.17.0',
)

# Configure the Hugging Face estimator: which script to run, on what hardware,
# and with which framework versions.
# NOTE(review): run_qa.py lives in the question-answering examples folder;
# confirm it can actually fine-tune a TAPAS table-QA checkpoint before
# launching a paid training job.
huggingface_estimator = HuggingFace(
    entry_point='run_qa.py',
    source_dir='./examples/pytorch/question-answering',
    git_config=git_config,
    role=role,
    instance_type='ml.p3.2xlarge',
    instance_count=1,
    transformers_version='4.17.0',
    pytorch_version='1.10.2',
    py_version='py38',
    hyperparameters=hyperparameters,
)

# Launch the training job.
huggingface_estimator.fit()