<a href="https://colab.research.google.com/github/peacekurella/SimpleTransformersExamples/blob/master/SimpleTranformersQA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
%%capture
!git clone https://github.com/peacekurella/simpletransformers.git
%cd simpletransformers/
!git checkout origin/wandblogs
!pip install -e .
!pip install wandb
%cd ..

In [2]:
%%capture
!mkdir RawData
!curl https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v2.0.json --output RawData/train.json
!curl https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v2.0.json --output RawData/eval.json

In [3]:
train_args = {
    'learning_rate':                   3e-5, # learning rate of our model
    'num_train_epochs':                   2, # number of epochs 
    'logging_steps':                      1, # number of steps before logging
    'max_seq_length':                   384, # maximum sequence length in tokens
    'doc_stride':                       128, # stride when processing sentences
    'overwrite_output_dir':            True, # overwrite the output directory
    'reprocess_input_data':           False, # reprocess the input data
    'train_batch_size':                  16, # training batch size
    'gradient_accumulation_steps':        1, # steps before applying gradients
    'evaluate_during_training':        True, # run evaluation during training
    'evaluate_during_training_steps':    40, # steps in training before eval
    'save_eval_checkpoints':          False, # save evaluation checkpoints
    'eval_batch_size':                   16, # evaluation batch size
    'wandb_project':   'SimpleTransformers', # wandb project name
    'wandb_kwargs': {'job_type': 'training'} # additional args for wandb init
}

In [4]:
import wandb 

# initialize a run to save the datasets
run = wandb.init(
    project=train_args['wandb_project'],
    job_type="raw-dataset-creation"
)

# log the raw data
raw_data_artifact = wandb.Artifact("raw-data", "dataset")
raw_data_artifact.add_dir('RawData/')
run.log_artifact(raw_data_artifact)

run.finish()

[34m[1mwandb[0m: Currently logged in as: [33mwandb[0m (use `wandb login --relogin` to force relogin)


[34m[1mwandb[0m: Adding directory to artifact (./RawData)... Done. 0.1s


VBox(children=(Label(value=' 0.01MB of 0.01MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

In [5]:
!mkdir -p Data
import json
import random

with open('RawData/train.json', 'r') as f:
    train_data = json.load(f)
train_data = [item for topic in train_data['data'] for item in topic['paragraphs'] ]
random.shuffle(train_data)
train_data = train_data[:int(len(train_data) * 0.01)]

with open('RawData/eval.json', 'r') as f:
    eval_data = json.load(f)
eval_data = [item for topic in eval_data['data'] for item in topic['paragraphs'] ]
random.shuffle(eval_data)
eval_data = eval_data[:int(len(eval_data) * 0.01)]

with open('Data/train.json', 'w') as f:
    json.dump(train_data, f)
with open('Data/eval.json', 'w') as f:
    json.dump(eval_data, f)

mkdir: cannot create directory ‘Data’: File exists


In [6]:
# initialize a run to save the datasets
run = wandb.init(
    project=train_args['wandb_project'],
    job_type="dataset-split-creation"
)

# use the raw data artifact
run.use_artifact("raw-data:latest")

# log the train data artifact
train_artifact = wandb.Artifact("train-data", "dataset")
train_artifact.add_file("Data/train.json")
run.log_artifact(train_artifact)

# log the train data artifact
eval_artifact = wandb.Artifact("eval-data", "dataset")
eval_artifact.add_file("Data/eval.json")
run.log_artifact(eval_artifact)

# finish logging the data logging run
run.finish()

VBox(children=(Label(value=' 0.46MB of 0.46MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

In [8]:
from simpletransformers.question_answering import QuestionAnsweringModel

model = QuestionAnsweringModel('bert', 'bert-base-cased', args=train_args)

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForQuestionAnswering: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForQuestionAnswering were not initialized from the model checkpoint at bert-base-cased and a

In [36]:
%%capture --no-display
model.train_model(train_data, eval_data=eval_data)

Epoch:   0%|          | 0/2 [00:00<?, ?it/s]

VBox(children=(Label(value=' 0.26MB of 0.26MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Training loss,1.36355
lr,2e-05
global_step,40.0
_runtime,35.0
_timestamp,1631147438.0
_step,42.0
correct,50.0
similar,65.0
incorrect,3.0
train_loss,1.50604


Running Epoch 0 of 2:   0%|          | 0/81 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Epoch 1 of 2:   0%|          | 0/81 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

(162,
 {'global_step': [10,
   20,
   30,
   40,
   50,
   60,
   70,
   80,
   81,
   90,
   100,
   110,
   120,
   130,
   140,
   150,
   160,
   162],
  'correct': [47,
   52,
   32,
   54,
   41,
   36,
   46,
   42,
   43,
   52,
   38,
   57,
   39,
   46,
   47,
   46,
   47,
   48],
  'similar': [64,
   60,
   74,
   57,
   69,
   75,
   64,
   72,
   71,
   65,
   73,
   57,
   71,
   66,
   68,
   68,
   68,
   67],
  'incorrect': [7, 6, 12, 7, 8, 7, 8, 4, 4, 1, 7, 4, 8, 6, 3, 4, 3, 3],
  'train_loss': [1.859588623046875,
   1.6595916748046875,
   1.7039947509765625,
   1.5373878479003906,
   1.9902877807617188,
   1.5447425842285156,
   1.2659683227539062,
   2.0813827514648438,
   2.3034567832946777,
   1.0309219360351562,
   1.3239269256591797,
   0.698997974395752,
   1.2369308471679688,
   0.6695728302001953,
   1.1457672119140625,
   0.9256343841552734,
   1.2290210723876953,
   0.7256609201431274],
  'eval_loss': [-4.4765625,
   -4.50341796875,
   -4.6259765625,
   -

In [10]:
%%capture 
# when trying to use artifact it asks for api artifact obj instead of the string name 
api = wandb.Api()
training_run = api.run("wandb/" + train_args['wandb_project'] + "/" + model.wandb_run_id)
training_run.use_artifact(api.artifact("wandb/" + train_args['wandb_project'] + "/" + "train-data:latest"))
training_run.use_artifact(api.artifact("wandb/" + train_args['wandb_project'] + "/" + "eval-data:latest"))

<Artifact QXJ0aWZhY3Q6MjM0Nzk4NjY=>

In [11]:
import os 
with wandb.init(id=model.wandb_run_id, resume="allow", project=train_args['wandb_project']) as training_run:
    for dir in sorted(os.listdir("outputs")):
        if "checkpoint" in dir:
            artifact = wandb.Artifact("model-checkpoints", type="checkpoints")
            artifact.add_dir("outputs/"+dir)
            training_run.log_artifact(artifact)

VBox(children=(Label(value=' 0.67MB of 0.67MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Training loss,1.77778
lr,0.0
global_step,162.0
_runtime,142.0
_timestamp,1631146242.0
_step,179.0
correct,53.0
similar,61.0
incorrect,4.0
train_loss,1.77778


0,1
Training loss,███▇▆▅▅▅▄▄▄▄▃▄▄▄▄▄▃▃▂▄▃▃▂▂▂▂▂▂▃▁▃▃▂▂▃▂▂▁
lr,▂▅███▇▇▇▇▇▇▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▁▁▁
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
_runtime,▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇█
_timestamp,▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇█
_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
correct,▁█████████▇▇▇▇▇▇▇▇
similar,█▁▁▁▁▁▁▁▁▂▃▄▂▃▃▅▂▂
incorrect,█▁▁▁▁▁▁▁▁▁▂▂▁▁▂▂▂▂
train_loss,█▆▅▄▃▃▄▃▃▃▂▂▂▁▂▁▂▁


[34m[1mwandb[0m: Adding directory to artifact (./outputs/checkpoint-162-epoch-2)... Done. 6.6s
[34m[1mwandb[0m: Adding directory to artifact (./outputs/checkpoint-3-epoch-1)... Done. 1.6s
[34m[1mwandb[0m: Adding directory to artifact (./outputs/checkpoint-6-epoch-2)... Done. 1.6s
[34m[1mwandb[0m: Adding directory to artifact (./outputs/checkpoint-81-epoch-1)... Done. 12.1s


VBox(children=(Label(value=' 4932.06MB of 4932.06MB uploaded (0.21MB deduped)\r'), FloatProgress(value=1.0, ma…

0,1
incorrect,4.0
train_loss,1.77778
global_step,162.0
_step,179.0
similar,61.0
_runtime,142.0
eval_loss,-3.56372
_timestamp,1631146242.0
Training loss,1.77778
lr,0.0


In [12]:
_, outputs = model.eval_model(eval_data)

Running Evaluation:   0%|          | 0/8 [00:00<?, ?it/s]

In [34]:
import pandas as pd

eval_data_df = pd.DataFrame(
    columns=[
        'id',
        'question',
        'context'
    ]
)
for context in eval_data:
    for qas in context['qas']:
            eval_data_df = eval_data_df.append([
                {
                    'id': qas['id'],
                    'context': context['context'],
                    'question': qas['question']
                }
            ])

eval_data_df = eval_data_df.reset_index(drop=True)

results = pd.DataFrame(
    columns=[
        'id',
        'predicted_answer',
        'actual_answer',
        'category'
    ]
)

for entry in outputs['correct_text']:
    results = results.append([
        {
            "id": entry, 
            "predicted_answer": outputs['correct_text'][entry],
            "actual_answer": outputs['correct_text'][entry],
            "category": "correct"
        }
    ])

for entry in outputs['similar_text']:
    results = results.append([
        {
            "id": entry, 
            "predicted_answer": outputs['similar_text'][entry]['predicted'],
            "actual_answer": outputs['similar_text'][entry]['truth'],
            "category": "similar"
        }
    ])

for entry in outputs['incorrect_text']:
    results = results.append([
        {
            "id": entry, 
            "predicted_answer": outputs['incorrect_text'][entry]['predicted'],
            "actual_answer": outputs['incorrect_text'][entry]['truth'],
            "category": "incorrect"
        }
    ])

results = results.reset_index(drop=True)
results = eval_data_df.set_index("id").join(results.set_index("id"))
results = results.drop_duplicates()

with wandb.init(resume=model.wandb_run_id, project=train_args["wandb_project"]) as training_run:
    training_run.log({"eval-results": wandb.Table(dataframe=results)})
training_run.finish()

VBox(children=(Label(value=' 0.48MB of 0.48MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
_runtime,230.0
eval_loss,-3.56372
global_step,162.0
correct,53.0
similar,61.0
incorrect,4.0
_timestamp,1631147287.0
train_loss,1.77778
lr,0.0
_step,180.0


0,1
_runtime,▁
_timestamp,▁
_step,▁
