<a href="https://colab.research.google.com/github/olwflynn/Learning/blob/master/TransformerPolicyFn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install datasets evaluate transformers[sentencepiece]

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting datasets
  Downloading datasets-2.8.0-py3-none-any.whl (452 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m452.9/452.9 KB[0m [31m9.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting evaluate
  Downloading evaluate-0.4.0-py3-none-any.whl (81 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.4/81.4 KB[0m [31m10.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting transformers[sentencepiece]
  Downloading transformers-4.25.1-py3-none-any.whl (5.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.8/5.8 MB[0m [31m64.5 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0.0,>=0.2.0
  Downloading huggingface_hub-0.11.1-py3-none-any.whl (182 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m182.4/182.4 KB[0m [31m16.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting xxhash
  Downloading xxhash-3.2.0-cp38

In [None]:
import torch
from transformers import AdamW, AutoTokenizer, AutoModelForSequenceClassification

# Same as before
checkpoint = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
sequences = [
    "I've been waiting for a HuggingFace course my whole life.",
    "This course is amazing!",
]
batch = tokenizer(sequences, padding=True, truncation=True, return_tensors="pt")

# This is new
batch["labels"] = torch.tensor([1, 1])

optimizer = AdamW(model.parameters())
loss = model(**batch).loss
loss.backward()
optimizer.step()

Downloading:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

In [None]:
print(batch)

{'input_ids': tensor([[  101,  1045,  1005,  2310,  2042,  3403,  2005,  1037, 17662, 12172,
          2607,  2026,  2878,  2166,  1012,   102],
        [  101,  2023,  2607,  2003,  6429,   999,   102,     0,     0,     0,
             0,     0,     0,     0,     0,     0]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]]), 'labels': tensor([1, 1])}


TODO:
- generate dataset of text inputs and output policy i.e. set of actions
- train agent to predict by finetuning GPT-2 features using the dataset
- use agent to create and refine action space

In [23]:
#@title Generate an example text dataset deterministically

# text input | < action_1, action_2, ... >

import random
import numpy as np
import pandas as pd

# utility fn that takes list of actions and creates text in the below form
# e.g. [R, L, R, R, None, ...] --> Move right then move left then move right then do nothing until the end of the episode

def create_text_from_actions(list_of_actions):
  text = ''
  for i, action in enumerate(list_of_actions):
    if action == 'R':
      text+=' move right'
    if action == 'L':
      text+=' move left'
    if action == None:
      text+=' do nothing'
    if action == '...' and i == len(list_of_actions)-1:
      text+=' until the end of the episode'
    if i != len(list_of_actions)-1:
      text+=' then'
  return text.strip()


# utility fn that generates policies (list of actions) randomly

def create_policies(num_policies=10):
  list_of_policies = []
  possible_actions = set(['R', 'L', '...', None])
  for i in range(num_policies):
    policy = []
    keep_running = True
    while keep_running:
      next_action = random.choice(tuple(possible_actions))
      policy.append(next_action)
      if next_action == '...':
        keep_running = False
    if len(policy) > 1:
      list_of_policies.append(policy)
  return list_of_policies

# utility fn that creates a dataframe of policies and their text representation

def create_text_policy_dataset(list_of_policies):
  n = len(list_of_policies)
  arr = np.array(list_of_policies).reshape(n, -1)
  arr_of_text = np.array([create_text_from_actions(policy) for policy in list_of_policies]).reshape(n, -1)
  arr = np.concatenate((arr, arr_of_text), axis=1)
  df = pd.DataFrame(arr, columns=['policy', 'text'])
  return df

policies = create_policies()
df = create_text_policy_dataset(policies)
df.values[:5]

  arr = np.array(list_of_policies).reshape(n, -1)


array([[list([None, None, 'R', '...']),
        'do nothing then do nothing then move right then until the end of the episode'],
       [list(['R', 'R', 'R', None, 'R', '...']),
        'move right then move right then move right then do nothing then move right then until the end of the episode'],
       [list([None, 'R', 'L', None, 'R', 'L', '...']),
        'do nothing then move right then move left then do nothing then move right then move left then until the end of the episode'],
       [list(['R', 'R', 'L', 'L', '...']),
        'move right then move right then move left then move left then until the end of the episode'],
       [list([None, '...']),
        'do nothing then until the end of the episode']], dtype=object)