In [44]:
from transformers import BertModel, BertTokenizer
import torch
from torch.utils.tensorboard import SummaryWriter
import csv

## Load data

In [133]:
# Design prompt 3, human prompts
# Keep the first column of every CSV row. Line breaks inside a prompt are
# replaced with spaces because they mess with tensorboard.
with open("data/amazonTurkDesPrompt3.csv", 'r') as file:
    human = [row[0].replace('\n', ' ') for row in csv.reader(file)]
human

['A jump rope that is also to be used as a tie and or a belt during the day and while at work.',
 'A muscle stimulate band that can be worn over any area of the body that promotes muscle use, great for traveling and people who have to sit a lot.',
 'A simple, but strong band that has a hook to be able to attach to any (thin) pole or sturdy object. The band will have a handle on one side to do pulling exercises for lats, curls for biceps, chest flys for pecs, arm extentions for triceps, etc. It will be versatile and lightweight, and can be used almost anywhere.',
 'A calf strengthener, similar to a finger strengthener. It is placed on the floor of the car and pressed and released like a spring. More resistance could be added via a stiffer spring for a more difficult excercise.',
 'A rubber ball that can be inflated / deflated extremely quickly.',
 'An x shaped resistance band that has 2 hand grips and 2 foot holds. The foothold will be stepped on to secure the system down, then the othe

In [114]:
# Design prompt 3, gpt prompts
# The first column presumably looks like "<n>. <idea text>" (verify against the
# CSV); only the idea text after the numbering is kept.
gpt = []
with open("data/DesPrompt3.csv", 'r') as file:
    csvreader = csv.reader(file)
    for row in csvreader:
        if not row:
            # csv.reader yields an empty list for a blank line; nothing to keep.
            continue
        # Split on the FIRST ". " only, so ideas that contain several sentences
        # are kept whole instead of being cut at the second sentence.
        # [-1] falls back to the full text when a row has no ". " prefix.
        gpt.append(row[0].split(". ", 1)[-1])
gpt

['Portable stepper with folding mechanism for storage.',
 'Mini jumping rope with handles that break down for packing.',
 'Compact pull-up bar with a suction cups for attaching to door frame.',
 'Collapsible ab wheel that uses resistance bands for resistance.',
 'Resistance bands loops with adjustable handles.',
 'Versatile yoga mat that can roll up and fit in a bag.',
 'Disc Shaped Pilates Reformer with a handle that allows for easy transport.',
 'Portable stationary bike that folds away for storage.',
 'Jump training platform that is collapsible for travel.',
 'Adjustable sit-up bench with foldable legs for travel.',
 'Adjustable kettlebell with a removable weight core for storage.',
 'Neck and Shoulders exerciser with a breakable frame for travel.',
 'Suspended exercise hammock with clips for packing.',
 'Handheld push-up bar with a fold-down feature.',
 'Mini weight set with a carrying case.',
 'Lightweight resistance bands with anchors for mounting on doors.',
 'Disc-shaped paddli

## Embed data

In [115]:
# The tokenizer and the encoder are loaded from the same pretrained checkpoint.
model_name = 'bert-base-uncased'

# Load order is unchanged: tokenizer first, then the model.
tokenizer, model = (
    loader.from_pretrained(model_name)
    for loader in (BertTokenizer, BertModel)
)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [116]:
# Both prompt sets are tokenized with identical settings: special tokens added,
# PyTorch tensors returned, and every prompt truncated/padded to 512 tokens.
tokenizer_kwargs = dict(
    add_special_tokens=True,
    return_tensors='pt',
    max_length=512,
    truncation=True,
    padding='max_length',
)

# Despite the names, these hold the tokenizer's full output; the next cell
# indexes them with 'input_ids'.
input_ids_human = tokenizer(human, **tokenizer_kwargs)
input_ids_gpt = tokenizer(gpt, **tokenizer_kwargs)

In [117]:
# Embed both prompt sets with BERT. Gradients are not needed for inference.
#
# The tokenizer call pads every prompt to 512 tokens (padding='max_length'), so
# the attention mask must be passed along with the ids. Without it the model
# attends to the padding positions like real tokens, which distorts the
# embeddings.
with torch.no_grad():
    # Index 1 of the model output is the pooled output: one vector per prompt.
    bert_output_human = model(
        input_ids_human['input_ids'],
        attention_mask=input_ids_human['attention_mask'],
    )[1]
    bert_output_gpt = model(
        input_ids_gpt['input_ids'],
        attention_mask=input_ids_gpt['attention_mask'],
    )[1]

print('Shape of human output: ', bert_output_human.shape)
print('Shape of gpt BERT output: ', bert_output_gpt.shape)

Shape of human output:  torch.Size([100, 768])
Shape of gpt BERT output:  torch.Size([50, 768])


## Visualize embeddings

In [134]:
# TensorBoard writer; its files go under the relative directory 'embeddings'.
writer = SummaryWriter(log_dir='embeddings')

In [135]:
# One (writer, prompt) row per prompt: all human prompts first, then all gpt
# prompts.
metadata = [
    (source, prompt)
    for source, prompts in (('human', human), ('gpt', gpt))
    for prompt in prompts
]

In [136]:
# Stack the embeddings row-wise, human first and then gpt, so that row i of the
# tensor lines up with row i of `metadata`.
embeddings = torch.cat((bert_output_human, bert_output_gpt), 0)
writer.add_embedding(embeddings, metadata=metadata, metadata_header=['writer', 'prompt'])

# Close the writer so pending data is flushed to disk and its file handle is
# released before TensorBoard is started.
writer.close()

Run in a terminal, from the directory that contains the `embeddings` folder: `tensorboard --logdir="embeddings" --host localhost`
[Link to open TensorBoard](http://localhost:6006)
Then refresh the page.