In [1]:
from minicons.minicons import cwe

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader

import pytorch_lightning as pl

import csv
from tqdm import tqdm
from collections import defaultdict

In [2]:
def load_height_dataset(split = "train", data_dir = "../data"):
    """Load one split of the height dataset from ``{data_dir}/height_{split}.csv``.

    The first row of the file is treated as a header and skipped. Every
    remaining non-empty row must hold exactly seven columns,
    ``sentence, name, vs, s, m, t, vt``; the last five are parsed as floats.

    Args:
        split: Name of the split; selects the file ``height_{split}.csv``.
        data_dir: Directory that contains the CSV files.

    Returns:
        A list with one ``[(sentence, name), fuzziness]`` entry per data row,
        where ``fuzziness`` is a 1-D float tensor of length 5 holding the
        columns ``vs, s, m, t, vt`` in that order. An empty or header-only
        file yields ``[]``.

    Raises:
        FileNotFoundError: If the CSV file does not exist.
        ValueError: If a row does not have seven columns or one of the five
            score columns is not a valid float.
    """
    dataset = []
    # newline="" is what the csv module asks for, so that quoted fields
    # containing line breaks are parsed as a single field.
    with open(f"{data_dir}/height_{split}.csv", "r", newline="") as f:
        reader = csv.reader(f)
        # Skip the header through the reader (not the raw file handle) so a
        # multi-line quoted header is consumed as one record; the default
        # keeps an empty file from raising StopIteration.
        next(reader, None)
        for line in reader:
            if not line:
                # csv.reader yields [] for blank lines (e.g. a trailing one).
                continue
            sentence, name, vs, s, m, t, vt = line
            fuzziness = torch.tensor([float(x) for x in [vs, s, m, t, vt]])
            dataset.append([(sentence, name), fuzziness])
    return dataset
            
# height_train = load_height_dataset("train")
# height_test = load_height_dataset("test")
# height_dev = load_height_dataset("dev")

In [6]:
# Shared contextual-word-embedding extractor used by the cells below.
# NOTE(review): the second argument ("cpu") is presumably the device - confirm
# against the minicons CWE signature.
bert = cwe.CWE("bert-base-uncased", "cpu")

In [7]:
# Smoke test: extract representations for ten (sentence, name) queries.
# The trailing 0 sits in the argument position where HeightProbe passes its
# BERT layer index.
bert.extract_representation(
    [
        ('Delilah is 61.37 cm tall.', 'Delilah'),
        ("Roberto 's height is 61.38 cm.", 'Roberto'),
        ('Bryan is 61.4 cm.', 'Bryan'),
        ("Serena 's height is 61.41 cm.", 'Serena'),
        ('Isabelle is 61.42 cm.', 'Isabelle'),
        ('Cathy is 61.43 cm tall.', 'Cathy'),
        ('Therese is 61.44 cm.', 'Therese'),
        ('Patty is 61.45 cm tall.', 'Patty'),
        ('Emanuel is 61.46 cm tall.', 'Emanuel'),
        ("Edward 's height is 61.47 cm.", 'Edward'),
    ],
    0,
)

tensor([[ 0.9993, -0.2315, -0.4809,  ...,  0.1523, -0.1126, -0.7785],
        [ 0.3991, -0.0655,  0.1400,  ...,  0.5235,  0.1914, -0.5471],
        [-0.9946,  0.8755, -0.5888,  ...,  0.1873,  2.0128,  0.3117],
        ...,
        [ 1.2157, -0.2108,  1.1080,  ...,  0.7018,  0.1657, -0.1995],
        [ 0.2382,  1.1208,  0.4747,  ..., -0.0275, -0.6111, -1.2599],
        [ 0.2843, -0.3253, -0.4962,  ...,  0.2942,  1.0745, -0.7884]])

In [8]:
# Sanity check of the loss used by the probe: l1_loss averages the absolute
# error over every element of the two (2, 3) tensors.
F.l1_loss(
    torch.tensor([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]]),
    torch.tensor([[0.9, 0.2, 0.1], [0.0, 1.0, 0.7]]),
)

tensor(0.1833)

In [9]:
class HeightProbe(pl.LightningModule):
    """MLP probe mapping a word representation to five fuzzy height scores.

    The probe is a two-layer MLP with a sigmoid output, trained with an L1
    loss against the five-element target tensors produced by
    ``load_height_dataset``. Representations are taken from the module-level
    ``bert`` extractor defined in this notebook, which must exist before any
    training, validation or test step runs.

    Args:
        bert_layer: Layer index passed to ``bert.extract_representation``.
        input_dim: Size of the incoming representation.
        hidden_dim: Width of the hidden layer.
        n_classes: Number of output scores.
        lr: Learning rate for Adam.
        weight_decay: Weight decay for Adam.
    """

    def __init__(self, bert_layer = 1, input_dim = 768, hidden_dim = 1024,
                 n_classes = 5, lr = 1e-3, weight_decay = 1e-5):
        super().__init__()
        self.mlp = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, n_classes),
            nn.Sigmoid()
        )
        self.layer = bert_layer
        self.lr = lr
        self.weight_decay = weight_decay

    def forward(self, x):
        """Return the sigmoid-squashed scores for representations ``x``."""
        estimate = self.mlp(x)
        return estimate

    def _shared_step(self, batch, stage):
        """Compute and log the L1 loss for one batch as ``{stage}_loss``."""
        queries, fuzziness = batch
        # The batch carries the queries column-wise (sentences, names);
        # zip(*...) turns them back into one (sentence, name) pair per example.
        queries = list(zip(*queries))
        representation = bert.extract_representation(queries, self.layer)
        # The extractor is built outside this module, so its output may live
        # on a different device than the probe's weights and the targets.
        representation = representation.to(self.device)
        y_hat = self(representation)
        loss = F.l1_loss(y_hat, fuzziness)
        self.log(f'{stage}_loss', loss, prog_bar=True, logger=True)
        return loss

    def training_step(self, batch, batch_idx):
        """Return the training loss for ``batch``; logged as ``train_loss``."""
        return self._shared_step(batch, 'train')

    def validation_step(self, batch, batch_idx):
        """Return the validation loss for ``batch``; logged as ``val_loss``."""
        return self._shared_step(batch, 'val')

    def test_step(self, batch, batch_idx):
        """Return the test loss for ``batch``; logged as ``test_loss``."""
        return self._shared_step(batch, 'test')

    def configure_optimizers(self):
        """Optimise all probe parameters with Adam."""
        optimizer = torch.optim.Adam(
            self.parameters(), lr=self.lr, weight_decay=self.weight_decay
        )
        return optimizer

In [10]:
class HeightDataModule(pl.LightningDataModule):
    """Serves the train / dev / test splits of the height dataset.

    All three splits are read eagerly with ``load_height_dataset`` when the
    module is constructed and kept in ``self.train``, ``self.val`` and
    ``self.test``.

    Args:
        batch_size: Batch size used by every dataloader.
        shuffle: Whether the *training* dataloader reshuffles each epoch.
            Validation and test loaders always keep file order so that
            evaluation is deterministic.
    """

    def __init__(self, batch_size = 10, shuffle = False):
        super().__init__()
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.train = load_height_dataset("train")
        self.val = load_height_dataset("dev")
        self.test = load_height_dataset("test")

    def train_dataloader(self):
        """Dataloader over the training split, shuffled if ``self.shuffle``."""
        return DataLoader(self.train, batch_size=self.batch_size, shuffle=self.shuffle)

    def val_dataloader(self):
        """Dataloader over the dev split, never shuffled."""
        return DataLoader(self.val, batch_size=self.batch_size, shuffle=False)

    def test_dataloader(self):
        """Dataloader over the test split, never shuffled."""
        return DataLoader(self.test, batch_size=self.batch_size, shuffle=False)

In [11]:
# Probe on layer 10 of the encoder; the data module is asked to shuffle.
hp = HeightProbe(bert_layer=10)
height_data = HeightDataModule(shuffle=True)

In [12]:
# Train for at most 2000 optimisation steps, validating every 100 batches.
trainer = pl.Trainer(
    max_steps=2000,
    val_check_interval=100,
)
trainer.fit(hp, datamodule=height_data)

GPU available: True, used: False
TPU available: None, using: 0 TPU cores

  | Name | Type       | Params
------------------------------------
0 | mlp  | Sequential | 792 K 
------------------------------------
792 K     Trainable params
0         Non-trainable params
792 K     Total params
3.170     Total estimated model params size (MB)


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validation sanity check', layout=Layout…



HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…






1

In [32]:
# Query the trained probe for a single (sentence, name) pair.
# Use the probe's own configured layer rather than repeating the literal, so
# the representation always comes from the layer the probe was trained on.
# no_grad: this is inference only, so no autograd graph needs to be built.
with torch.no_grad():
    everett_estimate = hp(
        bert.extract_representation(["Everett is 10 cm tall.", "Everett"], hp.layer)
    )
everett_estimate

tensor([[7.1932e-08, 2.9458e-09, 7.4915e-09, 1.4585e-09, 8.7141e-09]],
       grad_fn=<SigmoidBackward>)