In [1]:
import torch

from torch.utils.data import DataLoader
from minicons import scorer

In [2]:
# Instantiate the minicons incremental LM scorer for the 'gpt2-xl' checkpoint;
# every scoring cell below goes through this `lm` object.
lm = scorer.IncrementalLMScorer('gpt2-xl')

Using pad_token, but it is not set yet.


In [3]:
def create_hypothesis(choice):
    """Lowercase the first word of an answer choice so it can continue a sentence.

    The first-person pronoun stays capitalised, both on its own ("I") and
    when followed directly by a non-letter, e.g. in contractions ("I'm",
    "I'd") or before punctuation ("I,"). Everything after the first space is
    returned untouched.

    Args:
        choice: the answer choice as a plain string.

    Returns:
        ``choice`` with only the casing of its first word possibly changed.
    """
    first_word, separator, remainder = choice.partition(" ")

    # "I" followed by nothing or by a non-letter is the pronoun; "It", "Ice",
    # ... have a letter in second position and are ordinary words.
    starts_with_pronoun = first_word[:1] == "I" and not first_word[1:2].isalpha()
    if starts_with_pronoun:
        return choice

    return first_word.lower() + separator + remainder

def create_stimuli(instance):
    """Turn one item into a premise prefix plus its two candidate continuations.

    The premise loses its final sentence punctuation (if it has any), gets the
    connective selected by ``instance['question']`` appended ('cause' ->
    " because", 'effect' -> " so") and is lowercased. Both choices are passed
    through ``create_hypothesis``.

    Args:
        instance: mapping with 'premise', 'choice1', 'choice2', 'question'
            and 'label' keys.

    Returns:
        ``(premise_prefix, hypothesis_a, hypothesis_b, connective)``.
        ``hypothesis_a`` is built from ``choice1`` when ``label == 0`` and
        from ``choice2`` otherwise (presumably the gold answer; confirm
        against the dataset). ``connective`` is the bare word with no
        surrounding whitespace.

    Raises:
        KeyError: if a field is missing or ``question`` is neither 'cause'
            nor 'effect'.
    """
    premise, choice1, choice2 = instance['premise'], instance['choice1'], instance['choice2']
    question, label = instance['question'], instance['label']

    domain = {
        'cause': ' because',
        'effect': ' so'
    }
    connective = domain[question]

    hypothesis1 = create_hypothesis(choice1)
    hypothesis2 = create_hypothesis(choice2)

    # Remove a single trailing sentence mark only when one is actually there;
    # an unpunctuated or space-padded premise must not lose a letter.
    premise = premise.rstrip()
    if premise.endswith(('.', '!', '?')):
        premise = premise[:-1]
    premise = premise + connective

    if label == 0:
        return premise.lower(), hypothesis1, hypothesis2, connective.strip()
    return premise.lower(), hypothesis2, hypothesis1, connective.strip()
    
def create_stimuli_flipped(instance):
    """Build the reversed-direction stimuli: each choice becomes the prefix.

    The connective mapping is the mirror image of ``create_stimuli``
    ('cause' -> " so", 'effect' -> " because"). Each choice is lowercased,
    loses its final sentence punctuation (if it has any) and gets the
    connective appended. The premise is only lowercased and keeps its
    punctuation.

    Args:
        instance: mapping with 'premise', 'choice1', 'choice2', 'question'
            and 'label' keys.

    Returns:
        ``(premise, hypothesis_a, hypothesis_b, connective)``.
        ``hypothesis_a`` is built from ``choice1`` when ``label == 0`` and
        from ``choice2`` otherwise (presumably the gold answer; confirm
        against the dataset). ``connective`` is the bare word with no
        surrounding whitespace.

    Raises:
        KeyError: if a field is missing or ``question`` is neither 'cause'
            nor 'effect'.
    """
    premise, choice1, choice2 = instance['premise'], instance['choice1'], instance['choice2']
    question, label = instance['question'], instance['label']

    domain = {
        'cause': ' so',
        'effect': ' because'
    }
    connective = domain[question]

    def _as_prefix(choice):
        # The whole choice is lowercased, so no separate first-word handling
        # is needed. Strip one trailing sentence mark only if it is present,
        # so an unpunctuated choice does not lose a letter.
        text = choice.lower().rstrip()
        if text.endswith(('.', '!', '?')):
            text = text[:-1]
        return text + connective

    hypothesis1 = _as_prefix(choice1)
    hypothesis2 = _as_prefix(choice2)

    if label == 0:
        return premise.lower(), hypothesis1, hypothesis2, connective.strip()
    return premise.lower(), hypothesis2, hypothesis1, connective.strip()

In [4]:
# Two hand-written items used to exercise the stimulus builders and scorer.
data = [
    {
        "premise": "The man turned on the faucet.",
        "choice1": "The toilet filled with water.",
        "choice2": "Water flowed from the spout.",
        "question": "effect",
        "label": 1,
        "idx": 0,
    },
    {
        "premise": "The bar closed.",
        "choice1": "it was crowded.",
        "choice2": "it was 3 AM.",
        "question": "cause",
        "label": 1,
        "idx": 0,
    },
]

In [5]:
# One (prefix, hypothesis, hypothesis, connective) tuple per item in `data`.
stimuli = list(map(create_stimuli, data))

In [6]:
# Display the generated tuples to eyeball casing and the appended connective.
stimuli

[('the man turned on the faucet so',
  'water flowed from the spout.',
  'the toilet filled with water.',
  'so'),
 ('the bar closed because', 'it was 3 AM.', 'it was crowded.', 'because')]

In [7]:
# Score every hypothesis twice: once after the full premise prefix and once
# after the bare connective, then compare the two hypotheses on the difference.
dl = DataLoader(stimuli, batch_size=2)


def _total_score(token_scores):
    """Collapse one continuation's per-token scores into a single float."""
    return token_scores.sum(0).item()


for batch in dl:
    premise, hypothesis1, hypothesis2, domain = batch
    premise = list(premise)
    hypothesis1 = list(hypothesis1)
    hypothesis2 = list(hypothesis2)
    domain = list(domain)

    # Naming: lp + {c: hypothesis1, w: hypothesis2} + {n: premise context, d: connective-only context}
    lpcn = torch.tensor(lm.partial_score(premise, hypothesis1, reduction=_total_score))
    lpwn = torch.tensor(lm.partial_score(premise, hypothesis2, reduction=_total_score))
    lpcd = torch.tensor(lm.partial_score(domain, hypothesis1, reduction=_total_score))
    lpwd = torch.tensor(lm.partial_score(domain, hypothesis2, reduction=_total_score))

    gain_first = lpcn - lpcd
    gain_second = lpwn - lpwd
    print(gain_first, gain_second, gain_first > gain_second, lpcn > lpwn)

tensor([13.7594,  4.7804]) tensor([12.0711,  8.0356]) tensor([ True, False]) tensor([False, False])


In [94]:
# Reversed direction: each hypothesis (plus connective) is the context and the
# premise is the text being scored.
stimuli_flipped = [create_stimuli_flipped(d) for d in data]

dl = DataLoader(stimuli_flipped, batch_size=2)

for batch in dl:
    premise, hypothesis1, hypothesis2, domain = batch
    premise, hypothesis1, hypothesis2, domain = [list(x) for x in [premise, hypothesis1, hypothesis2, domain]]

    lpcn = torch.tensor(lm.partial_score(hypothesis1, premise, reduction=lambda x: x.sum(0).item()))
    lpwn = torch.tensor(lm.partial_score(hypothesis2, premise, reduction=lambda x: x.sum(0).item()))

    # The connective-only baseline scores the premise and does not involve
    # either hypothesis, so it is the same call for both sides. Run the model
    # once and copy the result instead of scoring the same inputs twice.
    lpcd = torch.tensor(lm.partial_score(domain, premise, reduction=lambda x: x.sum(0).item()))
    lpwd = lpcd.clone()

    # With an identical baseline on both sides, the final comparison is
    # equivalent to `lpcn > lpwn`.
    print(lpcn - lpcd, lpwn - lpwd, lpcn - lpcd > lpwn - lpwd)

tensor([8.8036, 7.4328]) tensor([10.2093,  5.8976]) tensor([False,  True])


In [97]:
# Compare the two context-conditioned scores directly, without subtracting the
# connective-only baseline.
lpcn > lpwn

tensor([False,  True])

In [76]:
# Inspect the connective-only scores left in `lpwd` by the most recently run
# scoring loop.
lpwd

tensor([-27.7210, -17.2235])

In [23]:
# Compare the two context-conditioned scores directly, without subtracting the
# connective-only baseline.
lpcn > lpwn

tensor([False, False])

In [82]:
# Token-by-token scores for each `premise` entry joined to its `hypothesis2` entry.
lm.token_score(
    [f"{context} {continuation}" for context, continuation in zip(premise, hypothesis2)]
)

[[('the', 0.0),
  ('man', -5.960054397583008),
  ('turned', -7.484256744384766),
  ('on', -3.765570640563965),
  ('the', -1.1261520385742188),
  ('f', -4.826846599578857),
  ('auc', -0.027098655700683594),
  ('et', -0.08421897888183594),
  ('so', -5.881808280944824),
  ('the', -1.6875057220458984),
  ('toilet', -6.178640365600586),
  ('filled', -5.008761405944824),
  ('with', -0.9246549606323242),
  ('water', -0.3520994186401367),
  ('.', -1.4982290267944336)],
 [('the', 0.0),
  ('bar', -8.088858604431152),
  ('closed', -7.8573174476623535),
  ('because', -5.538971900939941),
  ('it', -2.4520950317382812),
  ('was', -0.4408855438232422),
  ('crowded', -4.37636137008667),
  ('.', -1.9185810089111328)]]

In [67]:
# Peek at the first (premise, hypothesis1) pair of the current batch.
(premise[0], hypothesis1[0])

('the man turned on the faucet so', 'water flowed from the spout.')

In [73]:
# Hand-sum of five per-token scores to cross-check a batched total. The first
# two terms equal the 'it' / 'was' scores printed for the "the bar closed
# because ..." sentence; presumably this reproduces lpcn for the second item
# (TODO confirm which token_score run the last three terms came from).
-2.4520950317382812 + -0.4408855438232422 + -6.21660041809082 + -3.241485595703125 + -2.4722299575805664

-14.823296546936035

In [81]:
# Token-by-token scores for each bare connective joined to its `hypothesis2` entry.
lm.token_score(
    [f"{context} {continuation}" for context, continuation in zip(domain, hypothesis2)]
)

[[('so', 0.0),
  ('the', -4.760571479797363),
  ('toilet', -9.693585395812988),
  ('filled', -8.273508071899414),
  ('with', -0.7671117782592773),
  ('water', -1.8473739624023438),
  ('.', -2.3788585662841797)],
 [('because', 0.0),
  ('it', -2.3379716873168945),
  ('was', -2.228343963623047),
  ('crowded', -9.716365814208984),
  ('.', -2.9408254623413086)]]

In [75]:
# Hand-sum of the five token scores that follow 'because' in the
# "because it was 3 AM." breakdown; the total should equal the second entry
# of `lpcd`.
-2.3379716873168945 + -2.228343963623047 + -8.116579055786133 + -3.3626742362976074 + -3.558131217956543

-19.603700160980225

In [77]:
# Inspect the connective-only scores left in `lpcd` by the most recently run
# scoring loop.
lpcd

tensor([-30.6454, -19.6037])

In [80]:
# Premise-conditioned hand-sum minus connective-only hand-sum for the second
# item; this should match the second entry of the first tensor printed by the
# scoring loop (lpcn - lpcd).
-14.823296546936035 - -19.603700160980225

4.7804036140441895

In [83]:
# Token-by-token scores for each bare connective joined to its `hypothesis1` entry.
lm.token_score(
    [f"{context} {continuation}" for context, continuation in zip(domain, hypothesis1)]
)

[[('so', 0.0),
  ('water', -10.4304780960083),
  ('flowed', -8.087547302246094),
  ('from', -2.5472497940063477),
  ('the', -0.6567258834838867),
  ('sp', -6.336228370666504),
  ('out', -0.6313705444335938),
  ('.', -1.955796241760254)],
 [('because', 0.0),
  ('it', -2.3379716873168945),
  ('was', -2.228343963623047),
  ('3', -8.116579055786133),
  ('AM', -3.3626742362976074),
  ('.', -3.558131217956543)]]

In [91]:
# Reversed order: token-by-token scores for each `hypothesis1` entry followed by its premise.
lm.token_score(
    [f"{context} {continuation}" for context, continuation in zip(hypothesis1, premise)]
)

[[('water', 0.0),
  ('flowed', -11.430306434631348),
  ('from', -2.6032838821411133),
  ('the', -0.6741828918457031),
  ('sp', -6.970684051513672),
  ('out', -0.7625389099121094),
  ('because', -7.885451793670654),
  ('the', -1.1935720443725586),
  ('man', -6.702916145324707),
  ('turned', -6.2994232177734375),
  ('on', -3.005727767944336),
  ('the', -0.2837047576904297),
  ('f', -2.0682010650634766),
  ('auc', -0.0009765625),
  ('et', -0.019672393798828125)],
 [('it', 0.0),
  ('was', -4.292654037475586),
  ('3', -7.729968070983887),
  ('am', -5.6749043464660645),
  ('so', -5.807781219482422),
  ('the', -3.3985538482666016),
  ('bar', -5.1301984786987305),
  ('closed', -4.928037643432617)]]

In [92]:
# Reversed order: token-by-token scores for each `hypothesis2` entry followed by its premise.
lm.token_score(
    [f"{context} {continuation}" for context, continuation in zip(hypothesis2, premise)]
)

[[('the', 0.0),
  ('toilet', -9.90822982788086),
  ('filled', -8.217606544494629),
  ('with', -0.3010854721069336),
  ('water', -2.0568485260009766),
  ('because', -6.105167865753174),
  ('the', -1.6206865310668945),
  ('man', -5.077610492706299),
  ('turned', -6.318398475646973),
  ('on', -1.8222923278808594),
  ('the', -0.17661190032958984),
  ('f', -1.5234184265136719),
  ('auc', -0.0002613067626953125),
  ('et', -0.04168701171875)],
 [('it', 0.0),
  ('was', -4.292654037475586),
  ('crowded', -10.155437469482422),
  ('so', -5.501846790313721),
  ('the', -3.220942497253418),
  ('bar', -5.715605735778809),
  ('closed', -6.145937919616699)]]