In [1]:
# !pip install funcy
# %env OPTIMUS_CHECKPOINT_DIR=../pretrained_models/optimus_snli10/checkpoint-31250/

In [1]:
"""
Import our dependencies
"""
import pandas as pd
import numpy as np
import buckets as b

In [2]:
# For each scored dataset: resolve the s3:// location through the buckets
# helper, then hand whatever it returns straight to pd.read_csv.
# NOTE(review): the recorded output suggests get_file downloads into /tmp and
# returns that local path -- confirm against the buckets module.
plurals_filename = b.get_file("s3://scored/plurals_data_scored.csv")
plurals = pd.read_csv(plurals_filename)

opposites_filename = b.get_file("s3://scored/opposite_data_scored.csv")
opposites = pd.read_csv(opposites_filename)

comparatives_filename = b.get_file("s3://scored/comparative_data_scored.csv")
comparatives = pd.read_csv(comparatives_filename)

Downloading file from scored/plurals_data_scored.csv to /tmp/plurals_data_scored.csv
Downloading file from scored/opposite_data_scored.csv to /tmp/opposite_data_scored.csv
Downloading file from scored/comparative_data_scored.csv to /tmp/comparative_data_scored.csv


## Plurals

In [3]:
print("Counts of each type of value within the plurals dataset")
# Number of rows per subcategory label; kept in a variable because the
# exact-match percentage cell divides by it.
plural_type_counts = plurals["subcategory"].value_counts()
plural_type_counts

Counts of each type of value within the plurals dataset


plural|from-single                  5007
plural|to-some|indefinite            418
plural|to-many|indefinite            410
plural|to-ten|indefinite             408
plural|to-twenty two|indefinite      408
plural|to-various|indefinite         379
plural|to-one hundred|indefinite     370
plural|to-five|indefinite            362
plural|to-twenty|indefinite          357
plural|to-three|indefinite           335
plural|to-six|indefinite             333
plural|to-two|indefinite             318
plural|to-two hundred|indefinite     300
plural|to-nine|indefinite            298
plural|to-four|indefinite            292
plural|to-twenty|definite              2
plural|to-two hundred|definite         2
plural|to-six|definite                 1
Name: subcategory, dtype: int64

In [4]:
print("Percentage of each type which were found to be exact matches")
# Sum score_0_exact within each subcategory, divide by that subcategory's row
# count (the two Series align on the subcategory label), and scale to percent.
(
    plurals.groupby("subcategory")["score_0_exact"]
    .sum()
    .div(plural_type_counts)
    .mul(100)
)

Percentage of each type which were found to be exact matches


plural|from-single                   8.168564
plural|to-five|indefinite            4.972376
plural|to-four|indefinite           11.986301
plural|to-many|indefinite            3.658537
plural|to-nine|indefinite            0.671141
plural|to-one hundred|indefinite     0.000000
plural|to-six|definite               0.000000
plural|to-six|indefinite             2.702703
plural|to-some|indefinite            6.220096
plural|to-ten|indefinite             1.470588
plural|to-three|indefinite          11.641791
plural|to-twenty two|indefinite      0.000000
plural|to-twenty|definite            0.000000
plural|to-twenty|indefinite          0.560224
plural|to-two hundred|definite       0.000000
plural|to-two hundred|indefinite     0.000000
plural|to-two|indefinite             9.748428
plural|to-various|indefinite         0.000000
dtype: float64

In [5]:
print("Evaluating means of bleu scores")
# Mean of score_0_bleu within each subcategory, rounded to four decimals.
plurals.groupby("subcategory")["score_0_bleu"].mean().round(4)


Evaluating means of bleu scores


subcategory
plural|from-single                  0.3253
plural|to-five|indefinite           0.2728
plural|to-four|indefinite           0.3607
plural|to-many|indefinite           0.2854
plural|to-nine|indefinite           0.2302
plural|to-one hundred|indefinite    0.1659
plural|to-six|definite              0.0000
plural|to-six|indefinite            0.2679
plural|to-some|indefinite           0.2857
plural|to-ten|indefinite            0.2479
plural|to-three|indefinite          0.3405
plural|to-twenty two|indefinite     0.1785
plural|to-twenty|definite           0.3078
plural|to-twenty|indefinite         0.2299
plural|to-two hundred|definite      0.0626
plural|to-two hundred|indefinite    0.2163
plural|to-two|indefinite            0.3685
plural|to-various|indefinite        0.3402
Name: score_0_bleu, dtype: float64

In [6]:
print("Median bleu score of each subcategory")
# Median of score_0_bleu within each subcategory, rounded to six decimals.
plurals.groupby("subcategory")["score_0_bleu"].median().round(6)

Median bleu score of each subcategory


subcategory
plural|from-single                  0.269111
plural|to-five|indefinite           0.166029
plural|to-four|indefinite           0.304200
plural|to-many|indefinite           0.182124
plural|to-nine|indefinite           0.000292
plural|to-one hundred|indefinite    0.000159
plural|to-six|definite              0.000000
plural|to-six|indefinite            0.182071
plural|to-some|indefinite           0.203245
plural|to-ten|indefinite            0.000336
plural|to-three|indefinite          0.270657
plural|to-twenty two|indefinite     0.000184
plural|to-twenty|definite           0.307814
plural|to-twenty|indefinite         0.111170
plural|to-two hundred|definite      0.062622
plural|to-two hundred|indefinite    0.000300
plural|to-two|indefinite            0.324668
plural|to-various|indefinite        0.263050
Name: score_0_bleu, dtype: float64

In [7]:
print("Percent of exact values from entire plural set")
# Total of score_0_exact across all rows as a percentage of the row count.
plurals["score_0_exact"].sum() / len(plurals) * 100

Percent of exact values from entire plural set


5.92

In [8]:
print("Average bleu score for plurals")
# Mean of score_0_bleu over the whole plurals frame, rounded to four decimals.
plurals["score_0_bleu"].mean().round(4)

Average bleu score for plurals


0.297

In [9]:
print("Median bleu score for plurals")
# Median of score_0_bleu over the whole plurals frame, rounded to six decimals.
plurals["score_0_bleu"].median().round(6)

Median bleu score for plurals


0.224204

In [10]:
# The label names the subcategory that is actually filtered out
# ('plural|from-single'); the data has no "to-single" subcategory.
print("Percent of exact values from plurals where subcategory is not from-single")
# Build the filtered frame once and use it for both the numerator (sum of
# score_0_exact) and the denominator (row count) so the two cannot drift apart.
plurals_not_from_single = plurals[plurals["subcategory"] != "plural|from-single"]
plurals_not_from_single["score_0_exact"].sum() / len(plurals_not_from_single) * 100

Percent of exact values from plurals where subcategory is not to-single


3.66513118365712

In [11]:
print("Average bleu score for plurals where subcategory is not single")
# Mean of score_0_bleu over the rows whose subcategory is anything other than
# 'plural|from-single', rounded to four decimals.
plurals.loc[plurals["subcategory"] != "plural|from-single", "score_0_bleu"].mean().round(4)

Average bleu score for plurals where subcategory is not single


0.2687

In [13]:
print("Median bleu score for plurals where subcategory is not single")
# Median of score_0_bleu over the rows whose subcategory is anything other
# than 'plural|from-single', rounded to six decimals.
plurals.loc[plurals["subcategory"] != "plural|from-single", "score_0_bleu"].median().round(6)

Median bleu score for plurals where subcategory is not single


0.161886

In [33]:
# The label names the subcategory that is actually excluded by the query
# ('plural|from-single'); the data has no "to-single" subcategory.
print('Examples of exact matches where not "from-single"')
# Keep rows that are exact matches and are not 'plural|from-single', then show
# 20 of them. random_state pins the draw so Restart & Run All reproduces the
# same table instead of a different sample on every execution.
plurals.query(
    "score_0_exact == 1 & subcategory != 'plural|from-single'"
).sample(n=20, random_state=0)

Examples of exact matches


Unnamed: 0.1,Unnamed: 0,a,b,c,d,category,subcategory,pred_0,score_0_bleu,score_0_exact
8471,8471,A man in a green shirt holds out a clipboard f...,some men in a green shirt holds out a clipboar...,A man dressed in black is playing the harmonica.,some men dressed in black are playing the harm...,neutral,plural|to-some|indefinite,some men dressed in black are playing the harm...,1.0,1
3340,3340,A man windsurfs in a wetsuit.,six men windsurfs in a wetsuit.,A man with curly hair is sleeping.,six men with curly hair are sleeping.,contradiction,plural|to-six|indefinite,six men with curly hair are sleeping.,1.0,1
9891,9891,A woman in colorful native attire featuring a ...,three women in colorful native attire featurin...,A woman with a backpack sitting on a bench rea...,three women with a backpack sitting on a bench...,entailment,plural|to-three|indefinite,three women with a backpack sitting on a bench...,1.0,1
4140,4140,A woman with dark hair is wearing a green swea...,four women with dark hair are wearing a green ...,A man wearing a white helmet in a crowd.,four men wearing a white helmet in a crowd.,entailment,plural|to-four|indefinite,four men wearing a white helmet in a crowd.,1.0,1
3799,3799,A woman in costume is marching with a large drum.,many women in costume are marching with a larg...,A woman standing in front of things.,many women standing in front of things.,neutral,plural|to-many|indefinite,many women standing in front of things.,1.0,1
3921,3921,A man in a bright green shirt shows a woman in...,four men in a bright green shirt shows a woman...,A man riding a bicycle through a crowd of people.,four men riding a bicycle through a crowd of p...,entailment,plural|to-four|indefinite,four men riding a bicycle through a crowd of p...,1.0,1
7994,7994,A man in costume is ringing a bell.,ten men in costume are ringing a bell.,A man in blue shorts rides a bike down a street.,ten men in blue shorts ride a bike down a street.,neutral,plural|to-ten|indefinite,ten men in blue shorts ride a bike down a street.,1.0,1
5438,5438,A man wearing a gray sweater walking through a...,three men wearing a gray sweater walking throu...,A man is standing on a ladder that is leaning ...,three men are standing on a ladder that is lea...,neutral,plural|to-three|indefinite,three men are standing on a ladder that is lea...,1.0,1
7507,7507,A woman in a green jacket and black sunglasses...,ten women in a green jacket and black sunglass...,A man in a hat is reaching toward a sign.,ten men in a hat are reaching toward a sign.,entailment,plural|to-ten|indefinite,ten men in a hat are reaching toward a sign.,1.0,1
49,49,A man with a gray shirt holds a young infant i...,some men with a gray shirt holds a young infan...,A man making a canvas shade.,some men making a canvas shade.,contradiction,plural|to-some|indefinite,some men making a canvas shade.,1.0,1


## Opposites

In [15]:
# Number of rows per subcategory label in the opposites data; kept in a
# variable because the exact-match percentage cell divides by it.
opposite_type_counts = opposites["subcategory"].value_counts()
opposite_type_counts

opposite|from-possibly       1778
opposite|from-decided        1723
opposite|from-sure           1697
opposite|from-competitive    1358
opposite|from-comfortable     551
opposite|from-known           527
opposite|from-possible        517
opposite|from-likely          447
opposite|from-certain         369
opposite|from-pleasant        328
opposite|from-impressive      233
opposite|from-aware           225
opposite|from-convenient       65
opposite|from-responsible      58
opposite|from-honest           30
opposite|from-fortunate        23
opposite|from-reasonable       20
opposite|from-productive       14
opposite|from-efficient        14
opposite|from-informed         13
opposite|from-informative      10
Name: subcategory, dtype: int64

In [16]:
# Sum score_0_exact within each subcategory, divide by that subcategory's row
# count (aligned on the subcategory label), and scale to percent.
(
    opposites.groupby("subcategory")["score_0_exact"]
    .sum()
    .div(opposite_type_counts)
    .mul(100)
)

opposite|from-aware          0.444444
opposite|from-certain        0.000000
opposite|from-comfortable    1.633394
opposite|from-competitive    0.000000
opposite|from-convenient     0.000000
opposite|from-decided        0.000000
opposite|from-efficient      0.000000
opposite|from-fortunate      0.000000
opposite|from-honest         0.000000
opposite|from-impressive     0.000000
opposite|from-informative    0.000000
opposite|from-informed       0.000000
opposite|from-known          0.000000
opposite|from-likely         0.000000
opposite|from-pleasant       0.000000
opposite|from-possible       0.000000
opposite|from-possibly       0.000000
opposite|from-productive     0.000000
opposite|from-reasonable     0.000000
opposite|from-responsible    0.000000
opposite|from-sure           0.000000
dtype: float64

In [17]:
# Mean of score_0_bleu within each opposites subcategory, rounded to four decimals.
opposites.groupby("subcategory")["score_0_bleu"].mean().round(4)

subcategory
opposite|from-aware          0.1631
opposite|from-certain        0.2075
opposite|from-comfortable    0.2125
opposite|from-competitive    0.2889
opposite|from-convenient     0.2438
opposite|from-decided        0.1525
opposite|from-efficient      0.0000
opposite|from-fortunate      0.3951
opposite|from-honest         0.0296
opposite|from-impressive     0.2227
opposite|from-informative    0.1903
opposite|from-informed       0.0004
opposite|from-known          0.1879
opposite|from-likely         0.0796
opposite|from-pleasant       0.3380
opposite|from-possible       0.1117
opposite|from-possibly       0.1254
opposite|from-productive     0.3554
opposite|from-reasonable     0.3976
opposite|from-responsible    0.0169
opposite|from-sure           0.0903
Name: score_0_bleu, dtype: float64

In [18]:
# Median of score_0_bleu within each opposites subcategory, rounded to four decimals.
opposites.groupby("subcategory")["score_0_bleu"].median().round(4)

subcategory
opposite|from-aware          0.0004
opposite|from-certain        0.0003
opposite|from-comfortable    0.0005
opposite|from-competitive    0.2761
opposite|from-convenient     0.1769
opposite|from-decided        0.0002
opposite|from-efficient      0.0000
opposite|from-fortunate      0.4301
opposite|from-honest         0.0000
opposite|from-impressive     0.0005
opposite|from-informative    0.0003
opposite|from-informed       0.0004
opposite|from-known          0.0003
opposite|from-likely         0.0001
opposite|from-pleasant       0.3659
opposite|from-possible       0.0001
opposite|from-possibly       0.0002
opposite|from-productive     0.4567
opposite|from-reasonable     0.4467
opposite|from-responsible    0.0001
opposite|from-sure           0.0001
Name: score_0_bleu, dtype: float64

In [19]:
print("Percent of exact values from entire opposite set")
# Total of score_0_exact across all rows as a percentage of the row count.
opposites["score_0_exact"].sum() / len(opposites) * 100

Percent of exact values from entire opposite set


0.1

In [20]:
print("Average bleu score for opposites")
# Mean of score_0_bleu over the whole opposites frame, rounded to four decimals.
opposites["score_0_bleu"].mean().round(4)

Average bleu score for opposites


0.1658

In [21]:
print("Median bleu score for opposites")
# Median of score_0_bleu over the whole opposites frame, rounded to six decimals.
opposites["score_0_bleu"].median().round(6)

Median bleu score for opposites


0.000211

## Comparatives


In [22]:
# Number of rows per subcategory label in the comparatives data; kept in a
# variable because the exact-match percentage cell divides by it.
comparative_type_counts = comparatives["subcategory"].value_counts()
comparative_type_counts

comparative|to-comp      5048
comparative|from-comp    4951
Name: subcategory, dtype: int64

In [23]:
# Sum score_0_exact within each subcategory, divide by that subcategory's row
# count (aligned on the subcategory label), and scale to percent.
(
    comparatives.groupby("subcategory")["score_0_exact"]
    .sum()
    .div(comparative_type_counts)
    .mul(100)
)

comparative|from-comp    19.329428
comparative|to-comp      11.846276
dtype: float64

In [25]:
# Mean of score_0_bleu within each comparatives subcategory, rounded to four decimals.
comparatives.groupby("subcategory")["score_0_bleu"].mean().round(4)

subcategory
comparative|from-comp    0.4254
comparative|to-comp      0.3797
Name: score_0_bleu, dtype: float64

In [26]:
# Median of score_0_bleu within each comparatives subcategory, rounded to four decimals.
comparatives.groupby("subcategory")["score_0_bleu"].median().round(4)

subcategory
comparative|from-comp    0.3689
comparative|to-comp      0.3457
Name: score_0_bleu, dtype: float64