In [1]:
# Optional environment setup, left disabled. Uncommenting these lines would
# install `funcy` and set the OPTIMUS_CHECKPOINT_DIR environment variable
# for the kernel.
# NOTE(review): neither `funcy` nor OPTIMUS_CHECKPOINT_DIR is referenced by
# the cells visible in this notebook — confirm they are still needed.
# !pip install funcy
# %env OPTIMUS_CHECKPOINT_DIR=../pretrained_models/optimus_snli10/checkpoint-31250/

In [3]:
"""
Import our dependencies
"""
import pandas as pd
import numpy as np
import buckets as b

In [6]:
# Fetch the three scored CSVs through the `buckets` helper, in the same order
# as before. Each result is handed to pandas below (a local path, judging by
# the download log this cell prints).
plurals_filename, opposites_filename, comparatives_filename = map(
    b.get_file,
    (
        "s3://scored/plurals_data_scored.csv",
        "s3://scored/opposite_data_scored.csv",
        "s3://scored/comparative_data_scored.csv",
    ),
)

# Parse each fetched file into its own DataFrame.
plurals, opposites, comparatives = map(
    pd.read_csv,
    (plurals_filename, opposites_filename, comparatives_filename),
)

Downloading file from scored/plurals_data_scored.csv to /tmp/plurals_data_scored.csv
Downloading file from scored/opposite_data_scored.csv to /tmp/opposite_data_scored.csv
Downloading file from scored/comparative_data_scored.csv to /tmp/comparative_data_scored.csv


## Plurals

In [7]:
# Caption first, then the per-subcategory row counts as the cell's output.
print("Counts of each type of value within the plurals dataset")

# Kept under this name because the exact-match percentage cell divides by it.
plural_type_counts = plurals["subcategory"].value_counts()
plural_type_counts

Counts of each type of value within the plurals dataset


plural|from-single                  5007
plural|to-some|indefinite            418
plural|to-many|indefinite            410
plural|to-twenty two|indefinite      408
plural|to-ten|indefinite             408
plural|to-various|indefinite         379
plural|to-one hundred|indefinite     370
plural|to-five|indefinite            362
plural|to-twenty|indefinite          357
plural|to-three|indefinite           335
plural|to-six|indefinite             333
plural|to-two|indefinite             318
plural|to-two hundred|indefinite     300
plural|to-nine|indefinite            298
plural|to-four|indefinite            292
plural|to-twenty|definite              2
plural|to-two hundred|definite         2
plural|to-six|definite                 1
Name: subcategory, dtype: int64

In [8]:
print("Percentage of each type which were found to be exact matches")

# Per-subcategory sum of `score_0_exact`, divided by that subcategory's row
# count (aligned on the subcategory label) and scaled to a percentage.
(
    plurals.groupby("subcategory")["score_0_exact"]
    .sum()
    .div(plural_type_counts)
    .mul(100)
)

Percentage of each type which were found to be exact matches


plural|from-single                   8.168564
plural|to-five|indefinite            4.972376
plural|to-four|indefinite           11.986301
plural|to-many|indefinite            3.658537
plural|to-nine|indefinite            0.671141
plural|to-one hundred|indefinite     0.000000
plural|to-six|definite               0.000000
plural|to-six|indefinite             2.702703
plural|to-some|indefinite            6.220096
plural|to-ten|indefinite             1.470588
plural|to-three|indefinite          11.641791
plural|to-twenty two|indefinite      0.000000
plural|to-twenty|definite            0.000000
plural|to-twenty|indefinite          0.560224
plural|to-two hundred|definite       0.000000
plural|to-two hundred|indefinite     0.000000
plural|to-two|indefinite             9.748428
plural|to-various|indefinite         0.000000
dtype: float64

In [10]:
print("Evaluating means of bleu scores")

# Mean of `score_0_bleu` within each subcategory, rounded to 4 decimals.
(
    plurals.groupby("subcategory")["score_0_bleu"]
    .mean()
    .round(4)
)


Evaluating means of bleu scores


subcategory
plural|from-single                  0.2526
plural|to-five|indefinite           0.2368
plural|to-four|indefinite           0.3300
plural|to-many|indefinite           0.2407
plural|to-nine|indefinite           0.1876
plural|to-one hundred|indefinite    0.1206
plural|to-six|definite              0.0000
plural|to-six|indefinite            0.2214
plural|to-some|indefinite           0.2452
plural|to-ten|indefinite            0.2042
plural|to-three|indefinite          0.3067
plural|to-twenty two|indefinite     0.1427
plural|to-twenty|definite           0.2789
plural|to-twenty|indefinite         0.1731
plural|to-two hundred|definite      0.0001
plural|to-two hundred|indefinite    0.1645
plural|to-two|indefinite            0.3213
plural|to-various|indefinite        0.3033
Name: score_0_bleu, dtype: float64

In [9]:
print("Median bleu score of each subcategory")

# Median of `score_0_bleu` within each subcategory, rounded to 4 decimals.
(
    plurals.groupby("subcategory")["score_0_bleu"]
    .median()
    .round(4)
)

Median bleu score of each subcategory


subcategory
plural|from-single                  0.0003
plural|to-five|indefinite           0.0002
plural|to-four|indefinite           0.2253
plural|to-many|indefinite           0.0003
plural|to-nine|indefinite           0.0002
plural|to-one hundred|indefinite    0.0000
plural|to-six|definite              0.0000
plural|to-six|indefinite            0.0003
plural|to-some|indefinite           0.0003
plural|to-ten|indefinite            0.0002
plural|to-three|indefinite          0.2115
plural|to-twenty two|indefinite     0.0001
plural|to-twenty|definite           0.2789
plural|to-twenty|indefinite         0.0002
plural|to-two hundred|definite      0.0001
plural|to-two hundred|indefinite    0.0002
plural|to-two|indefinite            0.2605
plural|to-various|indefinite        0.0007
Name: score_0_bleu, dtype: float64

## Opposites

In [14]:
# Row count per opposites subcategory; kept under this name because the
# exact-match percentage cell divides by it.
opposite_type_counts = opposites["subcategory"].value_counts()
opposite_type_counts

opposite|from-possibly       1778
opposite|from-decided        1723
opposite|from-sure           1697
opposite|from-competitive    1358
opposite|from-comfortable     551
opposite|from-known           527
opposite|from-possible        517
opposite|from-likely          447
opposite|from-certain         369
opposite|from-pleasant        328
opposite|from-impressive      233
opposite|from-aware           225
opposite|from-convenient       65
opposite|from-responsible      58
opposite|from-honest           30
opposite|from-fortunate        23
opposite|from-reasonable       20
opposite|from-productive       14
opposite|from-efficient        14
opposite|from-informed         13
opposite|from-informative      10
Name: subcategory, dtype: int64

In [15]:
# Per-subcategory sum of `score_0_exact`, divided by that subcategory's row
# count (aligned on the subcategory label) and scaled to a percentage.
(
    opposites.groupby("subcategory")["score_0_exact"]
    .sum()
    .div(opposite_type_counts)
    .mul(100)
)

opposite|from-aware          0.444444
opposite|from-certain        0.000000
opposite|from-comfortable    1.633394
opposite|from-competitive    0.000000
opposite|from-convenient     0.000000
opposite|from-decided        0.000000
opposite|from-efficient      0.000000
opposite|from-fortunate      0.000000
opposite|from-honest         0.000000
opposite|from-impressive     0.000000
opposite|from-informative    0.000000
opposite|from-informed       0.000000
opposite|from-known          0.000000
opposite|from-likely         0.000000
opposite|from-pleasant       0.000000
opposite|from-possible       0.000000
opposite|from-possibly       0.000000
opposite|from-productive     0.000000
opposite|from-reasonable     0.000000
opposite|from-responsible    0.000000
opposite|from-sure           0.000000
dtype: float64

In [16]:
# Mean of `score_0_bleu` within each opposites subcategory, rounded to 4 decimals.
(
    opposites.groupby("subcategory")["score_0_bleu"]
    .mean()
    .round(4)
)

subcategory
opposite|from-aware          0.0757
opposite|from-certain        0.1364
opposite|from-comfortable    0.1123
opposite|from-competitive    0.1540
opposite|from-convenient     0.0750
opposite|from-decided        0.1091
opposite|from-efficient      0.0000
opposite|from-fortunate      0.1970
opposite|from-honest         0.0000
opposite|from-impressive     0.0920
opposite|from-informative    0.0002
opposite|from-informed       0.0004
opposite|from-known          0.0854
opposite|from-likely         0.0544
opposite|from-pleasant       0.1250
opposite|from-possible       0.0685
opposite|from-possibly       0.0854
opposite|from-productive     0.3119
opposite|from-reasonable     0.3020
opposite|from-responsible    0.0071
opposite|from-sure           0.0398
Name: score_0_bleu, dtype: float64

In [17]:
# Median of `score_0_bleu` within each opposites subcategory, rounded to 4 decimals.
(
    opposites.groupby("subcategory")["score_0_bleu"]
    .median()
    .round(4)
)

subcategory
opposite|from-aware          0.0001
opposite|from-certain        0.0000
opposite|from-comfortable    0.0000
opposite|from-competitive    0.0002
opposite|from-convenient     0.0002
opposite|from-decided        0.0000
opposite|from-efficient      0.0000
opposite|from-fortunate      0.0004
opposite|from-honest         0.0000
opposite|from-impressive     0.0000
opposite|from-informative    0.0000
opposite|from-informed       0.0004
opposite|from-known          0.0000
opposite|from-likely         0.0000
opposite|from-pleasant       0.0003
opposite|from-possible       0.0000
opposite|from-possibly       0.0000
opposite|from-productive     0.4111
opposite|from-reasonable     0.3388
opposite|from-responsible    0.0000
opposite|from-sure           0.0000
Name: score_0_bleu, dtype: float64

## Comparatives


In [19]:
# Row count per comparatives subcategory; kept under this name because the
# exact-match percentage cell divides by it.
comparative_type_counts = comparatives["subcategory"].value_counts()
comparative_type_counts

comparative|to-comp      5048
comparative|from-comp    4952
Name: subcategory, dtype: int64

In [22]:
# Per-subcategory sum of `score_0_exact`, divided by that subcategory's row
# count (aligned on the subcategory label) and scaled to a percentage.
(
    comparatives.groupby("subcategory")["score_0_exact"]
    .sum()
    .div(comparative_type_counts)
    .mul(100)
)

comparative|from-comp    19.487076
comparative|to-comp      11.370840
dtype: float64

In [23]:
# Mean of `score_0_bleu` within each comparatives subcategory, rounded to 4 decimals.
(
    comparatives.groupby("subcategory")["score_0_bleu"]
    .mean()
    .round(4)
)

subcategory
comparative|from-comp    0.3256
comparative|to-comp      0.3069
Name: score_0_bleu, dtype: float64

In [24]:
# Median of `score_0_bleu` within each comparatives subcategory, rounded to 4 decimals.
(
    comparatives.groupby("subcategory")["score_0_bleu"]
    .median()
    .round(4)
)

subcategory
comparative|from-comp    0.1593
comparative|to-comp      0.1570
Name: score_0_bleu, dtype: float64