# Conceptual Analysis Workbook for 6/15
### Plan:
* Load models (GPT-3 from txt, OPT from transformers)
* Define helper methods for angle tests
* Conduct angle tests w/ GPT and OPT
* Define + discuss complexity tests
* Conduct complexity tests w/ OPT

In [1]:
import os
import openai
import torch

import numpy as np

from sklearn.metrics.pairwise import cosine_similarity
from transformers import GPT2Tokenizer, OPTModel

# The API key is read from the OPENAI_API_KEY environment variable (set in the
# conda environment). os.getenv returns None when the variable is not defined.
openai.api_key = os.getenv('OPENAI_API_KEY')

## Load Models

### Vocab

In [2]:
# Read the common-words vocabulary: one entry per line of the file, with
# surrounding whitespace (including the trailing newline) stripped.

with open(u'/gpfs/fs1/home/mbarlow6/Desktop/Conceptual-Analysis/barlow/valid_vocab.txt', 'r') as f:
    vocab = [entry.strip() for entry in f]

### GPT-3

In [3]:
# Read the saved GPT-3 Ada embeddings: each line of the file holds one
# whitespace-separated vector of floats.
# Ada embeds: 1024 dims

with open(u'/gpfs/fs1/home/mbarlow6/Desktop/Conceptual-Analysis/barlow/gpt/gpt_ada.txt', 'r') as f:
    ada_embeds = [list(map(float, row.split())) for row in f]

# Pair vocab entries with embeddings by position
# (assumes the two files are line-aligned - TODO confirm).
model_gpt = dict(zip(vocab, ada_embeds))

# The Babbage load is kept below for reference. It is wrapped in a bare string
# literal, so none of it executes; nothing named bab_embeds or model_bab is
# created by this cell.
"""
# Babbage embeds: 2048 dims
bab_embeds = []
with open(u'/gpfs/fs1/home/mbarlow6/Desktop/Conceptual-Analysis/barlow/gpt/gpt_babbage.txt', 'r') as f:
    for line in f:
        bab_embeds.append([float(x) for x in line.strip().split()])

model_bab = dict(zip(vocab, bab_embeds))
"""
#

"\n# Babbage embeds: 2048 dims\nbab_embeds = []\nwith open(u'/gpfs/fs1/home/mbarlow6/Desktop/Conceptual-Analysis/barlow/gpt/gpt_babbage.txt', 'r') as f:\n    for line in f:\n        bab_embeds.append([float(x) for x in line.strip().split()])\n\nmodel_bab = dict(zip(vocab, bab_embeds))\n"

In [4]:
# Helper for fetching GPT-3 embeddings from the OpenAI API.
# Engine names for the text-similarity family, listed for reference.
gpt_models = [
    'text-similarity-ada-001',
    'text-similarity-babbage-001',
    'text-similarity-curie-001',
    'text-similarity-davinci-001',
]


def get_embedding(text, engine='text-similarity-ada-001'):
    """
    Args:
        text: str       -> text to embed; newlines are replaced with spaces first.
        engine: str     -> name of the embedding engine to query.
    Returns:
        The 'embedding' field of the first item in the API response's 'data' list.
    """
    single_line = text.replace('\n', ' ')
    response = openai.Embedding.create(input=[single_line], engine=engine)
    first_result = response['data'][0]
    return first_result['embedding']

### OPT

In [5]:
# Loading OPT with HuggingFace Transformers. The parameter size can be changed - ['125m', '350m', '1.3b', '2.7b', '6.7b', '13b', '30b']
# NOTE: 1.3b uses 2.6G of .cache space.

# NOTE: The documentation recommends using the GPT2Tokenizer here.
tokenizer = GPT2Tokenizer.from_pretrained("facebook/opt-1.3b")
model_opt_raw = OPTModel.from_pretrained("facebook/opt-1.3b")

In [6]:
# Read the saved OPT embeddings: each line of the file holds one
# whitespace-separated vector of floats.
with open(u'/gpfs/fs1/home/mbarlow6/Desktop/Conceptual-Analysis/barlow/opt/1_3B.txt', 'r') as f:
    opt_embeds = [list(map(float, row.split())) for row in f]

# Pair vocab entries with embeddings by position
# (assumes the two files are line-aligned - TODO confirm).
model_opt = dict(zip(vocab, opt_embeds))

In [7]:
# This is how I'm getting the embedding from OPT.
def squeeze(word, tokenizer=tokenizer, model=model_opt_raw):
    """
    Args:
        word: str       -> text to embed.
        tokenizer:      -> tokenizer used to encode `word` (defaults to the loaded OPT tokenizer).
        model:          -> model whose final hidden states are read (defaults to the loaded OPT model).
    Returns:
        list[float] -> the final-layer hidden state at token position 1 of the encoded input.
    """
    # prepare inputs and model
    inputs = tokenizer(word, return_tensors="pt")  # return pytorch tensors

    # Inference only: skip autograd bookkeeping so no graph is kept alive for
    # every lookup. The returned values are unchanged.
    with torch.no_grad():
        outputs = model(**inputs)

    # get embedding from final layer and drop the batch dimension
    token_states = torch.squeeze(outputs.last_hidden_state, dim=0)

    # Only the row at index 1 is returned, so convert just that row.
    # NOTE(review): presumably index 0 is a start-of-sequence token added by the
    # tokenizer, which makes index 1 the first piece of `word` - confirm. If so,
    # words that split into several pieces and share their first piece would get
    # the same vector here.
    return token_states[1].tolist()

## Define Helpers (for angle)

In [8]:
def positive(words, model='gpt'):
    """
    Sum the embeddings of every item in `words` into one "positive" vector.

    Args:
        words: iterable -> items to embed; each is converted with str() before lookup.
                           A bare string is iterated character by character (a hint
                           is printed), so wrap a single word in a list.
        model: 'gpt' or 'opt' (case-insensitive)
    Returns:
        numpy array of shape (1, dims) with the element-wise sum of the embeddings
        from the specified model, or an empty array when `words` has no items.
        Defaults to GPT-3.
    Raises:
        ValueError: if `model` is neither 'gpt' nor 'opt' (checked per item, so it
                    is not raised for an empty `words`).
    """
    if isinstance(words, str):
        print(f"You requested the positive of the string \"{words}\". Did you mean [\"{words}\"]?")

    total = None  # running sum; None until the first embedding is added
    for token in words:
        # convert token to string
        word = str(token)
        backend = model.lower()

        if backend == 'gpt':
            # use the cached GPT embedding, querying the API (and caching) on a miss
            if word not in model_gpt:
                model_gpt[word] = get_embedding(word)
            ex = model_gpt[word]  # ex for "extracted"
        elif backend == 'opt':
            # same pattern for OPT: cache the computed vector so a repeated
            # out-of-vocab word does not rerun the model
            if word not in model_opt:
                model_opt[word] = squeeze(word)
            ex = model_opt[word]
        else:
            raise ValueError('Please provide either gpt or opt as a model choice.')

        # construct positive; np.array copies, so the cached list is never mutated
        row = np.array(ex).reshape(1, -1)
        if total is None:
            total = row
        else:
            total += row

    return total if total is not None else np.array([])

In [9]:
def most_similar(target, phrase=False, topn=10, model='gpt'):
    """
    Rank the common vocab by cosine similarity to a target word or vector.

    Args:
        target: str, or an already-built vector when phrase is True
        phrase: bool    -> True if target is a vector. Default False.
        topn: int       -> Controls how many results are kept: the slice is [0:2+topn]
        model: 'gpt' or 'opt'
    Returns:
        List of (term, similarity) pairs sorted from most to least similar, cut to
        at most 2 + topn entries. Each similarity is the value returned by
        cosine_similarity for that pair. When target is a string, the identical
        vocab term is left out.
    """
    # NOTE(review): the outputs recorded in this notebook show topn entries rather
    # than 2 + topn - confirm which slice is intended.

    # a word target has to be turned into its embedding first
    query = target if phrase else positive([target], model)
    target_is_word = isinstance(target, str)

    # score every term of the vocab against the query
    scores = {}
    for candidate in vocab:
        candidate_vec = positive([candidate], model)
        if target_is_word:
            if candidate == target:
                continue
        elif not candidate_vec.any():
            # vector targets: skip terms whose embedding has no non-zero entry
            continue
        scores[candidate] = cosine_similarity(query, candidate_vec)

    ranked = sorted(scores.items(), key=lambda pair: pair[1], reverse=True)
    return ranked[:2 + topn]

In [10]:
def definition(words, topn=10, model='gpt'):
    """
    Args:
        words: str or iterable -> Items to be combined into a positive and defined.
                                  A single string is treated as one word.
        topn: int              -> size argument handed to most_similar
        model: 'gpt' or 'opt'
    Returns:
        The most_similar ranking, in the specified model, for the positive built
        from the string representations of the given items.
    """
    # a lone string counts as a one-word phrase
    items = [words] if isinstance(words, str) else words

    # build the phrase vector and rank the vocab against it
    phrase_vector = positive(items, model)
    return most_similar(phrase_vector, True, topn, model)

In [11]:
def define(words, topn=10):
    """
    Args:
        words: string or iterable -> Items to be combined into a positive and defined
        topn: int                 -> size argument handed to definition
    Returns:
        None. Formats and prints the definition from both models.
    """
    # Normalise once up front: a bare string is one word (otherwise the header
    # would be printed character by character), and any other iterable is
    # materialised so a one-shot iterator is not used up by the header print.
    if isinstance(words, str):
        words = [words]
    else:
        words = list(words)

    rule = '------------------------------------------------------'
    print(*words, sep=', ')
    for label, model in (('GPT-3:', 'gpt'), ('OPT:', 'opt')):
        print(label)
        print(definition(words, topn, model))
        print(rule)

In [12]:
def calculate_similarity(words, target, vec=False, model='gpt'):
    """
    Args:
        words: str, iterable or vector -> Items to be combined into a positive and compared,
                                          or an already constructed vector of matching dimensions.
        target: str     -> single term to calculate similarity to.
        vec: bool       -> True if 'words' is a vector; it may be flat (dims,) or
                           row-shaped (1, dims).
        model: 'gpt' or 'opt'
    Returns:
        The cosine similarity (a single float) of the words vector and target term
        in the specified model.
    """
    # get phrase
    if vec:
        # cosine_similarity needs 2-D input; promote a flat vector to one row and
        # leave inputs that are already 2-D as they are
        phrase = np.atleast_2d(words)
    elif isinstance(words, str):
        phrase = positive([words], model)
    else:
        phrase = positive(words, model)

    # get target
    target_vector = positive([target], model)

    # cosine_similarity returns a matrix; take its first entry
    return cosine_similarity(phrase, target_vector)[0][0]


In [13]:
def sim_test(words, target):
    """
    Args:
        words: iterable or str -> Items to be combined into a positive and compared.
                                  Pre-built vectors are not handled here, because
                                  calculate_similarity is called without vec=True.
        target: str            -> single term to calculate similarity to.
    Returns:
        None. Formats and prints the similarity from both models.
    """
    # Normalise once up front: a bare string is one word (otherwise the header
    # would be printed character by character), and any other iterable is
    # materialised so a one-shot iterator is not used up by the header print.
    if isinstance(words, str):
        words = [words]
    else:
        words = list(words)

    rule = '------------------------------------------------------'
    print(*words, sep=', ', end='')
    print(f' -> {target}')
    for label, model in (('GPT-3:', 'gpt'), ('OPT:', 'opt')):
        print(label)
        print(calculate_similarity(words, target, model=model))
        print(rule)

I'm choosing to leave out a function where both A and B are vectors, as that is mostly helpful for phrase vectors we're not constructing ourselves. I'll add it later if needed.

## Conduct Angle tests

### A - Bachelors, Husbands, and Wives
Words describing spousal relations.

In [15]:
define(['man', 'unmarried'])

man, unmarried
GPT-3:
[('mixture', array([[0.95492556]])), ('sort', array([[0.92664048]])), ('resident', array([[0.92474028]])), ('kitchen', array([[0.91976509]])), ('landscape', array([[0.91883643]])), ('occupy', array([[0.91856791]])), ('geography', array([[0.91596578]])), ('knit', array([[0.91542595]])), ('p.m.', array([[0.91415888]])), ('railroad', array([[0.91380011]]))]
------------------------------------------------------
OPT:
[('manage', array([[0.792206]])), ('unlucky', array([[0.76511297]])), ('uncontrolled', array([[0.76511297]])), ('unacceptable', array([[0.76511297]])), ('unusual', array([[0.76511297]])), ('unfair', array([[0.76511297]])), ('unemployment', array([[0.76511297]])), ('unexpected', array([[0.76511297]])), ('uniform', array([[0.76511297]])), ('unusually', array([[0.76511297]]))]
------------------------------------------------------


In [16]:
define(['man', 'married'])

man, married
GPT-3:
[('mixture', array([[0.95722058]])), ('palace', array([[0.92757294]])), ('tax', array([[0.92669628]])), ('earn', array([[0.92401706]])), ('package', array([[0.92374845]])), ('phase', array([[0.92117782]])), ('pupil', array([[0.92108494]])), ('ease', array([[0.91982054]])), ('cancel', array([[0.91770757]])), ('painful', array([[0.91756225]]))]
------------------------------------------------------
OPT:
[('man', array([[0.86633845]])), ('married', array([[0.79826154]])), ('mate', array([[0.71873399]])), ('woman', array([[0.69706798]])), ('brothers', array([[0.69481486]])), ('brother', array([[0.69481486]])), ('girl', array([[0.68302406]])), ('yeah', array([[0.67783929]])), ('daughter', array([[0.67521888]])), ('boyfriend', array([[0.67482141]]))]
------------------------------------------------------


In [17]:
sim_test(['man', 'unmarried'], 'bachelor')

man, unmarried -> bachelor
GPT-3:
0.8694018001827069
------------------------------------------------------
OPT:
0.2229285837413899
------------------------------------------------------


In [18]:
sim_test(['man', 'married'], 'husband')

man, married -> husband
GPT-3:
0.8948082387821381
------------------------------------------------------
OPT:
0.6647823958767523
------------------------------------------------------


In [19]:
define(['woman', 'married'])

woman, married
GPT-3:
[('pupil', array([[0.93324007]])), ('railroad', array([[0.92834275]])), ('disappointment', array([[0.92653557]])), ('cancel', array([[0.9251154]])), ('painful', array([[0.92502677]])), ('palace', array([[0.92372286]])), ('fetch', array([[0.92271516]])), ('faith', array([[0.92236675]])), ('running', array([[0.92115944]])), ('actor', array([[0.92062613]]))]
------------------------------------------------------
OPT:
[('daughter', array([[0.7797462]])), ('girlfriend', array([[0.74751899]])), ('girl', array([[0.73701617]])), ('wife', array([[0.73488944]])), ('husband', array([[0.73301062]])), ('children', array([[0.67936358]])), ('female', array([[0.67340456]])), ('guy', array([[0.66943601]])), ('manager', array([[0.66883044]])), ('male', array([[0.66845837]]))]
------------------------------------------------------


In [20]:
sim_test(['woman', 'married'], 'wife')

woman, married -> wife
GPT-3:
0.8418697738514674
------------------------------------------------------
OPT:
0.7348894384058191
------------------------------------------------------


In [21]:
define(['woman', 'spouse'])

woman, spouse
GPT-3:
[('width', array([[0.92651648]])), ('railroad', array([[0.91887132]])), ('painful', array([[0.91856077]])), ('faith', array([[0.91742113]])), ('cancel', array([[0.91710516]])), ('palace', array([[0.91570877]])), ('critical', array([[0.91506279]])), ('till', array([[0.91293668]])), ('lazy', array([[0.91244314]])), ('beside', array([[0.91200722]]))]
------------------------------------------------------
OPT:
[('spoil', array([[0.71116445]])), ('spice', array([[0.71116445]])), ('spend', array([[0.71116445]])), ('spouses', array([[0.71116445]])), ('spare', array([[0.71116445]])), ('spider', array([[0.71116445]])), ('spicy', array([[0.71116445]])), ('spouse', array([[0.71116445]])), ('girl', array([[0.62452564]])), ('husband', array([[0.59985304]]))]
------------------------------------------------------


In [22]:
define(['women', 'spouses'])

women, spouses
GPT-3:
[('ear', array([[0.93473576]])), ('party', array([[0.93251428]])), ('ours', array([[0.92566306]])), ('brush', array([[0.92565899]])), ('painful', array([[0.92550278]])), ('various', array([[0.92221346]])), ('geography', array([[0.92187047]])), ('sisters', array([[0.92024253]])), ('access', array([[0.92019819]])), ('empire', array([[0.91934091]]))]
------------------------------------------------------
OPT:
[('spice', array([[0.69116035]])), ('spend', array([[0.69116035]])), ('spouses', array([[0.69116035]])), ('spare', array([[0.69116035]])), ('spider', array([[0.69116035]])), ('spicy', array([[0.69116035]])), ('spouse', array([[0.69116035]])), ('girls', array([[0.63372983]])), ('children', array([[0.60757485]])), ('boys', array([[0.60236139]]))]
------------------------------------------------------


In [23]:
sim_test(['woman', 'spouse'], 'wife')

woman, spouse -> wife
GPT-3:
0.8302057291267502
------------------------------------------------------
OPT:
0.555712221540075
------------------------------------------------------


In [24]:
sim_test(['women', 'spouses'], 'wives')

women, spouses -> wives
GPT-3:
0.8865878366674778
------------------------------------------------------
OPT:
0.5844370522426273
------------------------------------------------------


In [25]:
define(['man', 'spouse'])

man, spouse
GPT-3:
[('man', array([[0.94911783]])), ('virtually', array([[0.9221998]])), ('silver', array([[0.91915238]])), ('information', array([[0.91893894]])), ('separate', array([[0.91803186]])), ('palace', array([[0.91785384]])), ('flavour', array([[0.91667311]])), ('till', array([[0.91661684]])), ('width', array([[0.9162023]])), ('occupy', array([[0.91355709]]))]
------------------------------------------------------
OPT:
[('manage', array([[0.78472153]])), ('spoon', array([[0.75132786]])), ('spoil', array([[0.75132786]])), ('spice', array([[0.75132786]])), ('spend', array([[0.75132786]])), ('spouses', array([[0.75132786]])), ('spare', array([[0.75132786]])), ('spider', array([[0.75132786]])), ('spicy', array([[0.75132786]])), ('spouse', array([[0.75132786]]))]
------------------------------------------------------


In [26]:
define(['men', 'spouses'])

men, spouses
GPT-3:
[('box', array([[0.93116393]])), ('brush', array([[0.92933049]])), ('ours', array([[0.92874469]])), ('painful', array([[0.92799058]])), ('various', array([[0.92785933]])), ('fetch', array([[0.92750024]])), ('geography', array([[0.92634869]])), ('temporarily', array([[0.92598712]])), ('detailed', array([[0.92587874]])), ('faith', array([[0.92532543]]))]
------------------------------------------------------
OPT:
[('spice', array([[0.79541312]])), ('spend', array([[0.79541312]])), ('spouses', array([[0.79541312]])), ('spare', array([[0.79541312]])), ('spider', array([[0.79541312]])), ('spicy', array([[0.79541312]])), ('spouse', array([[0.79541312]])), ('spray', array([[0.55024939]])), ('explosion', array([[0.54050855]])), ('explore', array([[0.54050855]]))]
------------------------------------------------------


In [27]:
define(['male', 'spouse'])

male, spouse
GPT-3:
[('width', array([[0.92843548]])), ('p.m.', array([[0.92446656]])), ('railroad', array([[0.92237943]])), ('cancel', array([[0.92166726]])), ('faith', array([[0.92109198]])), ('running', array([[0.91976917]])), ('geography', array([[0.91698051]])), ('fetch', array([[0.91675896]])), ('painful', array([[0.91674291]])), ('detailed', array([[0.91596848]]))]
------------------------------------------------------
OPT:
[('spice', array([[0.73963857]])), ('spend', array([[0.73963857]])), ('spouses', array([[0.73963857]])), ('spare', array([[0.73963857]])), ('spider', array([[0.73963857]])), ('spicy', array([[0.73963857]])), ('spouse', array([[0.73963857]])), ('male', array([[0.69444489]])), ('female', array([[0.60856924]])), ('girl', array([[0.48234141]]))]
------------------------------------------------------


In [28]:
define(['male', 'spouses'])

male, spouses
GPT-3:
[('party', array([[0.9303511]])), ('geography', array([[0.92946202]])), ('fetch', array([[0.92698467]])), ('p.m.', array([[0.92653765]])), ('railroad', array([[0.92604905]])), ('running', array([[0.92502615]])), ('faith', array([[0.92343408]])), ('cancel', array([[0.92161899]])), ('painful', array([[0.92026344]])), ('ours', array([[0.92018114]]))]
------------------------------------------------------
OPT:
[('spice', array([[0.73963857]])), ('spend', array([[0.73963857]])), ('spouses', array([[0.73963857]])), ('spare', array([[0.73963857]])), ('spider', array([[0.73963857]])), ('spicy', array([[0.73963857]])), ('spouse', array([[0.73963857]])), ('male', array([[0.69444489]])), ('female', array([[0.60856924]])), ('girl', array([[0.48234141]]))]
------------------------------------------------------


In [29]:
sim_test(['male', 'spouse'], 'husband')

male, spouse -> husband
GPT-3:
0.8915628588982125
------------------------------------------------------
OPT:
0.4480070369135848
------------------------------------------------------


In [30]:
sim_test(['male', 'spouses'], 'husbands')

male, spouses -> husbands
GPT-3:
0.8687174186239781
------------------------------------------------------
OPT:
0.376106180318447
------------------------------------------------------


In [31]:
sim_test(['man', 'spouse'], 'husband')

man, spouse -> husband
GPT-3:
0.8939715264899359
------------------------------------------------------
OPT:
0.4823565514986736
------------------------------------------------------


In [32]:
sim_test(['men', 'spouses'], 'husbands')

men, spouses -> husbands
GPT-3:
0.865877233058076
------------------------------------------------------
OPT:
0.46150598006724075
------------------------------------------------------


### B - Brothers, sisters, and daughters
Family words related to children.

* Male, sibling, brother.

In [33]:
define(['man', 'sibling'])

man, sibling
GPT-3:
[('mixture', array([[0.93901828]])), ('priority', array([[0.91530786]])), ('strip', array([[0.91235929]])), ('diary', array([[0.9096537]])), ('sheet', array([[0.9095486]])), ('infect', array([[0.90333015]])), ('use', array([[0.9018139]])), ('clean', array([[0.90094298]])), ('supply', array([[0.89903186]])), ('mile', array([[0.8990293]]))]
------------------------------------------------------
OPT:
[('salt', array([[0.77141602]])), ('spite', array([[0.77141602]])), ('socially', array([[0.77141602]])), ('sad', array([[0.77141602]])), ('sour', array([[0.77141602]])), ('sail', array([[0.77141602]])), ('solution', array([[0.77141602]])), ('sore', array([[0.77141602]])), ('somewhere', array([[0.77141602]])), ('somewhat', array([[0.77141602]]))]
------------------------------------------------------


In [34]:
sim_test(['man', 'sibling'], 'brother')

man, sibling -> brother
GPT-3:
0.8396088279513378
------------------------------------------------------
OPT:
0.4718785477911247
------------------------------------------------------


In [35]:
sim_test(['men', 'siblings'], 'brothers')

men, siblings -> brothers
GPT-3:
0.8410352855408932
------------------------------------------------------
OPT:
0.3338315445112433
------------------------------------------------------


In [36]:
define(['boys', 'siblings'])

boys, siblings
GPT-3:
[('attach', array([[0.94378946]])), ('sufficiently', array([[0.94127007]])), ('painful', array([[0.9228015]])), ('palace', array([[0.92166]])), ('deeply', array([[0.91670334]])), ('lazy', array([[0.91661007]])), ('faith', array([[0.91613933]])), ('musician', array([[0.91588915]])), ('millimetre', array([[0.9156872]])), ('reach', array([[0.91548756]]))]
------------------------------------------------------
OPT:
[('salt', array([[0.79624576]])), ('spite', array([[0.79624576]])), ('socially', array([[0.79624576]])), ('sad', array([[0.79624576]])), ('sour', array([[0.79624576]])), ('sail', array([[0.79624576]])), ('solution', array([[0.79624576]])), ('sore', array([[0.79624576]])), ('somewhere', array([[0.79624576]])), ('somewhat', array([[0.79624576]]))]
------------------------------------------------------


In [37]:
sim_test(['boy', 'sibling'], 'brother')

boy, sibling -> brother
GPT-3:
0.8409528303692211
------------------------------------------------------
OPT:
0.45804936194924417
------------------------------------------------------


In [38]:
sim_test(['boys', 'siblings'], 'brothers')

boys, siblings -> brothers
GPT-3:
0.8471016673330751
------------------------------------------------------
OPT:
0.4301174496472042
------------------------------------------------------


In [39]:
define(['male', 'sibling'])

male, sibling
GPT-3:
[('running', array([[0.91181414]])), ('fetch', array([[0.9080858]])), ('railroad', array([[0.90697853]])), ('faith', array([[0.90686211]])), ('cancel', array([[0.90676692]])), ('geography', array([[0.90671527]])), ('promptly', array([[0.90480561]])), ('p.m.', array([[0.90445498]])), ('main', array([[0.90416444]])), ('affect', array([[0.90355422]]))]
------------------------------------------------------
OPT:
[('salt', array([[0.76271871]])), ('spite', array([[0.76271871]])), ('socially', array([[0.76271871]])), ('sad', array([[0.76271871]])), ('sour', array([[0.76271871]])), ('sail', array([[0.76271871]])), ('solution', array([[0.76271871]])), ('sore', array([[0.76271871]])), ('somewhere', array([[0.76271871]])), ('somewhat', array([[0.76271871]]))]
------------------------------------------------------


In [40]:
sim_test(['male', 'sibling'], 'brother')

male, sibling -> brother
GPT-3:
0.8541904415388808
------------------------------------------------------
OPT:
0.3833718637754112
------------------------------------------------------


In [41]:
sim_test(['male', 'siblings'], 'brothers')

male, siblings -> brothers
GPT-3:
0.8316815112957328
------------------------------------------------------
OPT:
0.38337190105688335
------------------------------------------------------


* Female, sibling, sister

In [42]:
define(['female', 'sibling'])

female, sibling
GPT-3:
[('deep', array([[0.92549443]])), ('painful', array([[0.918009]])), ('cancel', array([[0.9160146]])), ('promptly', array([[0.91476622]])), ('palace', array([[0.91425834]])), ('affect', array([[0.91366803]])), ('primary', array([[0.9127056]])), ('running', array([[0.91260407]])), ('upset', array([[0.91226773]])), ('supply', array([[0.91106401]]))]
------------------------------------------------------
OPT:
[('salt', array([[0.73396598]])), ('spite', array([[0.73396598]])), ('socially', array([[0.73396598]])), ('sad', array([[0.73396598]])), ('sour', array([[0.73396598]])), ('sail', array([[0.73396598]])), ('solution', array([[0.73396598]])), ('sore', array([[0.73396598]])), ('somewhere', array([[0.73396598]])), ('somewhat', array([[0.73396598]]))]
------------------------------------------------------


In [43]:
define(['female', 'siblings'])

female, siblings
GPT-3:
[('attach', array([[0.94840848]])), ('deep', array([[0.94007258]])), ('painful', array([[0.9375789]])), ('cancel', array([[0.93415229]])), ('palace', array([[0.93217046]])), ('automatic', array([[0.92539383]])), ('reach', array([[0.92490965]])), ('critical', array([[0.92458128]])), ('running', array([[0.92398693]])), ('p.m.', array([[0.92317658]]))]
------------------------------------------------------
OPT:
[('salt', array([[0.73396598]])), ('spite', array([[0.73396598]])), ('socially', array([[0.73396598]])), ('sad', array([[0.73396598]])), ('sour', array([[0.73396598]])), ('sail', array([[0.73396598]])), ('solution', array([[0.73396598]])), ('sore', array([[0.73396598]])), ('somewhere', array([[0.73396598]])), ('somewhat', array([[0.73396598]]))]
------------------------------------------------------


In [44]:
sim_test(['female', 'sibling'], 'sister')

female, sibling -> sister
GPT-3:
0.8577400040210705
------------------------------------------------------
OPT:
0.7339659830282095
------------------------------------------------------


In [45]:
sim_test(['female', 'siblings'], 'sisters')

female, siblings -> sisters
GPT-3:
0.8599560310184233
------------------------------------------------------
OPT:
0.7339659830282095
------------------------------------------------------


In [46]:
define(['woman', 'sibling'])

woman, sibling
GPT-3:
[('use', array([[0.90781291]])), ('criticize', array([[0.90627842]])), ('disappointment', array([[0.90514363]])), ('convention', array([[0.90302389]])), ('supply', array([[0.90275653]])), ('promptly', array([[0.90171579]])), ('affect', array([[0.90120201]])), ('writing', array([[0.89982994]])), ('initially', array([[0.89950243]])), ('sight', array([[0.89904202]]))]
------------------------------------------------------
OPT:
[('salt', array([[0.72627026]])), ('spite', array([[0.72627026]])), ('socially', array([[0.72627026]])), ('sad', array([[0.72627026]])), ('sour', array([[0.72627026]])), ('sail', array([[0.72627026]])), ('solution', array([[0.72627026]])), ('sore', array([[0.72627026]])), ('somewhere', array([[0.72627026]])), ('somewhat', array([[0.72627026]]))]
------------------------------------------------------


In [47]:
define(['women', 'siblings'])

women, siblings
GPT-3:
[('attach', array([[0.94335248]])), ('ear', array([[0.93385946]])), ('painful', array([[0.92785258]])), ('palace', array([[0.92506742]])), ('project', array([[0.92327227]])), ('detailed', array([[0.92300329]])), ('empire', array([[0.92207786]])), ('cancel', array([[0.92117149]])), ('lazy', array([[0.92042697]])), ('south', array([[0.91886338]]))]
------------------------------------------------------
OPT:
[('salt', array([[0.7213529]])), ('spite', array([[0.7213529]])), ('socially', array([[0.7213529]])), ('sad', array([[0.7213529]])), ('sour', array([[0.7213529]])), ('sail', array([[0.7213529]])), ('solution', array([[0.7213529]])), ('sore', array([[0.7213529]])), ('somewhere', array([[0.7213529]])), ('somewhat', array([[0.7213529]]))]
------------------------------------------------------


In [48]:
sim_test(['woman', 'sibling'], 'sister')

woman, sibling -> sister
GPT-3:
0.8481172800513745
------------------------------------------------------
OPT:
0.7262702627388922
------------------------------------------------------


In [49]:
sim_test(['women', 'siblings'], 'sisters')

women, siblings -> sisters
GPT-3:
0.8795312890785606
------------------------------------------------------
OPT:
0.7213528951732341
------------------------------------------------------


In [50]:
define(['girl', 'sibling'])

girl, sibling
GPT-3:
[('window', array([[0.91540565]])), ('use', array([[0.91085134]])), ('primary', array([[0.90949964]])), ('unfortunately', array([[0.90856722]])), ('sight', array([[0.90597707]])), ('supply', array([[0.90581062]])), ('novel', array([[0.90564322]])), ('impressed', array([[0.90547067]])), ('convention', array([[0.90505321]])), ('end', array([[0.90493376]]))]
------------------------------------------------------
OPT:
[('salt', array([[0.77256035]])), ('spite', array([[0.77256035]])), ('socially', array([[0.77256035]])), ('sad', array([[0.77256035]])), ('sour', array([[0.77256035]])), ('sail', array([[0.77256035]])), ('solution', array([[0.77256035]])), ('sore', array([[0.77256035]])), ('somewhere', array([[0.77256035]])), ('somewhat', array([[0.77256035]]))]
------------------------------------------------------


In [51]:
define(['girl', 'siblings'])

girl, siblings
GPT-3:
[('attach', array([[0.94210448]])), ('window', array([[0.93377698]])), ('painful', array([[0.92442016]])), ('instead', array([[0.92411282]])), ('palace', array([[0.92192259]])), ('confront', array([[0.92050739]])), ('essentially', array([[0.92033357]])), ('lazy', array([[0.9194402]])), ('religious', array([[0.91651251]])), ('novel', array([[0.91625012]]))]
------------------------------------------------------
OPT:
[('salt', array([[0.77256035]])), ('spite', array([[0.77256035]])), ('socially', array([[0.77256035]])), ('sad', array([[0.77256035]])), ('sour', array([[0.77256035]])), ('sail', array([[0.77256035]])), ('solution', array([[0.77256035]])), ('sore', array([[0.77256035]])), ('somewhere', array([[0.77256035]])), ('somewhat', array([[0.77256035]]))]
------------------------------------------------------


In [52]:
sim_test(['girl', 'sibling'], 'sister')

girl, sibling -> sister
GPT-3:
0.8665039544669282
------------------------------------------------------
OPT:
0.7725603538456707
------------------------------------------------------


In [53]:
sim_test(['girls', 'siblings'], 'sisters')

girls, siblings -> sisters
GPT-3:
0.8587090827138641
------------------------------------------------------
OPT:
0.7753146823324963
------------------------------------------------------


* Female, children, daughter

In [54]:
define(['girl', 'children'])

girl, children
GPT-3:
[('uncle', array([[0.91653626]])), ('adopt', array([[0.91585119]])), ('window', array([[0.91493453]])), ('star', array([[0.91407403]])), ('they', array([[0.91147709]])), ('bake', array([[0.90771697]])), ('palace', array([[0.9073263]])), ('painful', array([[0.90538119]])), ('traveller', array([[0.90279068]])), ('confront', array([[0.90249859]]))]
------------------------------------------------------
OPT:
[('girls', array([[0.8251192]])), ('boys', array([[0.81460419]])), ('daughter', array([[0.81100346]])), ('woman', array([[0.7987132]])), ('girlfriend', array([[0.75019475]])), ('boyfriend', array([[0.73805742]])), ('boy', array([[0.73805738]])), ('guy', array([[0.71024044]])), ('people', array([[0.67681593]])), ('wives', array([[0.67037292]]))]
------------------------------------------------------


In [55]:
define(['woman', 'children'])

woman, children
GPT-3:
[('adopt', array([[0.91614025]])), ('star', array([[0.9148706]])), ('uncle', array([[0.91383739]])), ('they', array([[0.91332712]])), ('palace', array([[0.91010612]])), ('environmental', array([[0.90185463]])), ('disappointment', array([[0.90089292]])), ('bake', array([[0.90038313]])), ('painful', array([[0.89864822]])), ('cancel', array([[0.89739831]]))]
------------------------------------------------------
OPT:
[('daughter', array([[0.81887209]])), ('girl', array([[0.74777191]])), ('girls', array([[0.74721738]])), ('girlfriend', array([[0.7354829]])), ('boys', array([[0.7302005]])), ('people', array([[0.69824701]])), ('wives', array([[0.69449503]])), ('husband', array([[0.68972898]])), ('membership', array([[0.67387876]])), ('guy', array([[0.67102855]]))]
------------------------------------------------------


In [56]:
define(['female', 'children'])

female, children
GPT-3:
[('deep', array([[0.92573277]])), ('uncle', array([[0.91951944]])), ('adopt', array([[0.91738473]])), ('painful', array([[0.91609185]])), ('star', array([[0.91604889]])), ('palace', array([[0.91505008]])), ('particularly', array([[0.9142275]])), ('environmental', array([[0.91276548]])), ('bake', array([[0.91231643]])), ('they', array([[0.91155265]]))]
------------------------------------------------------
OPT:
[('male', array([[0.80123609]])), ('daughter', array([[0.75877652]])), ('girls', array([[0.74866687]])), ('woman', array([[0.72263097]])), ('boys', array([[0.71128604]])), ('girl', array([[0.68704716]])), ('girlfriend', array([[0.6852243]])), ('individual', array([[0.65883879]])), ('wives', array([[0.6455857]])), ('dozen', array([[0.64022679]]))]
------------------------------------------------------


In [57]:
sim_test(['girl', 'chidren'], 'daughter')

girl, chidren -> daughter
GPT-3:
0.8401352904638635
------------------------------------------------------
OPT:
0.6647990755577187
------------------------------------------------------


In [58]:
sim_test(['woman', 'children'], 'daughter')

woman, children -> daughter
GPT-3:
0.8420301527910761
------------------------------------------------------
OPT:
0.818872086127856
------------------------------------------------------


In [59]:
sim_test(['female', 'children'], 'daughter')

female, children -> daughter
GPT-3:
0.8445875597374192
------------------------------------------------------
OPT:
0.7587765188730518
------------------------------------------------------


In [60]:
define(['female', 'offspring'])

female, offspring
GPT-3:
[('international', array([[0.95321031]])), ('deep', array([[0.93202035]])), ('painful', array([[0.92778771]])), ('suited', array([[0.92689145]])), ('married', array([[0.92329431]])), ('cancel', array([[0.92124075]])), ('palace', array([[0.92106429]])), ('running', array([[0.92046663]])), ('south', array([[0.91710052]])), ('faith', array([[0.91676896]]))]
------------------------------------------------------
OPT:
[('offspring', array([[0.71163806]])), ('offsprings', array([[0.71163806]])), ('offence', array([[0.58833787]])), ('offense', array([[0.58833787]])), ('offend', array([[0.58833787]])), ('off', array([[0.58833783]])), ('girls', array([[0.56922933]])), ('boys', array([[0.55696124]])), ('normal', array([[0.55677885]])), ('girl', array([[0.55465191]]))]
------------------------------------------------------


In [61]:
define(['female', 'offsprings'])

female, offsprings
GPT-3:
[('painful', array([[0.94093495]])), ('deep', array([[0.93769553]])), ('cancel', array([[0.9321885]])), ('sir', array([[0.92848834]])), ('p.m.', array([[0.92823463]])), ('geography', array([[0.92542417]])), ('faith', array([[0.92316683]])), ('reach', array([[0.92267821]])), ('detailed', array([[0.9222622]])), ('running', array([[0.92177646]]))]
------------------------------------------------------
OPT:
[('offspring', array([[0.71163806]])), ('offsprings', array([[0.71163806]])), ('offence', array([[0.58833787]])), ('offense', array([[0.58833787]])), ('offend', array([[0.58833787]])), ('off', array([[0.58833783]])), ('girls', array([[0.56922933]])), ('boys', array([[0.55696124]])), ('normal', array([[0.55677885]])), ('girl', array([[0.55465191]]))]
------------------------------------------------------


In [62]:
sim_test(['female', 'offspring'], 'daughter')

female, offspring -> daughter
GPT-3:
0.867739635594221
------------------------------------------------------
OPT:
0.531856289544029
------------------------------------------------------


In [63]:
sim_test(['female', 'offsprings'], 'daughters')

female, offsprings -> daughters
GPT-3:
0.8918549073463924
------------------------------------------------------
OPT:
0.0945083443477654
------------------------------------------------------


In [64]:
define(['woman', 'offspring'])

woman, offspring
GPT-3:
[('international', array([[0.94209221]])), ('suited', array([[0.91873119]])), ('married', array([[0.91066208]])), ('disappointment', array([[0.90805711]])), ('movie', array([[0.90690083]])), ('palace', array([[0.90676449]])), ('south', array([[0.90427567]])), ('insert', array([[0.90362966]])), ('actor', array([[0.90271004]])), ('faith', array([[0.90241922]]))]
------------------------------------------------------
OPT:
[('offsprings', array([[0.71875965]])), ('girl', array([[0.67422698]])), ('daughter', array([[0.66228454]])), ('husband', array([[0.63210058]])), ('guy', array([[0.63174581]])), ('wives', array([[0.62623424]])), ('manner', array([[0.62532302]])), ('manage', array([[0.62532302]])), ('man', array([[0.62532301]])), ('boys', array([[0.62437982]]))]
------------------------------------------------------


In [65]:
define(['woman', 'offsprings'])

woman, offsprings
GPT-3:
[('sir', array([[0.92102418]])), ('painful', array([[0.92080745]])), ('railroad', array([[0.91733202]])), ('cancel', array([[0.91730146]])), ('geography', array([[0.91642609]])), ('disappointment', array([[0.9160576]])), ('faith', array([[0.91517561]])), ('fetch', array([[0.91495424]])), ('remarkably', array([[0.9145772]])), ('p.m.', array([[0.91418087]]))]
------------------------------------------------------
OPT:
[('offsprings', array([[0.71875965]])), ('girl', array([[0.67422698]])), ('daughter', array([[0.66228454]])), ('husband', array([[0.63210058]])), ('guy', array([[0.63174581]])), ('wives', array([[0.62623424]])), ('manner', array([[0.62532302]])), ('manage', array([[0.62532302]])), ('man', array([[0.62532301]])), ('boys', array([[0.62437982]]))]
------------------------------------------------------


In [66]:
sim_test(['woman', 'offspring'], 'daughter')

woman, offspring -> daughter
GPT-3:
0.8565423896407167
------------------------------------------------------
OPT:
0.662284535495212
------------------------------------------------------


In [67]:
sim_test(['women', 'offsprings'], 'daughters')

women, offsprings -> daughters
GPT-3:
0.8878329751940737
------------------------------------------------------
OPT:
0.13760378441083476
------------------------------------------------------


### C - Animals, diminutives, and augmentatives!
Is a baby duck a duckling?

In [68]:
define(['baby', 'duck'])

baby, duck
GPT-3:
[('fetch', array([[0.94558363]])), ('p.m.', array([[0.94479164]])), ('faith', array([[0.94428225]])), ('painful', array([[0.9392233]])), ('geography', array([[0.93803537]])), ('musician', array([[0.93739275]])), ('railroad', array([[0.93677627]])), ('cancel', array([[0.93586957]])), ('service', array([[0.93541335]])), ('girlfriend', array([[0.93453179]]))]
------------------------------------------------------
OPT:
[('duck', array([[0.72724027]])), ('dull', array([[0.72724027]])), ('dog', array([[0.5647032]])), ('brothers', array([[0.56411802]])), ('brother', array([[0.564118]])), ('boy', array([[0.55674875]])), ('boyfriend', array([[0.55674875]])), ('dad', array([[0.53733558]])), ('daughter', array([[0.53455816]])), ('king', array([[0.53442599]]))]
------------------------------------------------------


In [69]:
sim_test(['baby', 'duck'], 'duckling')

baby, duck -> duckling
GPT-3:
0.897308247851017
------------------------------------------------------
OPT:
0.7272402678422522
------------------------------------------------------


In [70]:
define(['small', 'duck'])

small, duck
GPT-3:
[('sir', array([[0.92546081]])), ('painful', array([[0.92234558]])), ('nervous', array([[0.91944586]])), ('faith', array([[0.91931763]])), ('tidy', array([[0.91709425]])), ('fold', array([[0.91632713]])), ('cancel', array([[0.91627355]])), ('actor', array([[0.91604444]])), ('true', array([[0.9158778]])), ('prize', array([[0.91558175]]))]
------------------------------------------------------
OPT:
[('duck', array([[0.68800619]])), ('dull', array([[0.68800619]])), ('tiny', array([[0.67986722]])), ('large', array([[0.66799242]])), ('largely', array([[0.66799242]])), ('big', array([[0.61491595]])), ('huge', array([[0.58844506]])), ('medium', array([[0.57612503]])), ('massive', array([[0.56426607]])), ('tall', array([[0.55352241]]))]
------------------------------------------------------


In [71]:
sim_test(['small', 'duck'], 'duckling')

small, duck -> duckling
GPT-3:
0.882054549998347
------------------------------------------------------
OPT:
0.6880061916452935
------------------------------------------------------


In [72]:
define(['little', 'duck'])

little, duck
GPT-3:
[('uncomfortable', array([[0.94179995]])), ('permit', array([[0.92754926]])), ('fetch', array([[0.92190154]])), ('faith', array([[0.92148741]])), ('painful', array([[0.92067581]])), ('railroad', array([[0.91942785]])), ('palace', array([[0.91902756]])), ('cancel', array([[0.91844589]])), ('tidy', array([[0.91806266]])), ('separately', array([[0.91772735]]))]
------------------------------------------------------
OPT:
[('duck', array([[0.70334926]])), ('dull', array([[0.70334926]])), ('tiny', array([[0.60146533]])), ('big', array([[0.53456926]])), ('small', array([[0.51774871]])), ('young', array([[0.48727158]])), ('tall', array([[0.4851514]])), ('quiet', array([[0.47759869]])), ('quietly', array([[0.47759868]])), ('way', array([[0.47690587]]))]
------------------------------------------------------


In [73]:
sim_test(['little', 'duck'], 'duckling')

little, duck -> duckling
GPT-3:
0.8837072469577396
------------------------------------------------------
OPT:
0.703349255480737
------------------------------------------------------


In [74]:
define(['child', 'duck'])

child, duck
GPT-3:
[('permit', array([[0.92738942]])), ('painful', array([[0.92647701]])), ('dollar', array([[0.92209303]])), ('faith', array([[0.92097938]])), ('traveller', array([[0.91975183]])), ('tip', array([[0.91944645]])), ('tidy', array([[0.91866874]])), ('musician', array([[0.91796413]])), ('lecture', array([[0.91770484]])), ('sensitive', array([[0.91765714]]))]
------------------------------------------------------
OPT:
[('duck', array([[0.74099451]])), ('dull', array([[0.74099451]])), ('dog', array([[0.57064565]])), ('kid', array([[0.56298606]])), ('son', array([[0.55410378]])), ('parent', array([[0.54959422]])), ('jewellery', array([[0.53967056]])), ('daughter', array([[0.53249454]])), ('children', array([[0.52867892]])), ('boyfriend', array([[0.52641028]]))]
------------------------------------------------------


In [75]:
sim_test(['child', 'duck'], 'duckling')

child, duck -> duckling
GPT-3:
0.8923464670576384
------------------------------------------------------
OPT:
0.7409945058662459
------------------------------------------------------


In [76]:
define(['small', 'cat'])

small, cat
GPT-3:
[('sir', array([[0.93894873]])), ('request', array([[0.92836981]])), ('true', array([[0.92641984]])), ('pride', array([[0.92635361]])), ('painful', array([[0.92512622]])), ('debt', array([[0.92436316]])), ('category', array([[0.92403086]])), ('prize', array([[0.92276058]])), ('nervous', array([[0.92238745]])), ('fold', array([[0.92049372]]))]
------------------------------------------------------
OPT:
[('tiny', array([[0.72089965]])), ('large', array([[0.71504085]])), ('largely', array([[0.71504085]])), ('medium', array([[0.63646629]])), ('dog', array([[0.631871]])), ('big', array([[0.62717905]])), ('horse', array([[0.62409984]])), ('massive', array([[0.61414608]])), ('huge', array([[0.61372582]])), ('bird', array([[0.61055423]]))]
------------------------------------------------------


In [77]:
sim_test(['small', 'cat'], 'kitten')

small, cat -> kitten
GPT-3:
0.8577500890852248
------------------------------------------------------
OPT:
0.1724695026315471
------------------------------------------------------


In [78]:
define(['baby', 'cat'])

baby, cat
GPT-3:
[('p.m.', array([[0.94845328]])), ('faith', array([[0.94605369]])), ('fetch', array([[0.94382014]])), ('painful', array([[0.94274814]])), ('mad', array([[0.94222608]])), ('sure', array([[0.94141385]])), ('musician', array([[0.94099478]])), ('geography', array([[0.9407841]])), ('railroad', array([[0.93749465]])), ('detailed', array([[0.93652217]]))]
------------------------------------------------------
OPT:
[('dog', array([[0.70858555]])), ('horse', array([[0.67216926]])), ('bird', array([[0.63262863]])), ('girl', array([[0.61238388]])), ('daughter', array([[0.60215841]])), ('child', array([[0.60139632]])), ('fish', array([[0.60117591]])), ('chair', array([[0.60001787]])), ('chairwoman', array([[0.60001774]])), ('chairman', array([[0.60001774]]))]
------------------------------------------------------


In [79]:
sim_test(['baby', 'cat'], 'kitten')

baby, cat -> kitten
GPT-3:
0.8382702496540664
------------------------------------------------------
OPT:
0.21856457354269876
------------------------------------------------------


In [80]:
define(['child', 'cat'])

child, cat
GPT-3:
[('sir', array([[0.93098449]])), ('painful', array([[0.92994462]])), ('dollar', array([[0.92767769]])), ('god', array([[0.92678911]])), ('print', array([[0.92479548]])), ('tip', array([[0.92283918]])), ('faith', array([[0.92280647]])), ('lecture', array([[0.92239058]])), ('musician', array([[0.92158506]])), ('garden', array([[0.92085516]]))]
------------------------------------------------------
OPT:
[('dog', array([[0.71574174]])), ('horse', array([[0.6712779]])), ('bird', array([[0.64615888]])), ('animal', array([[0.64045742]])), ('kid', array([[0.63315697]])), ('children', array([[0.60605723]])), ('daughter', array([[0.60089631]])), ('baby', array([[0.5930229]])), ('parent', array([[0.58487549]])), ('woman', array([[0.58406119]]))]
------------------------------------------------------


In [81]:
sim_test(['child', 'cat'], 'kitten')

child, cat -> kitten
GPT-3:
0.840062334931147
------------------------------------------------------
OPT:
0.22087311371926976
------------------------------------------------------


In [82]:
define(['small', 'dog'])

small, dog
GPT-3:
[('pink', array([[0.92265997]])), ('thus', array([[0.91883569]])), ('debt', array([[0.91490908]])), ('sir', array([[0.91471006]])), ('our', array([[0.91285894]])), ('anger', array([[0.9113272]])), ('painful', array([[0.91106002]])), ('faith', array([[0.91100371]])), ('project', array([[0.91087175]])), ('prize', array([[0.91061859]]))]
------------------------------------------------------
OPT:
[('tiny', array([[0.72037018]])), ('large', array([[0.71748748]])), ('largely', array([[0.71748747]])), ('big', array([[0.66850789]])), ('horse', array([[0.65057693]])), ('medium', array([[0.64333136]])), ('huge', array([[0.62830192]])), ('massive', array([[0.62361094]])), ('bird', array([[0.61673945]])), ('cat', array([[0.60768976]]))]
------------------------------------------------------


In [83]:
sim_test(['small', 'dog'], 'puppy')

small, dog -> puppy
GPT-3:
0.8379980894460464
------------------------------------------------------
OPT:
0.09048168363567047
------------------------------------------------------


In [84]:
define(['baby', 'dog'])

baby, dog
GPT-3:
[('faith', array([[0.94048947]])), ('p.m.', array([[0.9363032]])), ('fetch', array([[0.93497318]])), ('sure', array([[0.93322338]])), ('musician', array([[0.93261014]])), ('painful', array([[0.93248474]])), ('project', array([[0.93084438]])), ('mad', array([[0.93082802]])), ('pop', array([[0.93043548]])), ('thus', array([[0.92975692]]))]
------------------------------------------------------
OPT:
[('horse', array([[0.69111262]])), ('cat', array([[0.66298352]])), ('girl', array([[0.65390104]])), ('boyfriend', array([[0.63962771]])), ('boy', array([[0.63962769]])), ('bird', array([[0.63291144]])), ('daughter', array([[0.6326813]])), ('kid', array([[0.62975222]])), ('dad', array([[0.62677309]])), ('child', array([[0.62313886]]))]
------------------------------------------------------


In [85]:
sim_test(['baby', 'dog'], 'puppy')

baby, dog -> puppy
GPT-3:
0.838821379081278
------------------------------------------------------
OPT:
0.12311561706945026
------------------------------------------------------


In [86]:
define(['child', 'dog'])

child, dog
GPT-3:
[('thus', array([[0.92760923]])), ('pink', array([[0.92375225]])), ('our', array([[0.92352074]])), ('god', array([[0.92351801]])), ('painful', array([[0.92072344]])), ('anger', array([[0.92014789]])), ('faith', array([[0.91816297]])), ('jacket', array([[0.91728785]])), ('print', array([[0.91682653]])), ('lecture', array([[0.91574212]]))]
------------------------------------------------------
OPT:
[('horse', array([[0.69044731]])), ('cat', array([[0.66894426]])), ('kid', array([[0.66740919]])), ('animal', array([[0.65607992]])), ('bird', array([[0.6460142]])), ('children', array([[0.64470101]])), ('daughter', array([[0.63192987]])), ('woman', array([[0.62751119]])), ('girl', array([[0.62610157]])), ('boyfriend', array([[0.61511701]]))]
------------------------------------------------------


In [87]:
sim_test(['child', 'dog'], 'puppy')

child, dog -> puppy
GPT-3:
0.8626987176526459
------------------------------------------------------
OPT:
0.12315296554770769
------------------------------------------------------


In [88]:
define(['baby', 'male', 'horse'])

baby, male, horse
GPT-3:
[('running', array([[0.94181219]])), ('p.m.', array([[0.93671669]])), ('geography', array([[0.93580801]])), ('regulation', array([[0.93529647]])), ('fetch', array([[0.93212432]])), ('faith', array([[0.9316958]])), ('railroad', array([[0.93142148]])), ('horse', array([[0.92950995]])), ('painful', array([[0.92885185]])), ('cancel', array([[0.92825565]]))]
------------------------------------------------------
OPT:
[('male', array([[0.80014965]])), ('female', array([[0.76371483]])), ('girl', array([[0.68438185]])), ('daughter', array([[0.66976626]])), ('dog', array([[0.65867228]])), ('dozen', array([[0.64406857]])), ('child', array([[0.6348364]])), ('woman', array([[0.6338089]])), ('adult', array([[0.63075304]])), ('rider', array([[0.63045849]]))]
------------------------------------------------------


In [89]:
define(['adult', 'female', 'horse'])

adult, female, horse
GPT-3:
[('deep', array([[0.93524596]])), ('adult', array([[0.93335622]])), ('running', array([[0.93160731]])), ('cancel', array([[0.93084813]])), ('geography', array([[0.9304093]])), ('critical', array([[0.92898235]])), ('railroad', array([[0.92880537]])), ('p.m.', array([[0.92683084]])), ('palace', array([[0.92650243]])), ('regulation', array([[0.92611908]]))]
------------------------------------------------------
OPT:
[('male', array([[0.81625047]])), ('horse', array([[0.77601841]])), ('ordinary', array([[0.64794139]])), ('normal', array([[0.6460642]])), ('animal', array([[0.64456649]])), ('professional', array([[0.64221758]])), ('human', array([[0.64000844]])), ('regularly', array([[0.63741843]])), ('regular', array([[0.63741836]])), ('aggressive', array([[0.63667383]]))]
------------------------------------------------------


In [90]:
define(['adult', 'male', 'pig'])

adult, male, pig
GPT-3:
[('railroad', array([[0.92994474]])), ('pig', array([[0.92802976]])), ('p.m.', array([[0.92801637]])), ('cancel', array([[0.92651977]])), ('sir', array([[0.92555307]])), ('musician', array([[0.92453199]])), ('till', array([[0.92351806]])), ('painful', array([[0.92323255]])), ('detailed', array([[0.92273397]])), ('faith', array([[0.92191605]]))]
------------------------------------------------------
OPT:
[('female', array([[0.67045014]])), ('piano', array([[0.58121523]])), ('pupil', array([[0.58121523]])), ('pension', array([[0.58121523]])), ('possible', array([[0.58121523]])), ('pursue', array([[0.58121523]])), ('purchase', array([[0.58121523]])), ('pint', array([[0.58121523]])), ('p.m.', array([[0.58121523]])), ('praise', array([[0.58121523]]))]
------------------------------------------------------


In [91]:
define(['adult', 'male', 'human'])

adult, male, human
GPT-3:
[('adult', array([[0.93266518]])), ('fetch', array([[0.93209523]])), ('p.m.', array([[0.93102566]])), ('running', array([[0.92911551]])), ('cancel', array([[0.92828343]])), ('painful', array([[0.92750833]])), ('resident', array([[0.92656574]])), ('detailed', array([[0.9261149]])), ('faith', array([[0.92582756]])), ('musician', array([[0.92527566]]))]
------------------------------------------------------
OPT:
[('female', array([[0.81946733]])), ('adult', array([[0.80872713]])), ('individual', array([[0.65270663]])), ('normal', array([[0.6426562]])), ('professional', array([[0.64122466]])), ('animal', array([[0.63904795]])), ('ordinary', array([[0.63306369]])), ('aggressive', array([[0.6315707]])), ('unique', array([[0.62899993]])), ('traditional', array([[0.62242159]]))]
------------------------------------------------------


In [92]:
define(['adult', 'female', 'human'])

adult, female, human
GPT-3:
[('deep', array([[0.93716648]])), ('painful', array([[0.93648091]])), ('geography', array([[0.92967681]])), ('cancel', array([[0.92935938]])), ('human', array([[0.92926627]])), ('railroad', array([[0.92857335]])), ('palace', array([[0.92690942]])), ('till', array([[0.926251]])), ('p.m.', array([[0.92597495]])), ('yours', array([[0.92562348]]))]
------------------------------------------------------
OPT:
[('human', array([[0.82418039]])), ('adult', array([[0.80692317]])), ('individual', array([[0.6632536]])), ('normal', array([[0.65522189]])), ('professional', array([[0.65363425]])), ('ordinary', array([[0.6507222]])), ('traditional', array([[0.64363193]])), ('aggressive', array([[0.64337068]])), ('unique', array([[0.64064231]])), ('regularly', array([[0.63985642]]))]
------------------------------------------------------


### D - Philosophy, math, miscellaneous
The rest of our old tests.

In [93]:
define(['persuade', 'sentence'])

persuade, sentence
GPT-3:
[('badly', array([[0.93669279]])), ('betting', array([[0.92758232]])), ('busy', array([[0.92083611]])), ('gasoline', array([[0.9202528]])), ('reality', array([[0.91669369]])), ('itself', array([[0.91620985]])), ('song', array([[0.91607274]])), ('noticeable', array([[0.91499261]])), ('affection', array([[0.91417384]])), ('abandon', array([[0.91247439]]))]
------------------------------------------------------
OPT:
[('preserve', array([[0.53298057]])), ('presumably', array([[0.53298057]])), ('presence', array([[0.53298057]])), ('essay', array([[0.52635704]])), ('essentially', array([[0.52635704]])), ('send', array([[0.52236059]])), ('soldier', array([[0.52234702]])), ('responsibility', array([[0.51497824]])), ('insurance', array([[0.48443989]])), ('insult', array([[0.48443989]]))]
------------------------------------------------------


In [94]:
sim_test(['persuade', 'sentence'], 'argument')

persuade, sentence -> argument
GPT-3:
0.8632533265475226
------------------------------------------------------
OPT:
0.29909598991345765
------------------------------------------------------


In [95]:
define(['logical', 'belief'])

logical, belief
GPT-3:
[('painful', array([[0.94407583]])), ('geography', array([[0.94151192]])), ('railroad', array([[0.94109712]])), ('p.m.', array([[0.94076712]])), ('faith', array([[0.94028171]])), ('tightly', array([[0.93789961]])), ('critical', array([[0.93691529]])), ('musician', array([[0.93662476]])), ('reveal', array([[0.93628935]])), ('fetch', array([[0.93533123]]))]
------------------------------------------------------
OPT:
[('belong', array([[0.80396041]])), ('belief', array([[0.80396041]])), ('believe', array([[0.80396041]])), ('challenge', array([[0.55157835]])), ('politically', array([[0.54760348]])), ('politician', array([[0.54760348]])), ('vertical', array([[0.54384625]])), ('technique', array([[0.54212991]])), ('survive', array([[0.53618082]])), ('practical', array([[0.53289052]]))]
------------------------------------------------------


In [96]:
sim_test(['logical', 'belief'], 'argument')

logical, belief -> argument
GPT-3:
0.9160718816942856
------------------------------------------------------
OPT:
0.32303828185548833
------------------------------------------------------


In [97]:
define(['logical', 'reason'])

logical, reason
GPT-3:
[('stock', array([[0.94497193]])), ('painful', array([[0.93865343]])), ('railroad', array([[0.93573513]])), ('critical', array([[0.93289972]])), ('geography', array([[0.93179805]])), ('p.m.', array([[0.93119304]])), ('harmless', array([[0.93011167]])), ('faith', array([[0.92962625]])), ('reveal', array([[0.92855963]])), ('musician', array([[0.92583717]]))]
------------------------------------------------------
OPT:
[('reasonably', array([[0.79379828]])), ('reason', array([[0.79379825]])), ('list', array([[0.60674183]])), ('listen', array([[0.60674182]])), ('plug', array([[0.60266433]])), ('claim', array([[0.60221004]])), ('tip', array([[0.58938398]])), ('method', array([[0.58420824]])), ('signal', array([[0.58298836]])), ('significantly', array([[0.58298836]]))]
------------------------------------------------------


In [98]:
sim_test(['logical', 'reason'], 'argument')

logical, reason -> argument
GPT-3:
0.9002934753084291
------------------------------------------------------
OPT:
0.5803985030778962
------------------------------------------------------


In [99]:
define(['thinking', 'argument'])

thinking, argument
GPT-3:
[('railroad', array([[0.92953899]])), ('painful', array([[0.92827987]])), ('substantially', array([[0.9282762]])), ('married', array([[0.92687983]])), ('geography', array([[0.92578606]])), ('crisp', array([[0.92554119]])), ('pressure', array([[0.92531534]])), ('faith', array([[0.92478623]])), ('p.m.', array([[0.92416276]])), ('sharp', array([[0.92372705]]))]
------------------------------------------------------
OPT:
[('think', array([[0.72082248]])), ('meaning', array([[0.71746689]])), ('statement', array([[0.68029962]])), ('determination', array([[0.6608852]])), ('thought', array([[0.65921105]])), ('ahead', array([[0.65853522]])), ('reasonably', array([[0.64335814]])), ('reason', array([[0.64335811]])), ('claim', array([[0.64302677]])), ('issue', array([[0.64193145]]))]
------------------------------------------------------


In [100]:
sim_test(['thinking', 'argument'], 'philosophy')

thinking, argument -> philosophy
GPT-3:
0.9012961462088043
------------------------------------------------------
OPT:
0.2668171909312883
------------------------------------------------------


In [101]:
sim_test(['thought', 'argument'], 'philosophy')

thought, argument -> philosophy
GPT-3:
0.8977197605222109
------------------------------------------------------
OPT:
0.29035310310067364
------------------------------------------------------


In [102]:
define(['true', 'belief'])

true, belief
GPT-3:
[('painful', array([[0.94647005]])), ('railroad', array([[0.9413442]])), ('critical', array([[0.9404674]])), ('p.m.', array([[0.94029256]])), ('geography', array([[0.93894082]])), ('sir', array([[0.93847576]])), ('cancel', array([[0.93822778]])), ('palace', array([[0.93753308]])), ('actually', array([[0.93662061]])), ('detailed', array([[0.93502189]]))]
------------------------------------------------------
OPT:
[('belief', array([[0.72549273]])), ('believe', array([[0.72549273]])), ('real', array([[0.56530701]])), ('realize', array([[0.56530701]])), ('realistic', array([[0.56530701]])), ('aware', array([[0.52233409]])), ('false', array([[0.49804749]])), ('sure', array([[0.49592698]])), ('surely', array([[0.49592693]])), ('valid', array([[0.48707629]]))]
------------------------------------------------------


In [103]:
sim_test(['true', 'belief'], 'knowledge')

true, belief -> knowledge
GPT-3:
0.8868240877731091
------------------------------------------------------
OPT:
0.3330208813494506
------------------------------------------------------


In [104]:
define(['spiritual', 'belief'])

spiritual, belief
GPT-3:
[('dot', array([[0.94073239]])), ('railroad', array([[0.91999745]])), ('painful', array([[0.91858588]])), ('actually', array([[0.91716593]])), ('critical', array([[0.91644914]])), ('cancel', array([[0.91484993]])), ('geography', array([[0.91347712]])), ('musician', array([[0.91294121]])), ('palace', array([[0.9127524]])), ('independently', array([[0.91272207]]))]
------------------------------------------------------
OPT:
[('believe', array([[0.79551563]])), ('spirit', array([[0.75978001]])), ('spiritual', array([[0.75977995]])), ('bell', array([[0.5553537]])), ('mistaken', array([[0.50554524]])), ('mistake', array([[0.50554524]])), ('virtually', array([[0.50176336]])), ('survive', array([[0.49885493]])), ('challenge', array([[0.49842358]])), ('religion', array([[0.49107328]]))]
------------------------------------------------------


In [105]:
sim_test(['spiritual', 'belief'], 'religion')

spiritual, belief -> religion
GPT-3:
0.8485276341928929
------------------------------------------------------
OPT:
0.49107328420785173
------------------------------------------------------


In [106]:
define(['right', 'belief'])

right, belief
GPT-3:
[('painful', array([[0.91685907]])), ('actually', array([[0.91252915]])), ('curious', array([[0.91236697]])), ('tightly', array([[0.91218768]])), ('railroad', array([[0.91148723]])), ('delay', array([[0.91106328]])), ('palace', array([[0.91061818]])), ('cancel', array([[0.90986761]])), ('us', array([[0.90951625]])), ('bake', array([[0.90725356]]))]
------------------------------------------------------
OPT:
[('belong', array([[0.71809226]])), ('belief', array([[0.71809226]])), ('believe', array([[0.71809226]])), ('left', array([[0.57448158]])), ('correct', array([[0.56651486]])), ('correctly', array([[0.56651483]])), ('wrongly', array([[0.5066144]])), ('wrong', array([[0.50661436]])), ('ought', array([[0.49254615]])), ('steadily', array([[0.49060028]]))]
------------------------------------------------------


In [107]:
sim_test(['right', 'belief'], 'moral')

right, belief -> moral
GPT-3:
0.8486567301185883
------------------------------------------------------
OPT:
0.31519358791454893
------------------------------------------------------


In [108]:
sim_test(['right', 'belief'], 'ideal')

right, belief -> ideal
GPT-3:
0.8721914049388988
------------------------------------------------------
OPT:
0.3941574302096134
------------------------------------------------------


In [109]:
define(['good', 'evil', 'belief'])

good, evil, belief
GPT-3:
[('good', array([[0.9308848]])), ('painful', array([[0.92840944]])), ('cancel', array([[0.9245966]])), ('critical', array([[0.92452661]])), ('become', array([[0.92386925]])), ('phase', array([[0.92244803]])), ('other', array([[0.92232604]])), ('evil', array([[0.92223569]])), ('railroad', array([[0.92213277]])), ('harmless', array([[0.92204286]]))]
------------------------------------------------------
OPT:
[('good', array([[0.80069438]])), ('evil', array([[0.74801955]])), ('bad-tempered', array([[0.70582589]])), ('badly', array([[0.70582589]])), ('bad', array([[0.70582587]])), ('great', array([[0.66750762]])), ('greatly', array([[0.66750761]])), ('cool', array([[0.6458937]])), ('solid', array([[0.62689077]])), ('powerful', array([[0.61722925]]))]
------------------------------------------------------


In [110]:
sim_test(['good', 'evil', 'belief'], 'moral')

good, evil, belief -> moral
GPT-3:
0.8641716196507854
------------------------------------------------------
OPT:
0.4268944058004254
------------------------------------------------------


In [111]:
sim_test(['woman', 'leader'], 'chairwoman')

woman, leader -> chairwoman
GPT-3:
0.8615575969668379
------------------------------------------------------
OPT:
0.6069163783288674
------------------------------------------------------


In [112]:
sim_test(['man', 'leader'], 'chairman')

man, leader -> chairman
GPT-3:
0.8812164216505443
------------------------------------------------------
OPT:
0.5625140050433731
------------------------------------------------------


In [113]:
define(['rectangular', 'table', 'numbers'])

rectangular, table, numbers
GPT-3:
[('palace', array([[0.94003799]])), ('damage', array([[0.93838589]])), ('they', array([[0.93075692]])), ('rectangular', array([[0.9291309]])), ('us', array([[0.9284431]])), ('phase', array([[0.92728538]])), ('virtually', array([[0.92587001]])), ('my', array([[0.92576034]])), ('belief', array([[0.92559076]])), ('art', array([[0.92423126]]))]
------------------------------------------------------
OPT:
[('narrow', array([[0.73310079]])), ('naked', array([[0.73310079]])), ('nonsense', array([[0.73310079]])), ('niece', array([[0.73310079]])), ('nest', array([[0.73310079]])), ('nervous', array([[0.73310079]])), ('nurse', array([[0.73310079]])), ('nephew', array([[0.73310079]])), ('ninth', array([[0.73310079]])), ('navy', array([[0.73310079]]))]
------------------------------------------------------


In [114]:
define(['relation', 'two', 'sets'])

relation, two, sets
GPT-3:
[('sets', array([[0.94308371]])), ('painful', array([[0.94263134]])), ('musician', array([[0.93675604]])), ('sharp', array([[0.93627486]])), ('phase', array([[0.93601383]])), ('motor', array([[0.93586518]])), ('traveller', array([[0.93518029]])), ('faith', array([[0.93485898]])), ('critical', array([[0.93468944]])), ('reveal', array([[0.93431214]]))]
------------------------------------------------------
OPT:
[('three', array([[0.77964718]])), ('dozen', array([[0.74944073]])), ('fourteenth', array([[0.73844933]])), ('fourteen', array([[0.73844933]])), ('four', array([[0.7384493]])), ('few', array([[0.73840937]])), ('relation', array([[0.71510068]])), ('eighteenth', array([[0.71316501]])), ('eightieth', array([[0.71316501]])), ('eighteen', array([[0.71316501]]))]
------------------------------------------------------


In [115]:
define(['scalar', 'function', 'matrix'])

scalar, function, matrix
GPT-3:
[('travel', array([[0.93469953]])), ('additional', array([[0.93334434]])), ('prison', array([[0.93159487]])), ('uncertain', array([[0.93080937]])), ('matrix', array([[0.93058896]])), ('boat', array([[0.92880126]])), ('traveller', array([[0.92601973]])), ('pressure', array([[0.92336992]])), ('late', array([[0.92257613]])), ('difficult', array([[0.92192047]]))]
------------------------------------------------------
OPT:
[('screw', array([[0.72271699]])), ('scared', array([[0.72271699]])), ('scissors', array([[0.72271699]])), ('scare', array([[0.72271699]])), ('scream', array([[0.72271699]])), ('matrix', array([[0.70795241]])), ('mathematics', array([[0.70795241]])), ('function', array([[0.60097745]])), ('describe', array([[0.53490897]])), ('literature', array([[0.52265456]]))]
------------------------------------------------------


In [116]:
sim_test(['objective'], 'goal')

objective -> goal
GPT-3:
0.8408090139690735
------------------------------------------------------
OPT:
0.48903844614371444
------------------------------------------------------


In [117]:
sim_test(['actor'], 'actress')

actor -> actress
GPT-3:
0.8744601066235698
------------------------------------------------------
OPT:
0.3160795757561419
------------------------------------------------------


In [118]:
sim_test(['afternoon'], 'morning')

afternoon -> morning
GPT-3:
0.8443593299688373
------------------------------------------------------
OPT:
0.3889530819172624
------------------------------------------------------


## Define Helpers (for magnitude)

In [119]:
# This function takes no model argument: GPT-3 vectors are normalized, so it always uses the OPT embeddings.
def sum_complexity(word, vec=False):
    """
    Score an embedding by the total magnitude of its entries.

    Args:
        word: iterable, string, or vector   -> what to score. A bare string is wrapped
                                               in a one-item list before the lookup.
        vec: bool                           -> True if word is already a vector; the
                                               positive() lookup is skipped in that case.
    Returns:
        Sum of the absolute values of the entries in the first row of the embedding.
    """
    if vec:
        embedding = word
    else:
        # Embeddings are always requested from OPT here; there is no model argument.
        items = [word] if isinstance(word, str) else word
        embedding = positive(items, model='opt')

    # Only row 0 of the embedding is read.
    return sum(abs(float(value)) for value in embedding[0])
    

In [120]:
def above_zero_complexity(word, vec=False, threshold=0.3):
    """
    Fraction of embedding dimensions whose magnitude exceeds a cutoff.

    Args:
        word: iterable, string, or vector -> embedding to be evaluated. A bare string is
                                             wrapped in a one-item list before the lookup.
        vec: bool                         -> True if word is already vector; the
                                             positive() lookup is skipped in that case.
        threshold: float                  -> Absolute value cutoff point (strict: an entry
                                             exactly at the threshold is not counted).
    Returns:
        Float score of the amount of 'significant' features according to the threshold,
        divided by the number of total dimensions. Returns 0.0 for an empty embedding.
    """
    phrase = word
    if not vec:
        # Embeddings are always requested from OPT here; there is no model argument.
        phrase = positive([word] if isinstance(word, str) else word, model='opt')

    # Only row 0 of the embedding is read.
    row = phrase[0]

    # Use len() rather than phrase.shape[1] so that a plain nested list works as
    # well as an array, the same inputs sum_complexity accepts.
    total_dims = len(row)
    if total_dims == 0:
        # Nothing to count; avoid dividing by zero.
        return 0.0

    significant = sum(1 for x in row if abs(float(x)) > threshold)
    return significant / total_dims

In [121]:
def compare_complexity(words, threshold=0.3):
    """
    Print both complexity scores for every entry in words.

    Args:
        words: iterable     -> items to score; each one is passed to sum_complexity()
                               and above_zero_complexity().
        threshold: float    -> cutoff forwarded to above_zero_complexity().
    Returns:
        None. Results are printed, one divider-terminated group per item.
    """
    divider = '------------------------------------------------------'
    for item in words:
        print(item)
        # Each score is computed right before its line is printed.
        total = sum_complexity(item)
        print(f"Sum Complexity: {total}")
        fraction = above_zero_complexity(item, threshold=threshold)
        print(f"Above Zero Complexity t-[{threshold}]: {fraction}")
        print(divider)

## Conduct Magnitude Tests
This is a shorter section, and all of our tests are on OPT-1.3b.

In [122]:
compare_complexity(['man', 'bachelor', 'husband', 'father'])

man
Sum Complexity: 3869.585998736322
Above Zero Complexity t-[0.3]: 0.88427734375
------------------------------------------------------
bachelor
Sum Complexity: 3790.6826499924064
Above Zero Complexity t-[0.3]: 0.88427734375
------------------------------------------------------
husband
Sum Complexity: 3166.1516402363777
Above Zero Complexity t-[0.3]: 0.86865234375
------------------------------------------------------
father
Sum Complexity: 3597.100124645978
Above Zero Complexity t-[0.3]: 0.88232421875
------------------------------------------------------


In [123]:
compare_complexity(['woman', 'wife', 'mother'])

woman
Sum Complexity: 3872.5394445955753
Above Zero Complexity t-[0.3]: 0.89453125
------------------------------------------------------
wife
Sum Complexity: 3150.264439454302
Above Zero Complexity t-[0.3]: 0.86572265625
------------------------------------------------------
mother
Sum Complexity: 3577.550979346037
Above Zero Complexity t-[0.3]: 0.88623046875
------------------------------------------------------


In [124]:
compare_complexity(['cat', 'kitten'])

cat
Sum Complexity: 3582.265288449824
Above Zero Complexity t-[0.3]: 0.8984375
------------------------------------------------------
kitten
Sum Complexity: 3580.9174359850585
Above Zero Complexity t-[0.3]: 0.8759765625
------------------------------------------------------


In [125]:
compare_complexity(['dog', 'puppy'])

dog
Sum Complexity: 3529.7578548304737
Above Zero Complexity t-[0.3]: 0.87548828125
------------------------------------------------------
puppy
Sum Complexity: 3814.6731686517596
Above Zero Complexity t-[0.3]: 0.89111328125
------------------------------------------------------


In [126]:
compare_complexity(['duck', 'duckling'])

duck
Sum Complexity: 2995.471445657313
Above Zero Complexity t-[0.3]: 0.85888671875
------------------------------------------------------
duckling
Sum Complexity: 2995.471445657313
Above Zero Complexity t-[0.3]: 0.85888671875
------------------------------------------------------


In [127]:
compare_complexity(['set', 'powerset'])

set
Sum Complexity: 3559.9400723166764
Above Zero Complexity t-[0.3]: 0.8818359375
------------------------------------------------------
powerset
Sum Complexity: 3149.4010306224227
Above Zero Complexity t-[0.3]: 0.8720703125
------------------------------------------------------


In [128]:
compare_complexity(['matrix', 'determinant'])

matrix
Sum Complexity: 2957.5441108606756
Above Zero Complexity t-[0.3]: 0.8505859375
------------------------------------------------------
determinant
Sum Complexity: 3786.9694365635514
Above Zero Complexity t-[0.3]: 0.89013671875
------------------------------------------------------


## Phrase comparison
In the past, we did not have access to models that can embed a phrase directly, so we used positives instead. I will explore some of those combinations below.
Phrases of interest:
* "white house"
* "baby duck"
* "logical belief"

and so on...

In [14]:
define(['logical belief']) # this usage should work.

logical belief
GPT-3:
[('spoken', array([[0.93689853]])), ('lorry', array([[0.92014554]])), ('up', array([[0.91018386]])), ('bedroom', array([[0.8914305]])), ('sea', array([[0.8895526]])), ('list', array([[0.86985034]])), ('finished', array([[0.86606042]])), ('wife', array([[0.86285496]])), ('technology', array([[0.85796904]])), ('broken', array([[0.85456666]])), ('palace', array([[0.85325309]])), ('crack', array([[0.85229479]]))]
------------------------------------------------------
OPT:
[('logic', array([[1.]])), ('logical', array([[1.]])), ('plug', array([[0.56746377]])), ('signal', array([[0.53169372]])), ('significantly', array([[0.53169372]])), ('signature', array([[0.53169372]])), ('sign', array([[0.53169371]])), ('dig', array([[0.52574307]])), ('list', array([[0.52280699]])), ('listen', array([[0.52280696]])), ('tropical', array([[0.50721447]])), ('typical', array([[0.50444691]]))]
------------------------------------------------------


In [15]:
sim_test(["white", "house"], 'white house') # this is also valid!

white, house -> white house
GPT-3:
0.8369098570992568
------------------------------------------------------
OPT:
0.8413638347523291
------------------------------------------------------


In [16]:
sim_test(['white house'], 'Joe Biden') # we can even do unique, new tests with our wrappers!

white house -> Joe Biden
GPT-3:
0.8575577079311343
------------------------------------------------------
OPT:
0.14631690383655632
------------------------------------------------------


In [17]:
define(['baby duck'])

baby duck
GPT-3:
[('elect', array([[0.93903434]])), ('pipe', array([[0.92644236]])), ('ink', array([[0.90906323]])), ('federal', array([[0.87660101]])), ('will', array([[0.87258577]])), ('electrical', array([[0.86769641]])), ('purpose', array([[0.86710025]])), ('give', array([[0.86388197]])), ('meat', array([[0.86152264]])), ('birth', array([[0.86036832]])), ('kitten', array([[0.85628729]])), ('underneath', array([[0.8552504]]))]
------------------------------------------------------
OPT:
[('baby', array([[1.]])), ('girl', array([[0.58292659]])), ('daughter', array([[0.58173501]])), ('brothers', array([[0.57661696]])), ('brother', array([[0.57661695]])), ('boyfriend', array([[0.56609102]])), ('boy', array([[0.566091]])), ('child', array([[0.55158211]])), ('young', array([[0.54128088]])), ('father', array([[0.53199546]])), ('kid', array([[0.53126761]])), ('horse', array([[0.52665455]]))]
------------------------------------------------------


In [18]:
sim_test(['baby', 'duck'], 'baby duck')

baby, duck -> baby duck
GPT-3:
0.8015340615398907
------------------------------------------------------
OPT:
0.8168440247990836
------------------------------------------------------


In [19]:
sim_test(['baby duck'], 'duckling')

baby duck -> duckling
GPT-3:
0.7931393528732875
------------------------------------------------------
OPT:
0.19809594661709146
------------------------------------------------------


As a brief discussion before submitting: some of these scores are very surprising. That final sim_test has me scratching my head a bit: why did OPT score it at 0.19?

There are different sizes of OPT to be tried, but I like the 2048-D vectors from OPT-1.3B, and it didn't use too much cache space. We can (and should) test this on larger versions (the parameter size can be changed: ['125m', '350m', '1.3b', '2.7b', '6.7b', '13b', '30b']). For reference, 350M has 512-D vectors. If there is an error in vector generation, I will fix it. I hope this is easy to use and read.