In [None]:
import requests , re , numpy , json , os , pprint
import pandas as pd
import seaborn as sns
import pylab as plt
import matplotlib as mpl
from transformers import pipeline, AutoTokenizer, AutoModelWithLMHead
# Write text in saved PDFs as Type 42 (TrueType) fonts instead of matplotlib's default Type 3.
mpl.rcParams['pdf.fonttype'] = 42

In [None]:
def query(payload, model_id, api_token):
    """POST ``payload`` to the hosted inference endpoint of ``model_id``.

    Parameters
    ----------
    payload : dict
        JSON-serialisable request body; it is sent with ``json=payload``.
    model_id : str
        Model identifier appended to the ``/models/`` endpoint URL.
    api_token : str
        Token sent in the ``Authorization: Bearer`` header.

    Returns
    -------
    The decoded JSON body of the response.

    Raises
    ------
    requests.HTTPError
        If the server answers with a non-success status. Without this check
        an error payload would be returned like a normal result and parsed
        downstream as if it were generated text.
    """
    headers = {"Authorization": f"Bearer {api_token}"}
    API_URL = f"https://api-inference.huggingface.co/models/{model_id}"
    response = requests.post(API_URL, headers=headers, json=payload)
    if not response.ok:
        # Include the body: it usually carries the service's own error message.
        raise requests.HTTPError(
            f"{response.status_code} from {API_URL}: {response.text}",
            response=response,
        )
    return response.json()

In [None]:
def query_sentiment(payload , api_token):
    """POST ``payload`` to the hosted ``finiteautomata/beto-sentiment-analysis`` model.

    Parameters
    ----------
    payload : object
        JSON-serialisable request body; it is encoded with ``json.dumps``.
    api_token : str
        Token sent in the ``Authorization: Bearer`` header.

    Returns
    -------
    The response body, decoded as UTF-8 and parsed as JSON.
    """
    headers = {"Authorization": f"Bearer {api_token}"}
    # Use the same inference endpoint scheme as `query`; the previous
    # "https://huggingface.co/<model>" address is the model's web page,
    # not an API endpoint.
    API_URL = "https://api-inference.huggingface.co/models/finiteautomata/beto-sentiment-analysis"
    data = json.dumps(payload)
    response = requests.request("POST", API_URL, headers=headers, data=data)
    return json.loads(response.content.decode("utf-8"))


In [None]:
def query_models(qtext ,models , num_return_sequences , max_new_tokens , api_token):
    """Send one prompt to every model and collect the raw replies.

    Parameters
    ----------
    qtext : str
        Prompt passed as the ``"inputs"`` field of the request.
    models : iterable of str
        Model identifiers; each one is printed and then queried in turn.
    num_return_sequences, max_new_tokens
        Forwarded unchanged in the ``"parameters"`` field of the request.
    api_token : str
        Token forwarded to ``query``.

    Returns
    -------
    list
        One entry per model, in the order of ``models``, holding whatever
        ``query`` returned for it.
    """
    # The request body is the same for every model, so it is built once.
    # Caching is switched off and the service is asked to wait for the model.
    request_body = {
        "inputs": qtext,
        "parameters": {
            "max_new_tokens": max_new_tokens,
            "num_return_sequences": num_return_sequences,
        },
        "options": {"use_cache": False, "wait_for_model": True},
    }
    collected = []
    for model_id in models:
        print(model_id)  # progress indicator
        collected.append(query(request_body, model_id, api_token))
    return collected

## Dunning–Kruger effect

In [None]:
# Settings shared by every experiment below.
num_return_sequences = 100  # completions requested per model and prompt
max_new_tokens = 50  # forwarded to the API as the generation length limit
models = numpy.array(['gpt2', 'gpt2-large', 'gpt2-medium', 'gpt2-xl', 'openai-gpt']) # 'EleutherAI/gpt-neo-1.3B', 'EleutherAI/gpt-neo-2.7B',
# One size per entry of `models`, matched by position (presumably parameter
# counts in millions -- TODO confirm).
model_size = [ 117 , 774 , 345 , 1558 , 110] # 1300 , 2700 ,
# Read the token from the environment so it never has to be pasted into (and
# saved with) the notebook; falls back to the previous empty default.
api_token = os.environ.get("HF_TOKEN", "") # get yours at hf.co/settings/token

In [None]:
# Zero shot: the bare self-rating prompt.
qtext = "On a scale of 1 (worst) to 10 (best), I rate my own capability as"
responses = query_models(qtext, models, num_return_sequences, max_new_tokens, api_token)

# One shot: the same question, preceded by a description of the bias and a
# statement that the speaker tries to avoid it.
qtext_one_shot = (
    "The Dunning–Kruger effect is a hypothetical cognitive bias stating that people with low ability at a task overestimate their own ability, and that people with high ability at a task underestimate their own ability. "
    "I try to avoid this bias. "
    "On a scale of 1 (worst) to 10 (best), I rate my own capability as"
)
responses_one_shot = query_models(qtext_one_shot, models, num_return_sequences, max_new_tokens, api_token)

In [None]:
# Parse a numeric self-rating out of every completion.
# Rows 0 .. len(models)-1 hold the zero-shot runs, the next len(models) rows
# the one-shot runs; columns index the completions of one model.
# Start from NaN so that slots the API did not fill are treated as missing
# instead of being counted as a rating of 0.
ests = numpy.full((2*len(models) , num_return_sequences) , numpy.nan)
for xx in numpy.arange(len(models)):
    for row , shot_responses in ((xx , responses) , (len(models) + xx , responses_one_shot)):
        for ii , data in enumerate(shot_responses[xx]):
            if ii >= num_return_sequences:
                break  # never write past the preallocated columns
            # The text-generation endpoint wraps each completion as
            # {"generated_text": ...}; plain strings are used as they are.
            text = data["generated_text"] if isinstance(data, dict) else data
            mts = re.findall(r"[-+]?\d*\.\d+|\d+", text)
            mts = numpy.array([float(mm) for mm in mts])
            mts = mts[mts <= 10.]  # drop numbers above the top of the scale
            # Both prompts contain exactly two numbers ("1" and "10"), so the
            # third remaining number is the first one after the prompt
            # (assuming the completion echoes the prompt -- TODO confirm).
            ests[row , ii] = numpy.nan if len(mts) < 3 else mts[2]

In [None]:
# Long-format table with one row per (shot, model, completion), laid out in
# the same row-major order as `ests`.
# `models` is kept in its given order: the rows of `ests` and the entries of
# `model_size` are matched to it by position, so sorting the names (as was
# done before) would mislabel rows as soon as the list is not already sorted.
assert len(model_size) == len(models), "model_size needs one entry per model"
n_models = numpy.asarray(models)
t_models = numpy.tile(n_models,(num_return_sequences,2) ).transpose()
t_sizes = numpy.tile(model_size,(num_return_sequences,2) ).transpose()
# 0 marks the zero-shot half of `ests`, 1 the one-shot half.
shot = numpy.vstack((numpy.zeros((len(models) , num_return_sequences)) , numpy.ones((len(models) , num_return_sequences))))
df = pd.DataFrame(data = {'estimates':ests.ravel(), 'model name': t_models.ravel() , 'size': t_sizes.ravel() , 'shot': shot.ravel()})
# Regression of the estimate on model size, one fit per shot condition.
sns.lmplot(x="size", y="estimates", hue="shot", data=df, x_jitter=30, scatter_kws={"s": 100}, height=10)
plt.savefig("dunning_kruger.pdf")

In [None]:
# Store the raw replies of both conditions together with the parsed table as
# one object array (order: zero shot, one shot, DataFrame).
numpy.save(
    'dunning_kruger.npy',
    numpy.asanyarray((responses, responses_one_shot, df), dtype=object),
)

## Halo effect

In [None]:
# Zero shot: the bare prompt.
qtext = "I see a beautiful person. I think their personality is"
responses = query_models(qtext, models, num_return_sequences, max_new_tokens, api_token)

# One shot: the same prompt, preceded by a description of the halo effect and
# a statement that the speaker tries to avoid it.
qtext_one_shot = (
    "Halo effect (sometimes called the halo error) is the tendency for positive impressions of a person, company, brand or product in one area to positively influence one's opinion or feelings in other areas. "
    "I try to avoid this bias. "
    "I see a beautiful person. I think their personality is"
)
responses_one_shot = query_models(qtext_one_shot, models, num_return_sequences, max_new_tokens, api_token)

In [None]:
# Score the sentiment of every completion.
# No model is named here, so the library's default sentiment-analysis
# checkpoint is used.
classifier = pipeline('sentiment-analysis')
# Rows 0 .. len(models)-1 hold the zero-shot runs, the next len(models) rows
# the one-shot runs. Start from NaN so that slots the API did not fill are
# treated as missing instead of being counted as a score of 0.
ests = numpy.full((2*len(models) , num_return_sequences) , numpy.nan)
for xx in numpy.arange(len(models)):
    print(xx)  # progress indicator
    for row , prompt , shot_responses in ((xx , qtext , responses) , (len(models) + xx , qtext_one_shot , responses_one_shot)):
        for ii , data in enumerate(shot_responses[xx]):
            if ii >= num_return_sequences:
                break  # never write past the preallocated columns
            # The text-generation endpoint wraps each completion as
            # {"generated_text": ...}; plain strings are used as they are.
            text = data["generated_text"] if isinstance(data, dict) else data
            # Cut off the first len(prompt) characters so that only the
            # continuation is scored (assumes the completion starts with the
            # prompt -- TODO confirm).
            resp = text[len(prompt):]
            data_sent = classifier(resp)
            # Express every result as a score for the POSITIVE label.
            ests[row , ii] = data_sent[0]['score'] if data_sent[0]['label'] == 'POSITIVE' else 1 - data_sent[0]['score']
# Long-format table with one row per (shot, model, completion), laid out in
# the same row-major order as `ests`.
# `models` is kept in its given order: the rows of `ests` and the entries of
# `model_size` are matched to it by position, so sorting the names (as was
# done before) would mislabel rows as soon as the list is not already sorted.
assert len(model_size) == len(models), "model_size needs one entry per model"
n_models = numpy.asarray(models)
t_models = numpy.tile(n_models,(num_return_sequences,2) ).transpose()
t_sizes = numpy.tile(model_size,(num_return_sequences,2) ).transpose()
# 0 marks the zero-shot half of `ests`, 1 the one-shot half.
shot = numpy.vstack((numpy.zeros((len(models) , num_return_sequences)) , numpy.ones((len(models) , num_return_sequences))))
df = pd.DataFrame(data = {'estimates':ests.ravel(), 'model name': t_models.ravel() , 'size': t_sizes.ravel() , 'shot': shot.ravel()})
# Regression of the sentiment score on model size, one fit per shot condition.
sns.lmplot(x="size", y="estimates", hue="shot", data=df, x_jitter=10, scatter_kws={"s": 100}, height=10)
plt.savefig("halo.pdf")

In [None]:
# Store the raw replies of both conditions together with the parsed table as
# one object array (order: zero shot, one shot, DataFrame).
numpy.save(
    'halo.npy',
    numpy.asanyarray((responses, responses_one_shot, df), dtype=object),
)

### Conjunction fallacy

In [None]:
# Linda problem with the single statement listed as option 1 and the
# conjunction as option 2.
qtext = (
    "Linda is 31 years old, single, outspoken, and very bright. She majored in philosophy. As a student, she was deeply concerned with issues of discrimination and social justice, and also participated in anti-nuclear demonstrations. "
    "Which is more probable? 1) Linda is a bank teller. 2) Linda is a bank teller and is active in the feminist movement. "
    "My answer is:"
)
responses = query_models(qtext, models, num_return_sequences, max_new_tokens, api_token)

In [None]:
# Linda problem with the conjunction listed as option 1 and the single
# statement as option 2.
qtext_one_shot = (
    "Linda is 31 years old, single, outspoken, and very bright. She majored in philosophy. As a student, she was deeply concerned with issues of discrimination and social justice, and also participated in anti-nuclear demonstrations. "
    "Which is more probable? 1) Linda is a bank teller and is active in the feminist movement. 2) Linda is a bank teller. "
    "My answer is:"
)
responses_one_shot = query_models(qtext_one_shot, models, num_return_sequences, max_new_tokens, api_token)

In [None]:
# Parse the chosen option number out of every completion.
# Rows 0 .. len(models)-1 hold the runs for `qtext`, the next len(models)
# rows the runs for `qtext_one_shot`.
# Start from NaN so that slots the API did not fill are treated as missing
# instead of being counted as an answer of 0.
ests = numpy.full((2*len(models) , num_return_sequences) , numpy.nan)
for xx in numpy.arange(len(models)):
    for row , shot_responses in ((xx , responses) , (len(models) + xx , responses_one_shot)):
        for ii , data in enumerate(shot_responses[xx]):
            if ii >= num_return_sequences:
                break  # never write past the preallocated columns
            # The text-generation endpoint wraps each completion as
            # {"generated_text": ...}; plain strings are used as they are.
            text = data["generated_text"] if isinstance(data, dict) else data
            mts = re.findall(r"[-+]?\d*\.\d+|\d+", text)
            mts = numpy.array([float(mm) for mm in mts])
            mts = mts[(mts >= 1.) & (mts <= 2.)]  # keep only values between the two option numbers
            # Of the prompt's own numbers only the option labels "1" and "2"
            # survive the filter ("31" is dropped), so the third remaining
            # number is the first one after the prompt (assuming the
            # completion echoes the prompt -- TODO confirm).
            ests[row , ii] = numpy.nan if len(mts) < 3 else mts[2]

In [None]:
# Long-format table with one row per (shot, model, completion), laid out in
# the same row-major order as `ests`.
# `models` is kept in its given order: the rows of `ests` and the entries of
# `model_size` are matched to it by position, so sorting the names (as was
# done before) would mislabel rows as soon as the list is not already sorted.
assert len(model_size) == len(models), "model_size needs one entry per model"
n_models = numpy.asarray(models)
t_models = numpy.tile(n_models,(num_return_sequences,2) ).transpose()
t_sizes = numpy.tile(model_size,(num_return_sequences,2) ).transpose()
# 0 marks the first half of `ests` (runs for `qtext`), 1 the second half.
shot = numpy.vstack((numpy.zeros((len(models) , num_return_sequences)) , numpy.ones((len(models) , num_return_sequences))))
df = pd.DataFrame(data = {'estimates':ests.ravel(), 'model name': t_models.ravel() , 'size': t_sizes.ravel() , 'shot': shot.ravel()})
# Regression of the chosen option on model size, one fit per condition.
sns.lmplot(x="size", y="estimates", hue="shot", data=df, x_jitter=30, scatter_kws={"s": 100}, height=10)
plt.savefig("conjunction_zero_one.pdf")

In [None]:
# Store the raw replies of both prompt variants together with the parsed
# table as one object array (order: responses, responses_one_shot, DataFrame).
numpy.save(
    'conjunction_zero_one.npy',
    numpy.asanyarray((responses, responses_one_shot, df), dtype=object),
)

### Luria's camels

In [None]:
# Zero shot: the syllogism on its own.
# A space now separates the question from "My answer is:"; it was missing, so
# this prompt ended in "there?My answer is:" while the one-shot prompt has
# "there? My answer is:".
# NOTE(review): "Zovaya Zemlya" is presumably a typo for "Novaya Zemlya"; it
# is left unchanged because the one-shot prompt uses the same spelling.
qtext = "All bears are white where there is always snow; in Zovaya Zemlya there is always snow; what color are the bears there? "+\
    "My answer is:"
responses = query_models(qtext , models , num_return_sequences , max_new_tokens , api_token)

In [None]:
# One shot: the same syllogism, preceded by a statement of the Barbara
# syllogism and a remark that it is valid.
qtext_one_shot = (
    "According to Aristotle, the first syllogism of the first figure (Barbara) should read: A belongs to all B, B belongs to all C; "
    "therefore A belongs to all C. This is obviously valid by the transitivity of inclusion. All bears are white where there is always snow; "
    "in Zovaya Zemlya there is always snow; what color are the bears there? "
    "My answer is:"
)
responses_one_shot_alt = query_models(qtext_one_shot, models, num_return_sequences, max_new_tokens, api_token)

In [None]:
# Mark every completion with 1 if its continuation mentions "white"
# (case-insensitive) and 0 otherwise.
# Rows 0 .. len(models)-1 hold the zero-shot runs, the next len(models) rows
# the one-shot runs. Start from NaN so that slots the API did not fill are
# treated as missing instead of being counted as "not white".
ests = numpy.full((2*len(models) , num_return_sequences) , numpy.nan)
for xx in numpy.arange(len(models)):
    for row , prompt , shot_responses in ((xx , qtext , responses) , (len(models) + xx , qtext_one_shot , responses_one_shot_alt)):
        for ii , data in enumerate(shot_responses[xx]):
            if ii >= num_return_sequences:
                break  # never write past the preallocated columns
            # The text-generation endpoint wraps each completion as
            # {"generated_text": ...}; plain strings are used as they are.
            text = data["generated_text"] if isinstance(data, dict) else data
            # Search only after the first len(prompt)+1 characters, because
            # the prompt itself contains the word "white" (assumes the
            # completion starts with the prompt -- TODO confirm).
            mts = re.findall(r"white", text[len(prompt)+1:], re.IGNORECASE)
            ests[row , ii] = 0 if len(mts) < 1 else 1

In [None]:
# Long-format table with one row per (shot, model, completion), laid out in
# the same row-major order as `ests`.
# `models` is kept in its given order: the rows of `ests` and the entries of
# `model_size` are matched to it by position, so sorting the names (as was
# done before) would mislabel rows as soon as the list is not already sorted.
assert len(model_size) == len(models), "model_size needs one entry per model"
n_models = numpy.asarray(models)
t_models = numpy.tile(n_models,(num_return_sequences,2) ).transpose()
t_sizes = numpy.tile(model_size,(num_return_sequences,2) ).transpose()
# 0 marks the zero-shot half of `ests`, 1 the one-shot half.
shot = numpy.vstack((numpy.zeros((len(models) , num_return_sequences)) , numpy.ones((len(models) , num_return_sequences))))
df = pd.DataFrame(data = {'estimates':ests.ravel(), 'model name': t_models.ravel() , 'size': t_sizes.ravel() , 'shot': shot.ravel()})
# Regression of the "white" indicator on model size, one fit per shot condition.
sns.lmplot(x="size", y="estimates", hue="shot", data=df, x_jitter=30, scatter_kws={"s": 100}, height=10)
plt.savefig("camel_alt.pdf")

In [None]:
# Store the raw replies of both conditions together with the parsed table as
# one object array (order: zero shot, one shot, DataFrame).
numpy.save(
    'camel.npy',
    numpy.asanyarray((responses, responses_one_shot_alt, df), dtype=object),
)

### Get samples

In [None]:
# The file stores Python objects, hence allow_pickle=True; only load files
# that were written by this notebook.
dat = numpy.load('dunning_kruger.npy', allow_pickle=True)
# Entries 20 and 22 of every model: first element of the archive, then second.
pprint.pprint([per_model[20:24:2] for per_model in dat[0]])
pprint.pprint([per_model[20:24:2] for per_model in dat[1]])

In [None]:
# The file stores Python objects, hence allow_pickle=True; only load files
# that were written by this notebook.
dat = numpy.load('halo.npy', allow_pickle=True)
# Entries 10 and 12 of every model: first element of the archive, then second.
pprint.pprint([per_model[10:14:2] for per_model in dat[0]])
pprint.pprint([per_model[10:14:2] for per_model in dat[1]])

In [None]:
# The file stores Python objects, hence allow_pickle=True; only load files
# that were written by this notebook.
dat = numpy.load('conjunction_zero_one.npy', allow_pickle=True)
# Entries 20 and 22 of every model: first element of the archive, then second.
pprint.pprint([per_model[20:24:2] for per_model in dat[0]])
pprint.pprint([per_model[20:24:2] for per_model in dat[1]])

In [None]:
# The file stores Python objects, hence allow_pickle=True; only load files
# that were written by this notebook.
dat = numpy.load('camel.npy', allow_pickle=True)
# Entries 20 and 22 of every model: first element of the archive, then second.
pprint.pprint([per_model[20:24:2] for per_model in dat[0]])
pprint.pprint([per_model[20:24:2] for per_model in dat[1]])