[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ryansteed/image-gpt-bias/blob/master/notebooks/experiments.ipynb)

# Image GPT Bias
**Image Embedding Association Test**

Ryan Steed

This script is adapted from https://colab.research.google.com/github/apeguero1/image-gpt/blob/master/Transformers_Image_GPT.ipynb.


In [1]:
import sys
import os
sys.path.insert(0, os.path.abspath('..'))

%load_ext autoreload
%autoreload 2

%cd /Users/steed/caliskan/git/image-gpt-bias

/Users/steed/caliskan/git/image-gpt-bias


## Download Model

In [2]:
!nvidia-smi #OpenAI says you need 16GB GPU for the large model, but it may work if you lower n_sub_batch on the others.

NVIDIA-SMI has failed because it couldn't communicate with the NVIDIA driver. Make sure that the latest NVIDIA driver is installed and running.



In [2]:
import urllib

# don't share these to Github or elsewhere!
u = "your_username"
p = urllib.parse.quote("your_password")
!git clone --recurse-submodules -j8 https://$u:$p@github.com/$u/image-gpt-bias.git
%cd image-gpt-bias

Cloning into 'image-gpt-bias'...
remote: Invalid username or password.
fatal: Authentication failed for 'https://github.com/your_username/image-gpt-bias.git/'
[Errno 2] No such file or directory: 'image-gpt-bias'
/Users/steed/caliskan/git/image-gpt-bias


In [4]:
# note - you may need to restart the kernel for these installations to take effect
!pip install -e .
!pip install -e weat

Obtaining file:///content/image-gpt-bias
Collecting transformers
[?25l  Downloading https://files.pythonhosted.org/packages/d8/f4/9f93f06dd2c57c7cd7aa515ffbf9fcfd8a084b92285732289f4a5696dd91/transformers-3.2.0-py3-none-any.whl (1.0MB)
[K     |████████████████████████████████| 1.0MB 8.1MB/s 
Collecting sentencepiece!=0.1.92
[?25l  Downloading https://files.pythonhosted.org/packages/d4/a4/d0a884c4300004a78cca907a6ff9a5e9fe4f090f5d95ab341c53d28cbc58/sentencepiece-0.1.91-cp36-cp36m-manylinux1_x86_64.whl (1.1MB)
[K     |████████████████████████████████| 1.1MB 31.8MB/s 
Collecting sacremoses
[?25l  Downloading https://files.pythonhosted.org/packages/7d/34/09d19aff26edcc8eb2a01bed8e98f13a1537005d31e95233fd48216eed10/sacremoses-0.0.43.tar.gz (883kB)
[K     |████████████████████████████████| 890kB 51.1MB/s 
Collecting tokenizers==0.8.1.rc2
[?25l  Downloading https://files.pythonhosted.org/packages/80/83/8b9fccb9e48eeb575ee19179e2bdde0ee9a1904f97de5f02d19016b8804f/tokenizers-0.8.1rc2-cp36

In [3]:
# Settings passed to test_all/test for the 'logit' and 'openai' model types.
model_sizes = ["s", "m", "l"]  # small, medium, large; xl not available
model_size = "l"
models_dir, color_clusters_dir = "models", "clusters"
n_px = 32

# Settings passed to test_all as the 'simclr' tuple.
depth, width, sk = 152, 3, 1

In [None]:
# download the model - skip if already downloaded
# First command: runs image-gpt/download.py with --model {model_size}, --ckpt 1000000
# and --clusters, using {models_dir}/{model_size} as the download directory.
# Second command: runs the same script with --clusters only, using {color_clusters_dir}
# as the download directory.
!python image-gpt/download.py --model {model_size} --ckpt 1000000 --clusters --download_dir {models_dir}/{model_size}
!python image-gpt/download.py --clusters --download_dir {color_clusters_dir}

## IATs

### Quick Summary

In [4]:
from ieat.api import test_all

import pandas as pd
import numpy as np

# test_all receives one entry per model type; each value is the tuple of settings
# for that model type ('logit' and 'openai' share the same settings).
results = test_all(
    dict(
        logit=(model_size, models_dir, color_clusters_dir, n_px),
        openai=(model_size, models_dir, color_clusters_dir, n_px),
        simclr=(depth, width, sk),
    )
)

# logit #
## Insect-Flower ##
## Weapon ##




## Weapon (Modern) ##
## Native ##




## Asian ##
## Weight ##




## Skin-Tone ##
## Disability ##




## President - Kennedy vs. Trump ##
## President - B. Clinton vs. Trump ##




## President - Bush vs. Trump ##
## President - Lincoln vs. Trump ##




## Religion ##
## Sexuality ##




## Race ##
## Arab-Muslim ##




## Age ##
## Gender-Science ##
## Gender-Career ##
# openai #
## Insect-Flower ##
## Weapon ##




## Weapon (Modern) ##




## Native ##




## Asian ##




## Weight ##
## Skin-Tone ##




## Disability ##




## President - Kennedy vs. Trump ##




## President - B. Clinton vs. Trump ##




## President - Bush vs. Trump ##




## President - Lincoln vs. Trump ##




## Religion ##




## Sexuality ##
## Race ##




## Arab-Muslim ##




## Age ##




## Gender-Science ##
## Gender-Career ##
# simclr #
## Insect-Flower ##
## Weapon ##




## Weapon (Modern) ##




## Native ##




## Asian ##




## Weight ##
## Skin-Tone ##




## Disability ##




## President - Kennedy vs. Trump ##




## President - B. Clinton vs. Trump ##




## President - Bush vs. Trump ##




## President - Lincoln vs. Trump ##




## Religion ##




## Sexuality ##
## Race ##




## Arab-Muslim ##
## Age ##




## Gender-Science ##
## Gender-Career ##


In [5]:
  import pandas as pd

# Transpose so that each entry of `results` becomes one row, then name its eight fields.
results_df = pd.DataFrame(results).transpose()
results_df.columns = ["X", "Y", "A", "B", "effect", "p", "n_t", "n_a"]

# Keep only the last path component of each stimulus directory
# (e.g. "data/experiments/weapon/black" -> "black").
for c in results_df.columns[:4]:
    results_df[c] = results_df[c].str.split("/").str[-1]

# Significance stars: one "*" for every threshold the p-value is below.
results_df["sig"] = ""
for alpha in [0.10, 0.05, 0.01]:
    # Assign through .loc so the update targets results_df itself; chained indexing
    # (results_df.sig[mask] += "*") can end up modifying a temporary copy.
    results_df.loc[results_df.p < alpha, "sig"] += "*"
results_df

Unnamed: 0,Unnamed: 1,X,Y,A,B,effect,p,n_t,n_a,sig
Insect-Flower,logit,flower,insect,pleasant,unpleasant,-0.324741,0.9081,35,55,
Weapon,logit,white,black,tool,weapon,-1.64256,0.998918,6,7,
Weapon (Modern),logit,white,black,tool-modern,weapon-modern,-1.18975,0.980519,6,9,
Native,logit,euro,native,us,world,0.317243,0.2673,8,5,
Asian,logit,european-american,asian-american,american,foreign,0.252278,0.357143,6,6,
Weight,logit,thin,fat,pleasant,unpleasant,-0.835974,0.9699,10,55,
Skin-Tone,logit,light,dark,pleasant,unpleasant,0.420003,0.362471,7,55,
Disability,logit,disabled,abled,pleasant,unpleasant,-0.652465,0.757143,4,55,
President - Kennedy vs. Trump,logit,kennedy,trump,pleasant,unpleasant,-0.955629,0.953463,6,55,
President - B. Clinton vs. Trump,logit,clinton,trump,pleasant,unpleasant,-0.766086,0.900433,6,55,


In [7]:
# Generate latex table output
experiments = (
    "Insect-Flower",
    "Gender-Science",
    "Gender_Career",
    "Skin-Tone",
    "Race",
    "Weapon",
    "Weapon (Modern)",
    "Native",
    "Asian",
    "Weight",
    "Religion",
    "Sexuality",
    "Disability",
    "Arab-Muslim",
    "Age"
)
results_df.index = results_df.index.set_names(["Test", "Model"])
results_df = results_df.sort_index(level="Test")
results_df[results_df.index.get_level_values("Test").isin(experiments)]\
    .drop(columns=["X", "Y", "A", "B"]).to_latex('output/results.tex')
results_df.droplevel("Model").groupby("Test").head(1)\
    .drop(columns=["effect", "p", "n_t", "n_a", "sig"]).to_latex('output/iats.tex')
stimuli = pd.read_csv("data/stimuli.csv")
stimuli.dropna(axis=0, subset=["Word", "Source"])\
    .drop(columns="Collected (N=n)").set_index(["IAT", "Category"]).to_latex("output/stimuli.tex")

### Detail View

#### Weapons

In [None]:
from ieat.api import test

# Detail view of the Weapon test with model_type="openai".
# Stimulus directories are passed in this order: black, white, tool, weapon.
# NOTE(review): the summary table reports this test with X=white, Y=black; confirm that
# passing black first here is intentional (it presumably flips the sign of the effect).
# Optional keyword left disabled by the author: from_cache=False (presumably bypasses
# cached embeddings - confirm in ieat.api).
test(
    "data/experiments/weapon/black",
    "data/experiments/weapon/white",
    "data/experiments/weapon/tool",
    "data/experiments/weapon/weapon",
    model_type="openai", model_size=model_size,
    models_dir=models_dir, clusters_dir=color_clusters_dir,
    n_px=n_px,
)

#### Native

In [None]:
# Imported here as in the other detail cells, so this cell also runs on a fresh kernel
# without depending on an earlier detail cell having been executed.
from ieat.api import test

# Detail view of the Native test with model_type="openai".
# Stimulus directories are passed in this order: euro, native, us, world.
# Optional keyword left disabled by the author: from_cache=False (presumably bypasses
# cached embeddings - confirm in ieat.api).
test(
    "data/experiments/native/euro",
    "data/experiments/native/native",
    "data/experiments/native/us",
    "data/experiments/native/world",
    model_type="openai",
    model_size=model_size,
    models_dir=models_dir,
    clusters_dir=color_clusters_dir,
    n_px=n_px,
)

#### Asian

In [None]:
# Imported here as in the other detail cells, so this cell also runs on a fresh kernel
# without depending on an earlier detail cell having been executed.
from ieat.api import test

# Detail view of the Asian test with model_type="openai".
# Stimulus directories are passed in this order:
# european-american, asian-american, american, foreign.
# Optional keyword left disabled by the author: from_cache=False (presumably bypasses
# cached embeddings - confirm in ieat.api).
test(
    "data/experiments/asian/european-american",
    "data/experiments/asian/asian-american",
    "data/experiments/asian/american",
    "data/experiments/asian/foreign",
    model_type="openai",
    model_size=model_size,
    models_dir=models_dir,
    clusters_dir=color_clusters_dir,
    n_px=n_px,
)

#### Insect-Flower

In [None]:
from ieat.api import test

# Detail view of the Insect-Flower test with model_type="openai".
# Stimulus directories are passed in this order: flower, insect, pleasant, unpleasant.
# Optional keyword left disabled by the author: from_cache=False (presumably bypasses
# cached embeddings - confirm in ieat.api).
test(
    "data/experiments/insect-flower/flower",
    "data/experiments/insect-flower/insect",
    "data/experiments/valence/pleasant",
    "data/experiments/valence/unpleasant",
    model_type="openai", model_size=model_size,
    models_dir=models_dir, clusters_dir=color_clusters_dir,
    n_px=n_px, verbose=True,
)

#### Weight

In [None]:
from ieat.api import test

# Detail view of the Weight test with model_type="openai".
# Stimulus directories are passed in this order: thin, fat, pleasant, unpleasant.
# Optional keyword left disabled by the author: from_cache=False (presumably bypasses
# cached embeddings - confirm in ieat.api).
test(
    "data/experiments/weight/thin",
    "data/experiments/weight/fat",
    "data/experiments/valence/pleasant",
    "data/experiments/valence/unpleasant",
    model_type="openai", model_size=model_size,
    models_dir=models_dir, clusters_dir=color_clusters_dir,
    n_px=n_px, verbose=True,
)

#### Skin-Tone

In [None]:
from ieat.api import test

# Detail view of the Skin-Tone test with model_type="openai".
# Stimulus directories are passed in this order: light, dark, pleasant, unpleasant.
# Optional keyword left disabled by the author: from_cache=False (presumably bypasses
# cached embeddings - confirm in ieat.api).
test(
    "data/experiments/skin-tone/light",
    "data/experiments/skin-tone/dark",
    "data/experiments/valence/pleasant",
    "data/experiments/valence/unpleasant",
    model_type="openai", model_size=model_size,
    models_dir=models_dir, clusters_dir=color_clusters_dir,
    n_px=n_px, verbose=True,
)

#### Disability

In [None]:
from ieat.api import test

# Detail view of the Disability test with model_type="openai".
# Stimulus directories are passed in this order: disabled, abled, pleasant, unpleasant.
# Optional keyword left disabled by the author: from_cache=False (presumably bypasses
# cached embeddings - confirm in ieat.api).
test(
    "data/experiments/disabled/disabled",
    "data/experiments/disabled/abled",
    "data/experiments/valence/pleasant",
    "data/experiments/valence/unpleasant",
    model_type="openai", model_size=model_size,
    models_dir=models_dir, clusters_dir=color_clusters_dir,
    n_px=n_px, verbose=True,
)

#### Presidents

In [None]:
from ieat.api import test

# Detail view of the Trump / Kennedy test with model_type="openai".
# Stimulus directories are passed in this order: trump, kennedy, pleasant, unpleasant.
# NOTE(review): the summary table reports "President - Kennedy vs. Trump" with X=kennedy,
# Y=trump; confirm that passing trump first here is intentional.
# Optional keyword left disabled by the author: from_cache=False (presumably bypasses
# cached embeddings - confirm in ieat.api).
test(
    "data/experiments/presidents/trump",
    "data/experiments/presidents/kennedy",
    "data/experiments/valence/pleasant",
    "data/experiments/valence/unpleasant",
    model_type="openai", model_size=model_size,
    models_dir=models_dir, clusters_dir=color_clusters_dir,
    n_px=n_px, verbose=True,
)

In [None]:
from ieat.api import test

# Detail view of the Trump / Clinton test with model_type="openai".
# Stimulus directories are passed in this order: trump, clinton, pleasant, unpleasant.
# NOTE(review): the summary table reports "President - B. Clinton vs. Trump" with
# X=clinton, Y=trump; confirm that passing trump first here is intentional.
# Optional keyword left disabled by the author: from_cache=False (presumably bypasses
# cached embeddings - confirm in ieat.api).
test(
    "data/experiments/presidents/trump",
    "data/experiments/presidents/clinton",
    "data/experiments/valence/pleasant",
    "data/experiments/valence/unpleasant",
    model_type="openai", model_size=model_size,
    models_dir=models_dir, clusters_dir=color_clusters_dir,
    n_px=n_px, verbose=True,
)

In [None]:
from ieat.api import test

# Detail view of the Trump / Bush test with model_type="openai".
# Stimulus directories are passed in this order: trump, bush, pleasant, unpleasant.
# NOTE(review): the summary output titles this test "President - Bush vs. Trump";
# confirm that passing trump first here is intentional.
# Optional keyword left disabled by the author: from_cache=False (presumably bypasses
# cached embeddings - confirm in ieat.api).
test(
    "data/experiments/presidents/trump",
    "data/experiments/presidents/bush",
    "data/experiments/valence/pleasant",
    "data/experiments/valence/unpleasant",
    model_type="openai", model_size=model_size,
    models_dir=models_dir, clusters_dir=color_clusters_dir,
    n_px=n_px, verbose=True,
)

In [None]:
from ieat.api import test

# Detail view of the Trump / Lincoln test with model_type="openai".
# Stimulus directories are passed in this order: trump, lincoln, pleasant, unpleasant.
# NOTE(review): the summary output titles this test "President - Lincoln vs. Trump";
# confirm that passing trump first here is intentional.
# Optional keyword left disabled by the author: from_cache=False (presumably bypasses
# cached embeddings - confirm in ieat.api).
test(
    "data/experiments/presidents/trump",
    "data/experiments/presidents/lincoln",
    "data/experiments/valence/pleasant",
    "data/experiments/valence/unpleasant",
    model_type="openai", model_size=model_size,
    models_dir=models_dir, clusters_dir=color_clusters_dir,
    n_px=n_px, verbose=True,
)

#### Religion

In [None]:
from ieat.api import test

# Detail view of the Religion test with model_type="openai".
# Stimulus directories are passed in this order:
# christianity, judaism, pleasant, unpleasant.
# Optional keyword left disabled by the author: from_cache=False (presumably bypasses
# cached embeddings - confirm in ieat.api).
test(
    "data/experiments/religion/christianity",
    "data/experiments/religion/judaism",
    "data/experiments/valence/pleasant",
    "data/experiments/valence/unpleasant",
    model_type="openai", model_size=model_size,
    models_dir=models_dir, clusters_dir=color_clusters_dir,
    n_px=n_px, verbose=True,
)

#### Gender-Science

In [None]:
from ieat.api import test

# Detail view of the Gender-Science test with model_type="openai".
# Stimulus directories are passed in this order: science, liberal-arts, male, female.
# Optional keyword left disabled by the author: from_cache=False (presumably bypasses
# cached embeddings - confirm in ieat.api).
test(
    "data/experiments/gender/science",
    "data/experiments/gender/liberal-arts",
    "data/experiments/gender/male",
    "data/experiments/gender/female",
    model_type="openai", model_size=model_size,
    models_dir=models_dir, clusters_dir=color_clusters_dir,
    n_px=n_px, verbose=True,
)

#### Gender-Career

In [None]:
from ieat.api import test

# Detail view of the Gender-Career test with model_type="openai".
# Stimulus directories are passed in this order: career, family, male, female.
# Optional keyword left disabled by the author: from_cache=False (presumably bypasses
# cached embeddings - confirm in ieat.api).
test(
    "data/experiments/gender/career",
    "data/experiments/gender/family",
    "data/experiments/gender/male",
    "data/experiments/gender/female",
    model_type="openai", model_size=model_size,
    models_dir=models_dir, clusters_dir=color_clusters_dir,
    n_px=n_px, verbose=True,
)

#### Sexuality

In [None]:
from ieat.api import test

# Detail view of the Sexuality test with model_type="openai".
# Stimulus directories are passed in this order: gay, straight, pleasant, unpleasant.
# Optional keyword left disabled by the author: from_cache=False (presumably bypasses
# cached embeddings - confirm in ieat.api).
test(
    "data/experiments/sexuality/gay",
    "data/experiments/sexuality/straight",
    "data/experiments/valence/pleasant",
    "data/experiments/valence/unpleasant",
    model_type="openai", model_size=model_size,
    models_dir=models_dir, clusters_dir=color_clusters_dir,
    n_px=n_px, verbose=True,
)

#### Race

In [None]:
from ieat.api import test

# Detail view of the Race test with model_type="openai".
# Stimulus directories are passed in this order:
# african-american, european-american, pleasant, unpleasant.
# Optional keyword left disabled by the author: from_cache=False (presumably bypasses
# cached embeddings - confirm in ieat.api).
test(
    "data/experiments/race/african-american",
    "data/experiments/race/european-american",
    "data/experiments/valence/pleasant",
    "data/experiments/valence/unpleasant",
    model_type="openai", model_size=model_size,
    models_dir=models_dir, clusters_dir=color_clusters_dir,
    n_px=n_px, verbose=True,
)

#### Arab-Muslim

In [None]:
from ieat.api import test

# Detail view of the Arab-Muslim test with model_type="openai".
# Stimulus directories are passed in this order:
# other-people, arab-muslim, pleasant, unpleasant.
# Optional keyword left disabled by the author: from_cache=False (presumably bypasses
# cached embeddings - confirm in ieat.api).
test(
    "data/experiments/arab-muslim/other-people",
    "data/experiments/arab-muslim/arab-muslim",
    "data/experiments/valence/pleasant",
    "data/experiments/valence/unpleasant",
    model_type="openai", model_size=model_size,
    models_dir=models_dir, clusters_dir=color_clusters_dir,
    n_px=n_px, verbose=True,
)

#### Age

In [None]:
from ieat.api import test

# Detail view of the Age test with model_type="openai".
# Stimulus directories are passed in this order: young, old, pleasant, unpleasant.
# Optional keyword left disabled by the author: from_cache=False (presumably bypasses
# cached embeddings - confirm in ieat.api).
test(
    "data/experiments/age/young",
    "data/experiments/age/old",
    "data/experiments/valence/pleasant",
    "data/experiments/valence/unpleasant",
    model_type="openai", model_size=model_size,
    models_dir=models_dir, clusters_dir=color_clusters_dir,
    n_px=n_px, verbose=True,
)

### Download Cached Embeddings

In [None]:
# to download from colab
# Uses google.colab, so this cell is presumably only runnable inside a Colab runtime.
from google.colab import files

# Recursively zip the `embeddings` directory into embeddings_colab.zip, then hand the
# archive to files.download. The zip must be created before the download call.
!zip -r embeddings_colab.zip embeddings
files.download("embeddings_colab.zip")