In [1]:
import torch
import clip
from PIL import Image
import logging
import pandas as pd
import glob 
import os
from io import BytesIO
from PIL import UnidentifiedImageError
import subprocess
from weat.test import Test



In [2]:
device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-L/14", device=device)

In [3]:
class dwebpException(Exception):
    pass

def dwebp(file: str):
    webp = subprocess.run(
        f"dwebp  {file} -quiet -o -", shell=True, capture_output=True
    )
    if webp.returncode != 0:
        raise dwebpException(webp.stderr.decode())
    else:
        return Image.open(BytesIO(webp.stdout))

def load_dir(path):
    tmp = []
    
    for file in glob.glob(path):
        with torch.no_grad():
            try:
                img = Image.open(file)
            except UnidentifiedImageError:
                if os.path.splitext(file)[1].lower() == ".webp":
                    img = dwebp(file)
                  
                else:
                    raise
            prep = preprocess(img).unsqueeze(0).to(device)
            emb = model.encode_image(prep)
            tmp.append(emb.cpu())
    return tmp

In [4]:
X_image = torch.cat(load_dir('./ieat/data/experiments/gender/male/*'))
Y_image = torch.cat(load_dir('./ieat/data/experiments/gender/female/*'))
A_image = torch.cat(load_dir('./ieat/data/experiments/gender/science/*'))
B_image = torch.cat(load_dir('./ieat/data/experiments/gender/liberal-arts/*'))

test = Test(X_image, Y_image, A_image, B_image)
out = test.run()
        
df = pd.DataFrame([{'Name': 'Gender', 'X': 'Male', 'Y': 'Female', 'A': 'Science', 'B': 'Arts', 'n_t': X_image.shape[0], 'n_a': A_image.shape[0], 'p_i': out[1], 'd_i': out[0]}])
df

02/07 01:42:51 PM: Computing cosine similarities...
02/07 01:42:51 PM: Null hypothesis: no difference between X and Y in association to attributes A and B
02/07 01:42:51 PM: Computing pval...
02/07 01:42:51 PM: Using non-parametric test
02/07 01:42:51 PM: Drawing 9999 samples (and biasing by 1)
02/07 01:42:51 PM: pval: 0.0021
02/07 01:42:51 PM: computing effect size...
02/07 01:42:51 PM: esize: 0.628708


Unnamed: 0,Name,X,Y,A,B,n_t,n_a,p_i,d_i
0,Gender,Male,Female,Science,Arts,40,21,0.0021,0.628708


In [5]:
X_image = torch.cat(load_dir('./ieat/data/experiments/gender/male/*'))
Y_image = torch.cat(load_dir('./ieat/data/experiments/gender/female/*'))
A_image = torch.cat(load_dir('./ieat/data/experiments/gender/engineering/*'))
B_image = torch.cat(load_dir('./ieat/data/experiments/gender/care/*'))

test = Test(X_image, Y_image, A_image, B_image)
out = test.run()
        
df = pd.DataFrame([{'Name': 'Gender', 'X': 'Male', 'Y': 'Female', 'A': 'Engineering', 'B': 'Caregiving', 'n_t': X_image.shape[0], 'n_a': A_image.shape[0], 'p_i': out[1], 'd_i': out[0]}])
df

02/07 01:42:55 PM: Computing cosine similarities...
02/07 01:42:55 PM: Null hypothesis: no difference between X and Y in association to attributes A and B
02/07 01:42:55 PM: Computing pval...
02/07 01:42:55 PM: Using non-parametric test
02/07 01:42:55 PM: Drawing 9999 samples (and biasing by 1)
02/07 01:42:55 PM: pval: 0.0053
02/07 01:42:55 PM: computing effect size...
02/07 01:42:55 PM: esize: 0.571415


Unnamed: 0,Name,X,Y,A,B,n_t,n_a,p_i,d_i
0,Gender,Male,Female,Engineering,Caregiving,40,12,0.0053,0.571415


In [6]:
X_image = torch.cat(load_dir('./ieat/data/experiments/gender/male/*'))
Y_image = torch.cat(load_dir('./ieat/data/experiments/gender/female/*'))
A_image = torch.cat(load_dir('./ieat/data/experiments/gender/career/*'))
B_image = torch.cat(load_dir('./ieat/data/experiments/gender/family/*'))

test = Test(X_image, Y_image, A_image, B_image)
out = test.run()
        
df = pd.DataFrame([{'Name': 'Gender', 'X': 'Male', 'Y': 'Female', 'A': 'Career', 'B': 'Family', 'n_t': X_image.shape[0], 'n_a': A_image.shape[0], 'p_i': out[1], 'd_i': out[0]}])
df

02/07 01:43:01 PM: Computing cosine similarities...
02/07 01:43:01 PM: Null hypothesis: no difference between X and Y in association to attributes A and B
02/07 01:43:01 PM: Computing pval...
02/07 01:43:01 PM: Using non-parametric test
02/07 01:43:01 PM: Drawing 9999 samples (and biasing by 1)
02/07 01:43:01 PM: pval: 0.0213
02/07 01:43:01 PM: computing effect size...
02/07 01:43:01 PM: esize: 0.452586


Unnamed: 0,Name,X,Y,A,B,n_t,n_a,p_i,d_i
0,Gender,Male,Female,Career,Family,40,21,0.0213,0.452586


In [7]:
X_image = torch.cat(load_dir('./ieat/data/experiments/race/european-american-male/*'))
Y_image = torch.cat(load_dir('./ieat/data/experiments/race/african-american-female/*'))
A_image = torch.cat(load_dir('./ieat/data/experiments/gender/science/*'))
B_image = torch.cat(load_dir('./ieat/data/experiments/gender/liberal-arts/*'))

test = Test(X_image, Y_image, A_image, B_image)
out = test.run()
        
df = pd.DataFrame([{'Name': 'Ethnicity', 'X': 'African-American', 'Y': 'European-American', 'A': 'Science', 'B': 'Arts', 'n_t': X_image.shape[0], 'n_a': A_image.shape[0], 'p_i': out[1], 'd_i': out[0]}])
df

02/07 01:43:03 PM: Computing cosine similarities...
02/07 01:43:03 PM: Null hypothesis: no difference between X and Y in association to attributes A and B
02/07 01:43:03 PM: Computing pval...
02/07 01:43:03 PM: Using non-parametric test
02/07 01:43:03 PM: Using exact test (20 partitions)
02/07 01:43:03 PM: Equalities contributed 1/20 to p-value
02/07 01:43:03 PM: pval: 0.1
02/07 01:43:03 PM: computing effect size...
02/07 01:43:03 PM: esize: 0.97818


Unnamed: 0,Name,X,Y,A,B,n_t,n_a,p_i,d_i
0,Ethnicity,African-American,European-American,Science,Arts,3,21,0.1,0.97818


In [8]:
X_image = torch.cat(load_dir('./ieat/data/experiments/race/european-american-male/*'))
Y_image = torch.cat(load_dir('./ieat/data/experiments/race/african-american-female/*'))
A_image = torch.cat(load_dir('./ieat/data/experiments/gender/engineering/*'))
B_image = torch.cat(load_dir('./ieat/data/experiments/gender/care/*'))

test = Test(X_image, Y_image, A_image, B_image)
out = test.run()
        
df = pd.DataFrame([{'Name': 'Ethnicity', 'X': 'African-American', 'Y': 'European-American', 'A': 'Engineering', 'B': 'Caregiving', 'n_t': X_image.shape[0], 'n_a': A_image.shape[0], 'p_i': out[1], 'd_i': out[0]}])
df

02/07 01:43:04 PM: Computing cosine similarities...
02/07 01:43:04 PM: Null hypothesis: no difference between X and Y in association to attributes A and B
02/07 01:43:04 PM: Computing pval...
02/07 01:43:04 PM: Using non-parametric test
02/07 01:43:04 PM: Using exact test (20 partitions)
02/07 01:43:04 PM: Equalities contributed 1/20 to p-value
02/07 01:43:04 PM: pval: 0.05
02/07 01:43:04 PM: computing effect size...
02/07 01:43:04 PM: esize: 1.56983


Unnamed: 0,Name,X,Y,A,B,n_t,n_a,p_i,d_i
0,Ethnicity,African-American,European-American,Engineering,Caregiving,3,12,0.05,1.569828


In [9]:
X_image = torch.cat(load_dir('./ieat/data/experiments/race/european-american-male/*'))
Y_image = torch.cat(load_dir('./ieat/data/experiments/race/african-american-female/*'))
A_image = torch.cat(load_dir('./ieat/data/experiments/gender/career/*'))
B_image = torch.cat(load_dir('./ieat/data/experiments/gender/family/*'))

test = Test(X_image, Y_image, A_image, B_image)
out = test.run()
        
df = pd.DataFrame([{'Name': 'Ethnicity', 'X': 'African-American', 'Y': 'European-American', 'A': 'Career', 'B': 'Family', 'n_t': X_image.shape[0], 'n_a': A_image.shape[0], 'p_i': out[1], 'd_i': out[0]}])
df

02/07 01:43:07 PM: Computing cosine similarities...
02/07 01:43:07 PM: Null hypothesis: no difference between X and Y in association to attributes A and B
02/07 01:43:07 PM: Computing pval...
02/07 01:43:07 PM: Using non-parametric test
02/07 01:43:07 PM: Using exact test (20 partitions)
02/07 01:43:07 PM: Equalities contributed 1/20 to p-value
02/07 01:43:07 PM: pval: 0.05
02/07 01:43:07 PM: computing effect size...
02/07 01:43:07 PM: esize: 1.47994


Unnamed: 0,Name,X,Y,A,B,n_t,n_a,p_i,d_i
0,Ethnicity,African-American,European-American,Career,Family,3,21,0.05,1.479936
