In [1]:
import argparse
import csv
import logging
import os
import random
import sys
import pickle
import pdb

import numpy as np
import torch
from tqdm import tqdm, trange

from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

import transformers
from transformers import BertTokenizer, BertModel

import json
import pickle

import random
import numpy.linalg as LA
import copy


import matplotlib.pyplot as plt
import matplotlib.font_manager as fm

import statistics
import math

############ path of Font #############
# Absolute location of a Noto Sans CJK font file on the local machine.
# NOTE(review): this path is machine-specific; adjust it if the font is
# installed elsewhere.

fontpath = '/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc'
# Font properties (size 9) built from that file. Presumably intended for
# rendering CJK labels in matplotlib figures -- no use of `fontprop` is
# visible in this part of the notebook.
fontprop = fm.FontProperties(fname=fontpath, size=9)

In [2]:
# Load the pretrained 'klue/bert-base' tokenizer and bare BERT encoder.
# Both objects are read as module-level globals inside test_seat().
# BertModel is the encoder without task heads, which is why the load
# message lists the unused `cls.*` checkpoint weights.
tokenizer = BertTokenizer.from_pretrained('klue/bert-base')
model = BertModel.from_pretrained('klue/bert-base')

Some weights of the model checkpoint at klue/bert-base were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [3]:
def cossim(x, y):
    """Return the cosine similarity between vectors ``x`` and ``y``.

    Computed as ``x . y / sqrt((x . x) * (y . y))``; no special handling
    is applied when either vector has zero length.
    """
    inner = np.dot(x, y)
    squared_norm_product = np.dot(x, x) * np.dot(y, y)
    return inner / math.sqrt(squared_norm_product)

def s_wAB(w, A, B):
    """Differential association of vector ``w`` with sets ``A`` and ``B``.

    Returns the mean cosine similarity between ``w`` and the members of
    ``A`` minus the mean cosine similarity between ``w`` and the members
    of ``B``.

    Raises:
        ZeroDivisionError: if ``A`` or ``B`` is empty.
    """
    similarity_to_A = sum(cossim(w, a) for a in A)
    similarity_to_B = sum(cossim(w, b) for b in B)
    return similarity_to_A / len(A) - similarity_to_B / len(B)
    
def mean_s_wAB(X, A, B):
    """Average of ``s_wAB(x, A, B)`` taken over every ``x`` in ``X``."""
    total_association = sum(s_wAB(x, A, B) for x in X)
    return total_association / len(X)

def stdev_s_wAB(X, A, B):
    """Sample standard deviation of ``s_wAB(x, A, B)`` over ``x`` in ``X``.

    Uses ``statistics.stdev``, so ``X`` needs at least two elements.
    """
    associations = [s_wAB(x, A, B) for x in X]
    return statistics.stdev(associations)

def effect_size(X,Y,A,B):
    """Effect size of the association difference between ``X`` and ``Y``.

    The difference between the mean association of ``X`` and the mean
    association of ``Y`` (each measured against ``A`` and ``B``) is divided
    by the sample standard deviation of the associations over ``X + Y``.
    ``X`` and ``Y`` are combined with ``+``, so they are expected to be
    lists (concatenation), as built by the caller in this notebook.
    """
    mean_gap = mean_s_wAB(X, A, B) - mean_s_wAB(Y, A, B)
    pooled_spread = stdev_s_wAB(X + Y, A, B)
    return mean_gap / pooled_spread

In [4]:
def _embed_sentences(path, bias_subspace):
    """Encode each non-blank line of ``path`` as a unit-length vector.

    Every sentence is tokenized with the global ``tokenizer``, passed
    through the global ``model``, and represented by the hidden state of
    its first token. The component along each vector in ``bias_subspace``
    is subtracted before the result is scaled to unit length.

    Args:
        path: text file with one sentence per line.
        bias_subspace: iterable of direction vectors to remove; may be empty.
            # NOTE(review): subtracting (b . v) * b per vector is an exact
            # projection only if the vectors are orthonormal -- confirm.

    Returns:
        list of 1-D numpy arrays, one per non-blank line.
    """
    vectors = []
    # Explicit UTF-8: the sentence files presumably contain Korean text
    # (klue/bert-base), and the platform default encoding may not be UTF-8.
    with open(path, encoding="utf-8") as f:
        for line in f:
            sentence = line.strip()
            if not sentence:
                # A blank line would otherwise be encoded as an "empty
                # sentence" and silently counted as a sample.
                continue
            # Inference only: skip building the autograd graph.
            with torch.no_grad():
                input_ids = tokenizer(sentence, return_tensors="pt").input_ids
                v = model(input_ids).last_hidden_state[0][0].detach().numpy()
            hv = v*0
            for bias_vector in bias_subspace:
                hv = hv + np.dot(bias_vector, v)*bias_vector
            v2 = v - hv
            vectors.append(v2/LA.norm(v2))
    return vectors


def test_seat(targetName, attrName, bias_subspace=None):
    """Compute the SEAT effect size for one target/attribute directory pair.

    Reads ``<targetName>/target_X.txt``, ``<targetName>/target_Y.txt``,
    ``<attrName>/attr_A.txt`` and ``<attrName>/attr_B.txt`` (one sentence per
    line), embeds every sentence, optionally removes the components along
    ``bias_subspace``, and returns ``effect_size(X, Y, A, B)``.

    Args:
        targetName: directory containing the two target sentence files.
        attrName: directory containing the two attribute sentence files.
        bias_subspace: optional iterable of direction vectors to remove from
            each embedding. ``None`` (the default) or an empty sequence
            leaves the embeddings unchanged apart from normalisation.

    Returns:
        The effect size as returned by ``effect_size``.

    Raises:
        AssertionError: if the X and Y target sets differ in size.
    """
    # `is None` rather than truthiness: an array-valued subspace has no
    # unambiguous boolean value.
    if bias_subspace is None:
        bias_subspace = ()

    X = _embed_sentences(f"{targetName}/target_X.txt", bias_subspace)
    Y = _embed_sentences(f"{targetName}/target_Y.txt", bias_subspace)
    A = _embed_sentences(f"{attrName}/attr_A.txt", bias_subspace)
    B = _embed_sentences(f"{attrName}/attr_B.txt", bias_subspace)

    if len(X) != len(Y):
        raise AssertionError("length of X and Y should be the same")

    if len(A) != len(B):
        # Unequal attribute sets are tolerated; only warn.
        print("length of A and B are recommended to be the same")

    return effect_size(X, Y, A, B)

In [5]:
# Load the precomputed bias subspace that test_seat() removes from each
# embedding in the "debiased" runs.
# NOTE(review): torch.load unpickles the file, so only load files from a
# trusted source.
# NOTE(review): the recorded output suggests a 2-D array whose rows are
# the bias directions (test_seat iterates over it row by row) -- confirm.
bias_subspace = torch.load("bias_subspace_d2_opt_20.pt")
print(bias_subspace)

[[-0.04021164 -0.00959634  0.04158581 ... -0.00519618 -0.01959306
  -0.0421946 ]
 [-0.02284452  0.0072754   0.00746128 ... -0.02240542  0.03168522
  -0.01751719]
 [-0.05221439 -0.00474177  0.02527822 ...  0.02411237  0.0505751
  -0.04409362]
 ...
 [-0.00406621 -0.05929296 -0.00297973 ...  0.04360102  0.08920283
   0.03231892]
 [-0.08230173 -0.01784659 -0.03371129 ...  0.01286659 -0.03928048
  -0.03498467]
 [-0.02999273  0.07737284 -0.02323433 ... -0.01267975  0.02910168
  -0.04029364]]


In [6]:
# Directories holding the target sentence files and the attribute
# sentence files that test_seat() reads.
targetNames = ["name"]
attrNames = ["job", "hobby", "specialty"]

# Report the SEAT effect size for every target/attribute pair, first on
# the raw embeddings and then with the bias subspace removed.
# NOTE(review): the output recorded under this cell mentions name_phx,
# job_px, like_px and specialty_px, which do not match the lists above --
# the cell was probably edited after its last run; re-run to refresh.
for targetName in targetNames:
    for attrName in attrNames:
        pair_label = "target  " + targetName + "  + attr  " + attrName
        original_score = test_seat(targetName, attrName)
        print(pair_label, " original SEAT score : ", original_score)
        debiased_score = test_seat(targetName, attrName, bias_subspace)
        print(pair_label, " debiased SEAT score : ", debiased_score)
        print()

target  name_phx  + attr  specialty_px  original SEAT score :  0.3751922434255145
target  name_phx  + attr  specialty_px  debiased SEAT score :  -0.036792635490124674

target  name_phx  + attr  job_px  original SEAT score :  0.955967604637584
target  name_phx  + attr  job_px  debiased SEAT score :  0.8998437847817552

target  name_phx  + attr  like_px  original SEAT score :  0.8354605087978312
target  name_phx  + attr  like_px  debiased SEAT score :  0.7027839570029614

