In [31]:
from __future__ import print_function, division
%matplotlib inline
from matplotlib import pyplot as plt
import json
import random
import numpy as np

import debiaswe as dwe
import debiaswe.we as we
from debiaswe.we import WordEmbedding
from debiaswe.data import load_professions

# Load the (small) Google News word2vec embedding.
#
# E is used for the socioeconomic experiments and E_1 for the nationality
# experiments. debias() is called on each of them further down without using
# a return value, i.e. it changes the embedding object it is given. E_1 must
# therefore be an independent object: a plain alias (E_1 = E) would make the
# nationality cells operate on vectors that were already debiased along the
# socioeconomic direction.
EMBEDDING_PATH = './embeddings/w2v_gnews_small.txt'
E = WordEmbedding(EMBEDDING_PATH)
E_1 = WordEmbedding(EMBEDDING_PATH)

# load professions
# professions = load_professions()
# profession_words = [p[0] for p in professions]

*** Reading data from ./embeddings/w2v_gnews_small.txt
(26423, 300)
26423 words of dimension 300 : in, for, that, is, ..., Jay, Leroy, Brad, Jermaine


Define Socioeconomic Direction

In [29]:
# Paired vocabulary: even positions hold one pole ("wealthy", "rich", ...),
# odd positions hold the opposite pole ("impoverished", "poor", ...).
terms = ["wealthy", "impoverished", "rich", "poor", "wealth", "poverty"]
terms_group1 = terms[0::2]
terms_group2 = terms[1::2]

# Unit-length sum vector per pole; index 0 is group 2, index 1 is group 1.
vs = []
for group in (terms_group2, terms_group1):
    total = sum(E.v(word) for word in group)
    vs.append(total / np.linalg.norm(total))

# Direction pointing from the group-2 pole towards the group-1 pole,
# rescaled to unit length.
pole_low, pole_high = vs
v_eco = pole_high - pole_low
v_eco = v_eco / np.linalg.norm(v_eco)

Generating Socioeconomic-Based Analogies

In [36]:
# Analogy pairs found along the socioeconomic direction
a_eco = E.best_analogies_dist_thresh(v_eco)

# Every entry is a 3-tuple; only its first two items (the word pair) are shown.
for left, right, _rest in a_eco:
    print("-".join([left, right]))

citizenship-visa
national-international
guardsmen-marines
creature-alien
democracy-dictators
doctorate-postgraduate
socialism-imperialist
goats-camels
deplorable-inhuman
while-whilst
permits-visas
nation-continent
homeowners-landlords
native-natives
woodlands-woodland
steelmaker-iron_ore
watershed-estuary
pipped-fancied
proclamation-edict
rifle-machine_guns
residents-locals
roadways-parking_lots
motorist-tow_truck
nationals-foreigners
woman-prostitute
halfback-flyhalf
vitality-dynamism
lieutenants-henchmen
homes-condos
nationally-internationally
dubious-dodgy
fishes-sharks
citizens-tax_payers
knee_injury-groin_injury
championship-postseason
choreographer-choreographers
trees-weeds
country-globe
heartwarming-captivating
grandmother-aunts
conservatism-fundamentalism
rural-outlying
belt-strap
expatriates-expatriate
illegal_aliens-aliens
batting-batsmen
lowest-weakest
garnered-attracted
evils-evil
identity_theft-fraudsters
integrator-integrators
businessman-businessmen
soft_spoken-amiable


Debiasing Socioeconomic Bias

In [39]:
from debiaswe.debias import debias

# Word pairs handed to debias() as the "definitional" pairs for the
# socioeconomic axis.
definitional_pairs = [
    ['rich', 'poor'],
    ['wealthy', 'impoverished'],
    ['educated', 'uneducated'],
    ['millionaires', 'migrant_workers'],
    ['mansions', 'affordable_housing'],
    ['limo', 'bus'],
    ['luxuries', 'basic_necessities'],
    ['wealthiest', 'poorest'],
    ['wealthier', 'poorer'],
]

# Pairs handed to debias() as the "equalize" pairs. These must be defined in
# this cell: they were previously only present as commented-out code, so the
# debias() call below raised NameError on a fresh kernel.
equalized_pairs = [['rich', 'poor'], ['wealthy', 'impoverished']]

# Word list handed to debias() as its second argument
# (presumably the words that should keep their socioeconomic component --
# TODO confirm against debiaswe.debias).
# 'migrant_workers' is spelled with an underscore to match the token form used
# in definitional_pairs; the hyphenated spelling used before matched nothing
# there.
eco_specific_seed = [
    'mansion', 'yachts', 'villas', 'homelessness', 'millionaires', 'slums',
    'migrant_workers', 'rich', 'affordable_housing', 'homeless_shelter',
    'motel', 'limo',
]

# The return value is not used: E itself is the object the following cells
# query for the debiased analogies.
debias(E, eco_specific_seed, definitional_pairs, equalized_pairs)

26423 words of dimension 300 : in, for, that, is, ..., Jay, Leroy, Brad, Jermaine
{('wealthy', 'impoverished'), ('RICH', 'POOR'), ('rich', 'poor'), ('WEALTHY', 'IMPOVERISHED'), ('Wealthy', 'Impoverished'), ('Rich', 'Poor')}
26423 words of dimension 300 : in, for, that, is, ..., Jay, Leroy, Brad, Jermaine


In [18]:
# Socioeconomic analogies again, queried from E after the debias() call,
# using the same direction v_eco as before
a_eco_debiased = E.best_analogies_dist_thresh(v_eco)

# Every entry is a 3-tuple; only its first two items (the word pair) are shown.
for left, right, _rest in a_eco_debiased:
    print("-".join([left, right]))

Computing neighbors
Mean: 10.24009385762404
Median: 7.0
illiterate-educated
millionaires-billionaires
affordable_housing-homelessness
villas-townhouses
slum-slums
migrant_workers-illegal_immigrants
laborers-wage_earners
wealthy-poverty
mansion-townhouse
yachts-sailboat
thirst-hunger
rich-poor
businessman-millionaire
job_seekers-unemployed
migrants-illegal_immigrant
border_crossings-crossings
surpluses-deficit
fruits-veggies
emptiness-hopelessness
authorities-officials
garments-jeans
untapped-underserved
sensitive-touchy
negligible-marginal
smells-stink
geological-mineral_resource
evocative-uplifting
civilization-tyranny
shelter-homeless
wastewater-sewer
apply-qualify
fluids-hydration
melodies-song
vast-colossal
breezes-windy_conditions
downturns-recession
pornographic-obscenity
feedstock-ethanol
tailback-wide_receiver
animals-animal_cruelty
soprano-ballerina
vile-pathetic
sorrow-despair
profits-profit
fruit-potato
cafes-coffee_shops
pears-peas
enhance-improve
cholera-malaria
oilfields-

Define Nationality Direction

In [32]:
# Paired vocabulary: even positions hold one pole ("national", "citizen", ...),
# odd positions hold the opposite pole ("international", "immigrant", ...).
terms = ["national", "international", "citizen", "immigrant","citizenship", "visa", "native", "alien", "domestic", "foreign"]
terms_group1 = terms[0::2]
terms_group2 = terms[1::2]

# Unit-length sum vector per pole; index 0 is group 2, index 1 is group 1.
vs = []
for group in (terms_group2, terms_group1):
    total = sum(E_1.v(word) for word in group)
    vs.append(total / np.linalg.norm(total))

# Direction pointing from the group-2 pole towards the group-1 pole,
# rescaled to unit length.
pole_low, pole_high = vs
v_nat = pole_high - pole_low
v_nat = v_nat / np.linalg.norm(v_nat)

Generating Nationality-Based Analogies

In [33]:
# Analogy pairs found along the nationality direction
a_nat = E_1.best_analogies_dist_thresh(v_nat)

# Every entry is a 3-tuple; only its first two items (the word pair) are shown.
for left, right, _rest in a_nat:
    print("-".join([left, right]))

Computing neighbors
Mean: 10.219732808538016
Median: 7.0
citizenship-visa
national-international
creature-alien
guardsmen-marines
socialism-imperialist
goats-camels
democracy-dictators
master_degree-postgraduate
nation-continent
deplorable-inhuman
homeowners-landlords
while-whilst
watershed-estuary
permits-visas
residents-locals
rifle-machine_guns
woman-prostitute
patriots-revolutionaries
steelmaker-iron_ore
halfback-flyhalf
mass_transit-subway
vitality-dynamism
proclamation-edict
woodlands-woodland
native-natives
conservatism-fundamentalism
lieutenants-henchmen
reform-liberalization
dubious-dodgy
lowest-weakest
pipped-fancied
championship-postseason
identity_theft-fraudsters
monument-tomb
nationally-internationally
democratic-repressive
drivers-taxi_drivers
motorist-tow_truck
states-countries
communities-villages
roadways-parking_lots
academic_excellence-academic
fishes-sharks
nationals-foreigners
broadband-telecom_operators
integrator-integrators
businessman-businessmen
tourist_desti

Debiasing Nationality Bias

In [43]:
from debiaswe.debias import debias

# Word pairs handed to debias() as the "definitional" pairs for the
# nationality axis.
# NOTE(review): 'national' appears in two pairs -- confirm this is intended.
nat_definitional_pairs = [
    ['national', 'international'],
    ['domestic', 'foreign'],
    ['native', 'alien'],
    ['citizenship', 'visa'],
    ['citizen', 'immigrant'],
    ['familiar', 'exotic'],
    ['home', 'overseas'],
    ['national', 'abroad'],
    ['nationals', 'foreigners'],
    ['citizens', 'immigrants'],
]

# No specific-word seed list and no equalize pairs are supplied for this run.
nat_specific_seed = list()
equalized_pairs = list()

# The return value is not used: E_1 itself is the object queried afterwards.
debias(E_1, nat_specific_seed, nat_definitional_pairs, equalized_pairs)

26423 words of dimension 300 : in, for, that, is, ..., Jay, Leroy, Brad, Jermaine
set()
26423 words of dimension 300 : in, for, that, is, ..., Jay, Leroy, Brad, Jermaine


In [None]:
# Nationality analogies again, queried from E_1 after the debias() call,
# using the same direction v_nat as before
a_nat_debiased = E_1.best_analogies_dist_thresh(v_nat)

# Every entry is a 3-tuple; only its first two items (the word pair) are shown.
for left, right, _rest in a_nat_debiased:
    print("-".join([left, right]))

Computing neighbors
