In [25]:
import numpy as np
import pandas as pd
import csv
import math


In [26]:
# Mount Google Drive under /content/gdrive; the gdrive data root used by this notebook lives below that mount point.
from google.colab import drive
drive.mount("/content/gdrive")

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [27]:
# Original paper code (compiled with Python 3). Only some of its data files are used here (see the Data-Sets section).
# NOTE(review): when the target directory already exists (e.g. the cell is re-run in the same runtime),
# git refuses to clone and prints a "fatal: destination path ... already exists" message.
!git clone https://github.com/neta-caspi/advanced-ml-final-proj-original-paper-code.git


fatal: destination path 'advanced-ml-final-proj-original-paper-code' already exists and is not an empty directory.


## Data-Sets

Part of the original paper data resides within its Github project (see above)
Main parts, including the historical database of texts and embeddings are taken from [here](http://snap.stanford.edu/historical_embeddings/).

While reading this data from the remote location works for *.pkl files, it doesn't work for the numpy files (*.npy).
* This works: pd.read_pickle("http://snap.stanford.edu/historical_embeddings/eng-all/svd/1840-vocab.pkl")

* But this fails: np.load("http://snap.stanford.edu/historical_embeddings/eng-all/svd/1840-w.npy")

For this reason, we downloaded all the embeddings and word counts to Google Drive (they are too big to upload to GitHub).

In order to reconstruct part of the paper's results, we used the eng-all data-set.
We used both the SVD and SGNS versions of embeddings.


In [38]:
# --- Google Drive side: historical embeddings (vocab pickles + vector matrices) ---
# This path may need to be updated to your own gdrive location (after the folder was shared with you).
gdrive_data_root = "/content/gdrive/MyDrive/Data Science Master/2021-B Advanced Machine Learning/final project/Final Project Data-Set/eng-all/"

# File-name suffixes appended to "<year>" inside each embedding directory.
npy_vec_file_postfix = "-w.npy"
pkl_vocab_file_postfix = "-vocab.pkl"

# --- Cloned repository side: word lists, vocab counts and census tables ---
internal_data_root = "/content/advanced-ml-final-proj-original-paper-code/data/"
vocab_file_prefix = "vocab_"
internal_vocab_dir = f"{internal_data_root}vocab_counts/"

# Census occupation tables (gender and race).
occup_race_file = f"{internal_data_root}occupation_percentages_race_occ1950.csv"
occup_gender_file = f"{internal_data_root}occupation_percentages_gender_occ1950.csv"


In [39]:
from enum import Enum

class Embedding(Enum):
  """Embedding variants used in this notebook.

  Each member's value is the lower-case form of its name, and `str(member)`
  yields that same lower-case string (it is used to build file paths).
  """
  SGNS = "sgns"
  SVD = "svd"

  def __str__(self):
    # For every member defined above the value equals name.lower().
    return self.value

# Decades analysed: 1910, 1920, ..., 1990 (the stop value 2000 is exclusive).
years = range(1910, 2000, 10)


Graph preparation code is not part of this notebook. It can be added (it currently lives in a different notebook).

## Vocab Frequencies

In [40]:
def internal_vocab_file_name(embedding: Embedding, year):
  """Path of the vocab-count text file for `embedding` and `year`.

  The name is `vocab_file_prefix` + str(embedding) + str(year) + ".txt"
  (no separator between embedding and year), inside `internal_vocab_dir`.
  """
  return f"{internal_vocab_dir}{vocab_file_prefix}{embedding!s}{year!s}.txt"

def internal_words_group_file_name(words_group_name):
  """Path of the "<words_group_name>.txt" word-list file under `internal_data_root`."""
  return "".join((internal_data_root, words_group_name, ".txt"))

def gdrive_vocab_pkl_file_name(embedding: Embedding, year):
  """Path of the pickled vocabulary: <gdrive_data_root><embedding>/<year>-vocab.pkl."""
  return f"{gdrive_data_root}{embedding!s}/{year!s}{pkl_vocab_file_postfix}"

def gdrive_vocab_npy_file_name(embedding: Embedding, year):
  """Path of the vector matrix file: <gdrive_data_root><embedding>/<year>-w.npy."""
  return f"{gdrive_data_root}{embedding!s}/{year!s}{npy_vec_file_postfix}"


In [41]:
def word_count_avg(words, words_count_dict, dict_file_name):
  """Return the average count of `words` according to `words_count_dict`.

  Words that are missing from the dictionary (or mapped to None) contribute a
  count of 0 but are still included in the denominator.

  Args:
    words: sized collection of words to average over.
    words_count_dict: mapping from word to its count.
    dict_file_name: name of the file the dictionary came from. It is not used
      in the computation; it is kept so existing callers keep working and for
      ad-hoc debugging of missing words.

  Returns:
    The mean count as a float, or 0.0 when `words` is empty (instead of
    failing with ZeroDivisionError).
  """
  num_words = len(words)
  if num_words == 0:
    # Nothing to average; avoid dividing by zero.
    return 0.0

  count_sum = 0.0
  for word in words:
    count = words_count_dict.get(word)
    if count is not None:
      count_sum += count
  return count_sum / num_words


In [42]:
def words_count_avg_per_year(words_groups_names, embedding: Embedding, years):
  """Average word count of each words group for each year.

  Args:
    words_groups_names: names of word-list files (resolved with
      `internal_words_group_file_name`), one word per line.
    embedding: selects which yearly vocab-count files are read
      (resolved with `internal_vocab_file_name`).
    years: iterable of years; it is consumed exactly once, so a one-shot
      iterator works as well as a range or list.

  Returns:
    A dict {words_group_name: {year: average count}} where the average is
    computed by `word_count_avg`.
  """
  # Read every word list once. Blank lines are skipped so that they are not
  # counted as (always missing) words in the average's denominator.
  words_per_group = {}
  for words_group_name in words_groups_names:
    words_group_file = internal_words_group_file_name(words_group_name)
    with open(words_group_file) as group_file:
      words_per_group[words_group_name] = [line.strip() for line in group_file if line.strip()]

  words_groups_to_yearly_avg_count = {name: {} for name in words_per_group}

  # Each yearly vocab file is parsed once and shared by all groups, instead of
  # being re-read for every group.
  for year in years:
    yearly_vocab_file = internal_vocab_file_name(embedding, year)
    yearly_vocab_dict = {}
    with open(yearly_vocab_file) as vocab_file:
      for line in vocab_file:
        fields = line.split()
        if len(fields) < 2:
          # Blank or incomplete line: there is no "<word> <count>" pair to read.
          continue
        yearly_vocab_dict[fields[0]] = float(fields[1])

    for words_group_name, words in words_per_group.items():
      words_groups_to_yearly_avg_count[words_group_name][year] = word_count_avg(
          words, yearly_vocab_dict, yearly_vocab_file)

  return words_groups_to_yearly_avg_count


### Average counts of Neutral Words in a list in the eng-all embeddings over time

In [43]:
# Neutral word lists whose average yearly counts are printed below.
words_groups = ['adjectives_appearance', 'adjectives_intelligencegeneral', 'adjectives_otherization'] #...

# Average count per group and decade, read from the SGNS vocab-count files.
words_groups_to_yearly_avg_count = words_count_avg_per_year(words_groups, Embedding.SGNS, years)
print(words_groups_to_yearly_avg_count)

# Same computation from the SVD vocab-count files (overwrites the SGNS result held in the variable).
words_groups_to_yearly_avg_count = words_count_avg_per_year(words_groups, Embedding.SVD, years)
print(words_groups_to_yearly_avg_count)

{'adjectives_appearance': {1910: 56231.9, 1920: 48235.76, 1930: 39105.34, 1940: 36523.8, 1950: 50871.26, 1960: 84613.18, 1970: 100976.84, 1980: 130213.16, 1990: 208316.94}, 'adjectives_intelligencegeneral': {1910: 22071.535714285714, 1920: 20064.035714285714, 1930: 16830.0, 1940: 15015.642857142857, 1950: 21609.803571428572, 1960: 36624.57142857143, 1970: 40522.19642857143, 1980: 46163.642857142855, 1990: 71400.30357142857}, 'adjectives_otherization': {1910: 5178.775, 1920: 5029.625, 1930: 3966.1, 1940: 3878.275, 1950: 5758.575, 1960: 10772.35, 1970: 13363.025, 1980: 16336.15, 1990: 25496.85}}
{'adjectives_appearance': {1910: 56231.9, 1920: 48235.76, 1930: 39105.34, 1940: 36523.8, 1950: 50871.26, 1960: 84613.18, 1970: 100976.84, 1980: 130213.16, 1990: 208316.94}, 'adjectives_intelligencegeneral': {1910: 22071.535714285714, 1920: 20064.035714285714, 1930: 16830.0, 1940: 15015.642857142857, 1950: 21609.803571428572, 1960: 36624.57142857143, 1970: 40522.19642857143, 1980: 46163.6428571428

### Average counts of Group Words (Gender and Race related) in a list in the eng-all embeddings over time

In [44]:
# Gender- and race-related word lists whose average yearly counts are printed below.
words_groups = ['female_pairs', 'male_pairs', 'names_asian', 'names_black', 'names_chinese', 'names_hispanic', 'names_russian', 'names_white' ]

# Average count per group and decade, read from the SGNS vocab-count files.
words_groups_to_yearly_avg_count = words_count_avg_per_year(words_groups, Embedding.SGNS, years)
print(words_groups_to_yearly_avg_count)

# Same computation from the SVD vocab-count files (overwrites the SGNS result held in the variable).
words_groups_to_yearly_avg_count = words_count_avg_per_year(words_groups, Embedding.SVD, years)
print(words_groups_to_yearly_avg_count)

{'female_pairs': {1910: 769143.65, 1920: 729429.025, 1930: 635999.85, 1940: 540830.3, 1950: 749410.1, 1960: 1110819.8, 1970: 1610053.95, 1980: 2980188.45, 1990: 5425153.825}, 'male_pairs': {1910: 3141951.975, 1920: 2941354.125, 1930: 2611906.85, 1940: 2300660.375, 1950: 3352361.9, 1960: 5567057.25, 1970: 6201595.05, 1980: 7312107.9, 1990: 10929249.475}, 'names_asian': {1910: 1805.95, 1920: 2376.95, 1930: 2915.725, 1940: 2967.125, 1950: 6968.55, 1960: 18138.375, 1970: 26447.55, 1980: 34504.275, 1990: 56367.775}, 'names_black': {1910: 26024.375, 1920: 26683.825, 1930: 27028.65, 1940: 27811.275, 1950: 43545.65, 1960: 93735.5, 1970: 132841.85, 1980: 174531.025, 1990: 267635.75}, 'names_chinese': {1910: 1400.0294117647059, 1920: 1886.764705882353, 1930: 2633.9117647058824, 1940: 2358.4117647058824, 1950: 5317.941176470588, 1960: 13577.411764705883, 1970: 21378.20588235294, 1980: 28827.529411764706, 1990: 51607.05882352941}, 'names_hispanic': {1910: 1428.625, 1920: 1501.775, 1930: 1512.35, 1

## Embedding Bias

In [45]:
def vec_mean(vec_list):
  """Average the given vectors along axis 0 (element-wise mean across the list)."""
  centroid = np.mean(vec_list, axis=0)
  return centroid


def vec_dist(vec1, vec2):
  """Norm of `vec1 - vec2` as computed by `np.linalg.norm` with its default settings
  (the Euclidean distance for 1-D vectors)."""
  difference = vec1 - vec2
  return np.linalg.norm(difference)


def vec_to_vec_list_dist(vec, vec_list):
  """Mean of `vec_dist(vec, other)` taken over every `other` in `vec_list`."""
  return np.mean([vec_dist(vec, other) for other in vec_list])


def word_to_vec(words_group_name, embedding: Embedding, year):
  """Map every word of a words group to its embedding vector for one year.

  Args:
    words_group_name: name of the word-list file (resolved with
      `internal_words_group_file_name`), one word per line.
    embedding: which embedding directory on gdrive to read from.
    year: which yearly vocabulary / vector files to read.

  Returns:
    A dict {word: vector} containing only the group words that appear in the
    yearly vocabulary. For every group word that is missing, a message is
    printed and the word is left out of the result.
  """
  yearly_words_file = gdrive_vocab_pkl_file_name(embedding, year)
  yearly_words_vec_file = gdrive_vocab_npy_file_name(embedding, year)
  # NOTE(security): unpickling can execute arbitrary code; only load vocab
  # files that come from a trusted source.
  yearly_words = pd.read_pickle(yearly_words_file)
  yearly_words_vec = np.load(yearly_words_vec_file)

  # Word i of the vocabulary owns row i of the vector matrix. Keeping only a
  # word -> row index map avoids materialising one vector object per vocabulary
  # entry just to look up a handful of group words. If a word is repeated, the
  # last occurrence wins.
  row_of_word = {word: row for row, word in enumerate(yearly_words)}

  words_group_file = internal_words_group_file_name(words_group_name)
  with open(words_group_file) as group_file:
    # Blank lines are not words; skip them instead of reporting them as missing.
    words = [line.strip() for line in group_file if line.strip()]

  group_word_to_vec = {}
  for word in words:
    row = row_of_word.get(word)
    if row is None:
      print("word " + word + " from group " + words_group_name + " doesn't appear in " + yearly_words_file)
      continue
    group_word_to_vec[word] = yearly_words_vec[row]

  return group_word_to_vec


In [46]:
def word_groups_vec_dist(neutral_words_group_name, words_group_a_name, words_group_b_name, embedding: Embedding, years):
  """Per-year difference between two groups' distances to a neutral words group.

  For each year the vectors of the three groups are loaded with `word_to_vec`.
  Each of group A and group B is reduced to its mean vector, and the mean
  distance from that vector to the neutral words' vectors is computed.

  Returns:
    A dict {year: distance(group A) - distance(group B)}.
  """
  dist_diff_per_year = {}
  for year in years:
    print(f"Calculating distance for {year!s}...")

    # Load in the order neutral, A, B so missing-word messages keep their order.
    neutral_vecs, group_a_vecs, group_b_vecs = (
        word_to_vec(group_name, embedding, year)
        for group_name in (neutral_words_group_name, words_group_a_name, words_group_b_name)
    )

    centroid_a = vec_mean(list(group_a_vecs.values()))
    centroid_b = vec_mean(list(group_b_vecs.values()))

    dist_a = vec_to_vec_list_dist(centroid_a, neutral_vecs.values())
    dist_b = vec_to_vec_list_dist(centroid_b, neutral_vecs.values())
    dist_diff_per_year[year] = dist_a - dist_b

  return dist_diff_per_year


### Embedding Bias of a given neutral words group between gender over time

In [47]:
# Neutral word list both groups are measured against (alternatives listed in the trailing comment).
neutral_words_group_name = "occupations1950_professional" # "occupations1950", "occupationsMturk"
words_group_a_name = "female_pairs"
words_group_b_name = "male_pairs"

# Per decade, on the SVD embeddings: (female_pairs distance to the neutral words) - (male_pairs distance).
dist_per_year = word_groups_vec_dist(neutral_words_group_name, words_group_a_name, words_group_b_name, Embedding.SVD, years)
print(dist_per_year)

Calculating distance for 1910...
Calculating distance for 1920...
Calculating distance for 1930...
Calculating distance for 1940...
Calculating distance for 1950...
Calculating distance for 1960...
Calculating distance for 1970...
Calculating distance for 1980...
Calculating distance for 1990...
{1910: 0.08152181939167735, 1920: 0.08262527773844575, 1930: 0.0812332730101386, 1940: 0.08361938998270446, 1950: 0.08968009083168949, 1960: 0.09564422990129118, 1970: 0.08055221740619944, 1980: 0.06334474071547236, 1990: 0.05240756246568501}


### Embedding Bias of a given neutral words group between races over time

In [49]:
# Neutral word list both groups are measured against (alternatives listed in the trailing comment).
neutral_words_group_name = "occupations1950_professional" # "occupations1950", "occupationsMturk"
# Group A can be swapped for any of the name lists in the trailing comment; group B is the reference group.
words_group_a_name = "names_asian" # "names_black", "names_chinese", "names_hispanic", "names_russian"
words_group_b_name = "names_white"

# Per decade, on the SVD embeddings: (names_asian distance to the neutral words) - (names_white distance).
dist_per_year = word_groups_vec_dist(neutral_words_group_name, words_group_a_name, words_group_b_name, Embedding.SVD, years)
print(dist_per_year)

Calculating distance for 1910...
word cho from group names_asian doesn't appear in /content/gdrive/MyDrive/Data Science Master/2021-B Advanced Machine Learning/final project/Final Project Data-Set/eng-all/svd/1910-vocab.pkl
word wong from group names_asian doesn't appear in /content/gdrive/MyDrive/Data Science Master/2021-B Advanced Machine Learning/final project/Final Project Data-Set/eng-all/svd/1910-vocab.pkl
word huang from group names_asian doesn't appear in /content/gdrive/MyDrive/Data Science Master/2021-B Advanced Machine Learning/final project/Final Project Data-Set/eng-all/svd/1910-vocab.pkl
word chu from group names_asian doesn't appear in /content/gdrive/MyDrive/Data Science Master/2021-B Advanced Machine Learning/final project/Final Project Data-Set/eng-all/svd/1910-vocab.pkl
word chung from group names_asian doesn't appear in /content/gdrive/MyDrive/Data Science Master/2021-B Advanced Machine Learning/final project/Final Project Data-Set/eng-all/svd/1910-vocab.pkl
word ng

## Gender and Race Census Occupation rates

There are 2 versions of each measure: non-relative (with or without logit_prop) and relative (values can be negative, hence there is no logit_prop option).

In [60]:
def logit_prop(p):
  """Return the logit (log-odds) of a proportion: log(p / (1 - p)).

  Args:
    p: proportion, expected to lie in [0, 1].

  Returns:
    log(p / (1 - p)). The boundaries are handled explicitly and consistently
    for Python and NumPy floats: 0 maps to -inf and 1 maps to +inf. NaN
    propagates as NaN.

  Raises:
    ValueError: if p is outside [0, 1] (raised by `math.log` on the negative odds).
  """
  if p == 0:
    # log(0) would raise "math domain error"; the limit is -infinity.
    return -math.inf
  if p == 1:
    # 1 / (1 - 1) would raise ZeroDivisionError for Python floats; the limit is +infinity.
    return math.inf
  return math.log(p / (1 - p))


def gender_occup_percent_per_year(occup, years, apply_logit_prop):
  """Per-year mean of the 'Female' column for a single occupation.

  Reads `occup_gender_file` and, for each year, averages 'Female' over the rows
  whose 'Census year' equals the year and whose 'Occupation' equals `occup`.
  When `apply_logit_prop` is truthy the mean is passed through `logit_prop`.

  Returns:
    A dict {year: value}.
  """
  census = pd.read_csv(occup_gender_file)
  female_by_year = {}
  for year in years:
    rows = (census['Census year'] == year) & (census['Occupation'] == occup)
    female_mean = census.loc[rows, 'Female'].mean()
    female_by_year[year] = logit_prop(female_mean) if apply_logit_prop else female_mean
  return female_by_year


def gender_occup_relative_percent_per_year(occup, years):
  """Per-year relative female value for a single occupation, times 100.

  For each year the 'Female' column of `occup_gender_file` is averaged over the
  rows matching the year and `occup`, then mapped to (2 * mean - 1) * 100.

  Returns:
    A dict {year: value}; values can be negative.
  """
  census = pd.read_csv(occup_gender_file)

  def female_mean(year):
    rows = (census['Census year'] == year) & (census['Occupation'] == occup)
    return census.loc[rows, 'Female'].mean()

  # female - male = female - (1 - female) = 2 * female - 1
  return {year: ((2 * female_mean(year)) - 1) * 100 for year in years}


def gender_all_occups_percent_per_year(years, apply_logit_prop):
  """Per-year mean of the 'Female' column over all rows of that census year.

  Reads `occup_gender_file`. When `apply_logit_prop` is truthy each yearly mean
  is passed through `logit_prop`.

  Returns:
    A dict {year: value}.
  """
  census = pd.read_csv(occup_gender_file)
  female_by_year = {}
  for year in years:
    female_mean = census.loc[census['Census year'] == year, 'Female'].mean()
    female_by_year[year] = logit_prop(female_mean) if apply_logit_prop else female_mean
  return female_by_year
  

def gender_all_occups_relative_percent_per_year(years):
  """Per-year relative female value over all rows of that census year, times 100.

  Each yearly mean of the 'Female' column of `occup_gender_file` is mapped to
  (2 * mean - 1) * 100.

  Returns:
    A dict {year: value}; values can be negative.
  """
  census = pd.read_csv(occup_gender_file)

  def female_mean(year):
    return census.loc[census['Census year'] == year, 'Female'].mean()

  return {year: ((2 * female_mean(year)) - 1) * 100 for year in years}


def gender_all_occups_percent(years, apply_logit_prop):
  """Mean of the 'Female' column over all rows whose 'Census year' is in `years`.

  Reads `occup_gender_file`. When `apply_logit_prop` is truthy the mean is
  passed through `logit_prop`.

  Returns:
    A single value pooled over all the given years.
  """
  census = pd.read_csv(occup_gender_file)
  selected_years = list(years)
  in_selected_years = census['Census year'].isin(selected_years)
  female_mean = census.loc[in_selected_years, 'Female'].mean()
  return logit_prop(female_mean) if apply_logit_prop else female_mean


def gender_all_occups_relative_percent(years):
  """Relative female value pooled over all rows whose 'Census year' is in `years`.

  The mean of the 'Female' column of `occup_gender_file` over those rows is
  mapped to (2 * mean - 1) * 100; the result can be negative.
  """
  census = pd.read_csv(occup_gender_file)
  in_selected_years = census['Census year'].isin(list(years))
  female_mean = census.loc[in_selected_years, 'Female'].mean()
  return ((2 * female_mean) - 1) * 100


In [61]:
def cond_logit_prop(p1, p2):
  """Apply `logit_prop` to p1's share of the total p1 + p2."""
  share_of_p1 = p1 / (p1 + p2)
  return logit_prop(share_of_p1)


def race_occup_percent_per_year(race, occup, years, apply_logit_prop, relative_race = 'white'):
  """Per-year mean of the `race` column for a single occupation.

  Reads `occup_race_file` and, for each year, averages the `race` column over
  the rows matching the year and `occup`. When `apply_logit_prop` is truthy the
  value becomes `cond_logit_prop(race mean, relative_race mean)` over the same
  rows; otherwise `relative_race` is not read.

  Returns:
    A dict {year: value}.
  """
  census = pd.read_csv(occup_race_file)
  value_by_year = {}
  for year in years:
    rows = (census['Census year'] == year) & (census['Occupation'] == occup)
    value = census.loc[rows, race].mean()
    if apply_logit_prop:
      relative_mean = census.loc[rows, relative_race].mean()
      value = cond_logit_prop(value, relative_mean)
    value_by_year[year] = value
  return value_by_year


def race_occup_relative_percent_per_year(race, occup, years, relative_race = 'white'):
  """Per-year mean row-wise difference `race - relative_race` for one occupation, times 100.

  Reads `occup_race_file`; for each year only the rows matching the year and
  `occup` are used.

  Returns:
    A dict {year: value}; values can be negative.
  """
  census = pd.read_csv(occup_race_file)

  def mean_gap(year):
    rows = (census['Census year'] == year) & (census['Occupation'] == occup)
    return (census.loc[rows, race] - census.loc[rows, relative_race]).mean()

  return {year: mean_gap(year) * 100 for year in years}


def race_all_occups_percent_per_year(race, years, apply_logit_prop, relative_race = 'white'):
  """Per-year mean of the `race` column over all rows of that census year.

  Reads `occup_race_file`. When `apply_logit_prop` is truthy each yearly value
  becomes `cond_logit_prop(race mean, relative_race mean)` for that year;
  otherwise `relative_race` is not read.

  Returns:
    A dict {year: value}.
  """
  census = pd.read_csv(occup_race_file)
  value_by_year = {}
  for year in years:
    rows = census['Census year'] == year
    value = census.loc[rows, race].mean()
    if apply_logit_prop:
      relative_mean = census.loc[rows, relative_race].mean()
      value = cond_logit_prop(value, relative_mean)
    value_by_year[year] = value
  return value_by_year


def race_all_occups_relative_percent_per_year(race, years, relative_race = 'white'):
  """Per-year mean row-wise difference `race - relative_race` over all rows of that year, times 100.

  Reads `occup_race_file`.

  Returns:
    A dict {year: value}; values can be negative.
  """
  census = pd.read_csv(occup_race_file)

  def mean_gap(year):
    rows = census['Census year'] == year
    return (census.loc[rows, race] - census.loc[rows, relative_race]).mean()

  return {year: mean_gap(year) * 100 for year in years}


def race_all_occups_percent(race, years, apply_logit_prop, relative_race = 'white'):
  """Mean of the `race` column over all rows whose 'Census year' is in `years`.

  Reads `occup_race_file`. When `apply_logit_prop` is truthy the result is
  `cond_logit_prop(race mean, relative_race mean)` over the same rows;
  otherwise `relative_race` is not read.

  Returns:
    A single value pooled over all the given years.
  """
  census = pd.read_csv(occup_race_file)
  race_mean = census.loc[census['Census year'].isin(list(years)), race].mean()
  if not apply_logit_prop:
    return race_mean
  relative_mean = census.loc[(census['Census year'].isin(list(years))), relative_race].mean()
  return cond_logit_prop(race_mean, relative_mean)


def race_all_occups_relative_percent(race, years, relative_race = 'white'):
  """Mean row-wise difference `race - relative_race`, times 100, pooled over `years`.

  Reads `occup_race_file` and uses the rows whose 'Census year' is in `years`;
  the result can be negative.
  """
  census = pd.read_csv(occup_race_file)
  race_values = census.loc[census['Census year'].isin(list(years)), race]
  relative_values = census.loc[census['Census year'].isin(list(years)), relative_race]
  gap = race_values - relative_values
  return gap.mean() * 100


In [62]:
# Census decades reported below: 1910, 1920, ..., 1990 (the stop value 2000 is exclusive).
years = range(1910, 2000, 10)


### Specific Occupation, Per Year for both Gender and Race (relative or not and with/without logit_prop)

In [63]:
apply_logit_prop = False # can be set to True

# Per-decade mean of the 'Female' column for the "teacher" occupation.
teacher_per_years_female_mean = gender_occup_percent_per_year("teacher", years, apply_logit_prop)
print("teacher: ", teacher_per_years_female_mean)

# Same measure for the "engineer" occupation.
engineer_per_years_female_mean = gender_occup_percent_per_year("engineer", years, apply_logit_prop)
print("engineer: ", engineer_per_years_female_mean)


teacher:  {1910: 0.7957860473762761, 1920: 0.8373972240362563, 1930: 0.8116073542896325, 1940: 0.7474501500241553, 1950: 0.7446611083061556, 1960: 0.7694241844384666, 1970: 0.7391928688513486, 1980: 0.7161517431159874, 1990: 0.7439580297756162}
engineer:  {1910: 0.001836571246703172, 1920: 0.0015112556317028042, 1930: 0.006289495790905207, 1940: 0.00977273024833022, 1950: 0.014017321664352414, 1960: 0.00960499276961717, 1970: 0.01793458059719985, 1980: 0.06015052930018154, 1990: 0.11629365157906454}


In [64]:
# Relative version: (2 * female - 1) * 100 per decade, which can be negative.
# These assignments overwrite the non-relative results stored under the same names.
teacher_per_years_female_mean = gender_occup_relative_percent_per_year("teacher", years)
print("teacher: ", teacher_per_years_female_mean)

engineer_per_years_female_mean = gender_occup_relative_percent_per_year("engineer", years)
print("engineer: ", engineer_per_years_female_mean)

teacher:  {1910: 59.15720947525522, 1920: 67.47944480725126, 1930: 62.3214708579265, 1940: 49.490030004831056, 1950: 48.932221661231125, 1960: 53.884836887693325, 1970: 47.83857377026972, 1980: 43.23034862319748, 1990: 48.79160595512324}
engineer:  {1910: -99.63268575065936, 1920: -99.69774887365944, 1930: -98.74210084181895, 1940: -98.04545395033396, 1950: -97.19653566712951, 1960: -98.07900144607656, 1970: -96.41308388056002, 1980: -87.96989413996369, 1990: -76.74126968418709}


In [65]:
apply_logit_prop = True # can be set to False

# NOTE(review): despite its "teacher_" prefix, this variable holds the "hispanic" series for the "engineer" occupation.
teacher_per_years_hispanic_mean = race_occup_percent_per_year("hispanic", "engineer", years, apply_logit_prop)
print("hispanic: ", teacher_per_years_hispanic_mean)

# "black" series for the "engineer" occupation (relative race defaults to 'white').
engineer_per_years_black_mean = race_occup_percent_per_year("black", "engineer", years, apply_logit_prop)
print("black: ", engineer_per_years_black_mean)

hispanic:  {1910: -5.592854895831354, 1920: -5.379446201726423, 1930: -5.13315887029662, 1940: -5.262373184461322, 1950: -4.878229453573142, 1960: -4.378122514777813, 1970: -4.010624645081053, 1980: -3.6497317817713713, 1990: -3.3172715745955657}
black:  {1910: -4.517801389035936, 1920: -4.645612273233028, 1930: -4.955978098089973, 1940: -4.677847835128214, 1950: -4.701647886632909, 1960: -4.82690649760482, 1970: -4.169875080973696, 1980: -3.266838348626962, 1990: -3.042133926871644}


In [66]:
# Relative (by default vs. the 'white' group; this can be changed via the relative_race parameter)

# NOTE(review): despite its "teacher_" prefix, this variable holds the "hispanic" series for the "engineer" occupation.
teacher_per_years_hispanic_mean = race_occup_relative_percent_per_year("hispanic", "engineer", years)
print("hispanic: ", teacher_per_years_hispanic_mean)

engineer_per_years_black_mean = race_occup_relative_percent_per_year("black", "engineer", years)
print("black: ", engineer_per_years_black_mean)

hispanic:  {1910: -97.90803056430217, 1920: -98.05882394147395, 1930: -97.99093296077956, 1940: -98.00655006724013, 1950: -97.51083456242458, 1960: -96.06210518304286, 1970: -93.47974228720109, 1980: -87.87810798131642, 1990: -83.67619589554548}
black:  {1910: -97.20157673815555, 1920: -97.56692157385864, 1930: -97.87823627028136, 1940: -97.60105190114673, 1950: -97.3664116065605, 1960: -96.50353908212193, 1970: -93.73373807458803, 1980: -86.78381300099946, 1990: -82.6793456747362}


### Mean over all years

In [67]:
apply_logit_prop = False

# Single value: mean of the 'Female' column pooled over all occupations and all the given years.
all_occups_all_years_female_mean = gender_all_occups_percent(years, apply_logit_prop)
print("female: ", all_occups_all_years_female_mean)

# Per-decade breakdown; note that the same variable name is reused and now holds a {year: value} dict.
all_occups_all_years_female_mean = gender_all_occups_percent_per_year(years, apply_logit_prop)
print("female: ", all_occups_all_years_female_mean)

female:  0.2352832439462348
female:  {1910: 0.17789598440206766, 1920: 0.19796426929215596, 1930: 0.19162541843619674, 1940: 0.1578338910528968, 1950: 0.21247870412711548, 1960: 0.24722308684251396, 1970: 0.2706836461862202, 1980: 0.3198965077263355, 1990: 0.35336634093463337}


In [68]:
# Relative version: (2 * female - 1) * 100, which can be negative.

# Single value pooled over all the given years.
all_occups_all_years_female_mean = gender_all_occups_relative_percent(years)
print("female: ", all_occups_all_years_female_mean)

# Per-decade breakdown; the same variable name is reused and now holds a {year: value} dict.
all_occups_all_years_female_mean = gender_all_occups_relative_percent_per_year(years)
print("female: ", all_occups_all_years_female_mean)

female:  -52.94335121075304
female:  {1910: -64.42080311958647, 1920: -60.407146141568816, 1930: -61.67491631276065, 1940: -68.43322178942064, 1950: -57.50425917457691, 1960: -50.55538263149722, 1970: -45.86327076275596, 1980: -36.0206984547329, 1990: -29.326731813073327}


In [69]:
apply_logit_prop = False

# Mean pooled over all the given years (one value per race)
all_occups_all_years_hispanic_mean = race_all_occups_percent("hispanic", years, apply_logit_prop)
print("hispanic: ", all_occups_all_years_hispanic_mean)

all_occups_all_years_black_mean = race_all_occups_percent("black", years, apply_logit_prop)
print("black: ", all_occups_all_years_black_mean)

# Relative version of the pooled mean (vs. the default 'white' group); overwrites the variables above
all_occups_all_years_hispanic_mean = race_all_occups_relative_percent("hispanic", years)
print("hispanic: ", all_occups_all_years_hispanic_mean)

all_occups_all_years_black_mean = race_all_occups_relative_percent("black", years)
print("black: ", all_occups_all_years_black_mean)

# Per-year breakdown; the "all_years" variables are reused and from here on hold {year: value} dicts
all_occups_all_years_hispanic_mean = race_all_occups_percent_per_year("hispanic", years, apply_logit_prop)
print("hispanic: ", all_occups_all_years_hispanic_mean)

all_occups_all_years_black_mean = race_all_occups_percent_per_year("black", years, apply_logit_prop)
print("black: ", all_occups_all_years_black_mean)

all_occups_all_years_asian_mean = race_all_occups_percent_per_year("asian", years, apply_logit_prop)
print("asian: ", all_occups_all_years_asian_mean)

# relative_race is set to "hispanic" here; it is only read by the function when apply_logit_prop is True
all_occups_all_years_asian_vs_hispanic_mean = race_all_occups_percent_per_year("asian", years, apply_logit_prop, "hispanic")
print("asian vs hispanic: ", all_occups_all_years_asian_vs_hispanic_mean)

# Relative per-year example (values can be negative)
all_occups_all_years_hispanic_mean = race_all_occups_relative_percent_per_year("hispanic", years)
print("hispanic: ", all_occups_all_years_hispanic_mean)


hispanic:  0.023258093404392693
black:  0.06374744132167387
hispanic:  -87.87672143280486
black:  -83.82778664107671
hispanic:  {1910: 0.005915974746676009, 1920: 0.007177339676832575, 1930: 0.010279470721832383, 1940: 0.01099297824140071, 1950: 0.0156580525159816, 1960: 0.021396161442331342, 1970: 0.030976671921375695, 1980: 0.04617470987336233, 1990: 0.06611045402203097}
black:  {1910: 0.048270258913171764, 1920: 0.04755116513289124, 1930: 0.053531493019688445, 1940: 0.05370935574951313, 1950: 0.07096388660778224, 1960: 0.0680673466743644, 1970: 0.0725822035048231, 1980: 0.07836652562545487, 1990: 0.08339541933516116}
asian:  {1910: 0.0029481347896677763, 1920: 0.0030079777467855104, 1930: 0.0032557631109124143, 1940: 0.0014460668810149417, 1950: 0.0024807746100990362, 1960: 0.005308901098543822, 1970: 0.009761096043769466, 1980: 0.01947860907825475, 1990: 0.029492577141561158}
asian vs hispanic:  {1910: 0.0029481347896677763, 1920: 0.0030079777467855104, 1930: 0.0032557631109124143,