In [1]:
import pandas as pd
import numpy as np

# Racial composition for places in 1980 Census from IPUMS

In [2]:
VAL_COlS = {
    "C9D001":      "White",
    "C9D002":      "Black",
    "C9D003":      "American Indian, Eskimo, and Aleut: American Indian",
    "C9D004":      "American Indian, Eskimo, and Aleut: Eskimo",
    "C9D005":      "American Indian, Eskimo, and Aleut: Aleut",
    "C9D006":      "Asian and Pacific Islander: Japanese",
    "C9D007":      "Asian and Pacific Islander: Chinese",
    "C9D008":      "Asian and Pacific Islander: Filipino",
    "C9D009":      "Asian and Pacific Islander: Korean",
    "C9D010":      "Asian and Pacific Islander: Asian Indian",
    "C9D011":      "Asian and Pacific Islander: Vietnamese",
    "C9D012":      "Asian and Pacific Islander: Hawaiian",
    "C9D013":      "Asian and Pacific Islander: Guamanian",
    "C9D014":      "Asian and Pacific Islander: Samoan",
    "C9D015":      "Other",
    "C9G001":      "Spanish_Ori_White",
    "C9G002":      "Spanish_Ori_Black",
    "C9G003":      "Spanish_Ori_American Indian, Eskimo, Aleut, and Asian and Pacific Islander",
    "C9G004":      "Spanish_Ori_Other",
}
CONTEXT_COLS = ["GISJOIN","YEAR","REGIONA","DIVISIONA","STATE","STATEA","SMSAA","COUNTYA",
                "CTY_SUBA","PLACE","PLACEA","TRACTA","BLCK_GRPA","BLOCKA","EDINDA","ENUMDISTA",
                "SCSAA","URB_AREAA","CDA","AIANHHA","MCDSEQNOA","ZIPA","AREANAME"]
data = pd.read_csv("../../../data/raw/supplemental_data/nhgis0002_ds104_1980_place_race_homeownership.csv",
                    usecols=CONTEXT_COLS + list(VAL_COlS.keys()), 
                    dtype={col: str for col in CONTEXT_COLS})

data = data.rename(VAL_COlS, axis=1)

In [3]:
data.describe()

Unnamed: 0,White,Black,"American Indian, Eskimo, and Aleut: American Indian","American Indian, Eskimo, and Aleut: Eskimo","American Indian, Eskimo, and Aleut: Aleut",Asian and Pacific Islander: Japanese,Asian and Pacific Islander: Chinese,Asian and Pacific Islander: Filipino,Asian and Pacific Islander: Korean,Asian and Pacific Islander: Asian Indian,Asian and Pacific Islander: Vietnamese,Asian and Pacific Islander: Hawaiian,Asian and Pacific Islander: Guamanian,Asian and Pacific Islander: Samoan,Other,Spanish_Ori_White,Spanish_Ori_Black,"Spanish_Ori_American Indian, Eskimo, Aleut, and Asian and Pacific Islander",Spanish_Ori_Other
count,22529.0,22529.0,22529.0,22529.0,22529.0,22529.0,22529.0,22529.0,22529.0,22529.0,22529.0,22529.0,22529.0,22529.0,22529.0,22529.0,22529.0,22529.0,22529.0
mean,5895.901,999.8481,35.155045,1.751742,0.558081,28.338453,33.501354,31.541214,13.923654,13.736917,10.648054,6.383195,1.277553,1.771361,269.564029,317.81313,14.982689,10.028142,233.740956
std,39230.75,18150.58,274.337389,38.990843,13.407727,863.440153,1097.942397,619.864539,297.371102,297.371247,158.167904,210.53058,22.245953,55.630871,6408.597744,6063.908657,615.875665,149.231995,5856.557597
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,384.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
50%,1162.0,3.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,7.0,0.0,0.0,1.0
75%,3711.0,101.0,10.0,0.0,0.0,2.0,1.0,2.0,2.0,2.0,0.0,0.0,0.0,0.0,22.0,37.0,1.0,1.0,11.0
max,4294075.0,1784337.0,16116.0,3856.0,1532.0,111228.0,124764.0,43713.0,33066.0,40945.0,13257.0,28176.0,2297.0,6972.0,749902.0,625130.0,90210.0,11938.0,681804.0


In [4]:
CA_data = data.loc[data["STATEA"] == "6"].copy()
CA_data["STATE"].unique()

array(['California'], dtype=object)

In [5]:
CA_data["total_pop"] = CA_data[list(VAL_COlS.values())].sum(axis=1)
CA_data.to_csv("../../../data/interim/CA_race_place_1980.csv", index=False)

# output: get racial composition in 1980 for cities with tax rates and rev in both 1977 and 1982

In [6]:
def normalize_str_cols(df, str_columns):
    for col in str_columns:
        df[col] = df[col].str.strip()
        df[col] = df[col].str.lower()
        df[col] = df[col].map(lambda x: None if x == "" else x)
        
normalize_str_cols(CA_data, CONTEXT_COLS)

In [7]:
city_rev_tax_rate = pd.read_csv("../../../data/interim/city_tax_rates_rev_77_82.csv",
                                dtype={c: str for c in ["ID", "County", "name_in_taxrate","name_in_rev"]})
city_rev_tax_rate.describe()

Unnamed: 0,Nom_Tax_Rate_All_Res_Med_1977,Nom_Tax_Rate_All_Res_Med_1982,Nom_Tax_Rate_Res_All_SFH_Med_1977,Nom_Tax_Rate_Res_All_SFH_Med_1982,Eff_Tax_Rate_All_Res_Med_1977,Eff_Tax_Rate_All_Res_Med_1982,Eff_Tax_Rate_Res_All_SFH_Med_1977,Eff_Tax_Rate_Res_All_SFH_Med_1982,AV_Ratio_All_Res_Med_1977,AV_Ratio_All_Res_Med_1982,Total_Revenue_adj_1977,Total_Revenue_adj_1982,Property_Tax_adj_1977,Property_Tax_adj_1982,prop_tax_ratio_1977,prop_tax_ratio_1982,total_rev_change_in_percentage,prop_tax_change_in_percentage,prop_tax_ratio_change
count,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0
mean,12.596667,1.182222,12.776667,1.183333,1.932222,0.597222,1.908333,0.626667,17.922222,54.701111,388541.4,424455.7,69911.64469,42467.611111,0.177364,0.1086,0.124708,-0.323213,-0.068764
std,2.625222,0.103501,2.741831,0.105105,0.384701,0.156762,0.398796,0.166522,1.387573,13.301008,763730.4,823522.5,134590.305203,78370.890208,0.056768,0.038271,0.237767,0.11558,0.030849
min,9.1,1.0,9.1,1.0,1.35,0.34,1.31,0.3,13.7,30.63,30332.08,31505.0,5433.278418,3251.0,0.083512,0.043708,-0.135382,-0.487442,-0.133561
25%,10.8775,1.12,10.8925,1.115,1.5925,0.475,1.57,0.535,17.4,45.3425,51229.02,62954.5,8766.754529,6279.75,0.154403,0.083552,0.006428,-0.411148,-0.083239
50%,12.505,1.16,12.495,1.165,1.91,0.615,1.87,0.645,18.3,57.75,109038.7,132723.5,16535.543655,9912.5,0.178474,0.105888,0.039685,-0.318828,-0.064614
75%,13.6925,1.19,14.2775,1.19,2.2475,0.72,2.2575,0.7375,18.975,62.885,324650.4,323131.5,51590.976104,36915.75,0.194069,0.135817,0.173906,-0.250068,-0.042837
max,20.17,1.42,20.26,1.43,2.55,0.82,2.56,0.91,19.2,73.6,3117455.0,3346262.0,478270.724801,295354.0,0.304263,0.180371,0.848943,-0.081285,-0.029218


In [8]:
CA_data.columns

Index(['GISJOIN', 'YEAR', 'REGIONA', 'DIVISIONA', 'STATE', 'STATEA', 'SMSAA',
       'COUNTYA', 'CTY_SUBA', 'PLACE', 'PLACEA', 'TRACTA', 'BLCK_GRPA',
       'BLOCKA', 'EDINDA', 'ENUMDISTA', 'SCSAA', 'URB_AREAA', 'CDA', 'AIANHHA',
       'MCDSEQNOA', 'ZIPA', 'AREANAME', 'White', 'Black',
       'American Indian, Eskimo, and Aleut: American Indian',
       'American Indian, Eskimo, and Aleut: Eskimo',
       'American Indian, Eskimo, and Aleut: Aleut',
       'Asian and Pacific Islander: Japanese',
       'Asian and Pacific Islander: Chinese',
       'Asian and Pacific Islander: Filipino',
       'Asian and Pacific Islander: Korean',
       'Asian and Pacific Islander: Asian Indian',
       'Asian and Pacific Islander: Vietnamese',
       'Asian and Pacific Islander: Hawaiian',
       'Asian and Pacific Islander: Guamanian',
       'Asian and Pacific Islander: Samoan', 'Other', 'Spanish_Ori_White',
       'Spanish_Ori_Black',
       'Spanish_Ori_American Indian, Eskimo, Aleut, and Asian 

In [9]:
CA_white_percents = CA_data.copy()
CA_white_percents["non_spanish_orig_white_percentage"] = (CA_data["White"] - CA_data["Spanish_Ori_White"]) / CA_data["total_pop"]
CA_white_percents["white_percentage"] = CA_data["White"] / CA_data["total_pop"]
CA_white_percents["black_percentage"] = CA_data["Black"] / CA_data["total_pop"]
CA_white_percents["hispanic_percentage"] = (CA_data["Spanish_Ori_White"] + CA_data["Spanish_Ori_Black"] + 
                                            CA_data["Spanish_Ori_American Indian, Eskimo, Aleut, and Asian and Pacific Islander"] +
                                            CA_data["Spanish_Ori_Other"]) / CA_data["total_pop"]
CA_white_percents["hispanic_black_combined_percentage"] = (CA_white_percents["black_percentage"] + 
                                                           CA_white_percents["hispanic_percentage"])
_city_rev_tax_rate = city_rev_tax_rate.merge(CA_white_percents[["AREANAME",
                                                                "non_spanish_orig_white_percentage",
                                                                "white_percentage",
                                                                "black_percentage",
                                                                "hispanic_percentage",
                                                                "hispanic_black_combined_percentage"
                                                               ]], how="left", left_on="name_in_rev", right_on="AREANAME")

In [10]:
CA_data.head()

Unnamed: 0,GISJOIN,YEAR,REGIONA,DIVISIONA,STATE,STATEA,SMSAA,COUNTYA,CTY_SUBA,PLACE,...,Asian and Pacific Islander: Vietnamese,Asian and Pacific Islander: Hawaiian,Asian and Pacific Islander: Guamanian,Asian and Pacific Islander: Samoan,Other,Spanish_Ori_White,Spanish_Ori_Black,"Spanish_Ori_American Indian, Eskimo, Aleut, and Asian and Pacific Islander",Spanish_Ori_Other,total_pop
1351,g0600003,1980,4,9,california,6,,,,adelanto,...,1,1,5,0,130,66,5,23,101,2359
1352,g0600010,1980,4,9,california,6,,,,alameda,...,236,145,223,70,2551,2773,52,658,1859,69194
1353,g0600015,1980,4,9,california,6,,,,alamo,...,0,0,3,0,82,268,0,14,35,8822
1354,g0600020,1980,4,9,california,6,,,,albany,...,68,13,0,4,881,665,16,63,576,16450
1355,g0600025,1980,4,9,california,6,,,,alhambra,...,1062,33,10,10,9351,15285,86,398,8518,88902


In [11]:
_city_rev_tax_rate

Unnamed: 0,ID,County,name_in_taxrate,name_in_rev,Nom_Tax_Rate_All_Res_Med_1977,Nom_Tax_Rate_All_Res_Med_1982,Nom_Tax_Rate_Res_All_SFH_Med_1977,Nom_Tax_Rate_Res_All_SFH_Med_1982,Eff_Tax_Rate_All_Res_Med_1977,Eff_Tax_Rate_All_Res_Med_1982,...,prop_tax_ratio_1982,total_rev_change_in_percentage,prop_tax_change_in_percentage,prop_tax_ratio_change,AREANAME,non_spanish_orig_white_percentage,white_percentage,black_percentage,hispanic_percentage,hispanic_black_combined_percentage
0,52001001,"alameda county, ca",alameda city,alameda city,13.52,1.19,13.52,1.19,2.31,0.75,...,0.103934,0.014849,-0.438116,-0.083787,alameda city,0.679452,0.719528,0.038616,0.077203,0.115819
1,52001003,"alameda county, ca",berkeley city,berkeley city,15.78,1.18,15.78,1.18,2.53,0.61,...,0.180371,-0.135382,-0.487442,-0.123892,berkeley city,0.607322,0.628281,0.191346,0.048081,0.239426
2,52001005,"alameda county, ca",fremont city,fremont city,12.51,1.35,12.51,1.35,1.93,0.74,...,0.175628,0.013245,-0.308168,-0.081594,fremont city,0.661617,0.734445,0.022212,0.122519,0.144732
3,52001006,"alameda county, ca",hayward city,hayward city,12.85,1.31,12.85,1.31,1.99,0.71,...,0.140055,-0.125915,-0.311561,-0.037768,hayward city,0.544092,0.626904,0.047228,0.168107,0.215334
4,52001009,"alameda county, ca",oakland city,oakland city,14.45,1.31,14.45,1.31,2.42,0.6,...,0.1377,-0.042145,-0.326094,-0.05802,oakland city,0.317587,0.348795,0.428372,0.087384,0.515756
5,52001012,"alameda county, ca",san leandro city,san leandro city,9.1,1.1,9.1,1.1,1.57,0.42,...,0.10319,0.038669,-0.40165,-0.075937,san leandro city,0.69284,0.77313,0.010557,0.115157,0.125714
6,52010005,"fresno county, ca",fresno city,fresno city,12.28,1.0,12.28,1.0,1.85,0.72,...,0.137835,0.005362,-0.205786,-0.036645,fresno city,0.507247,0.580297,0.076625,0.190918,0.267543
7,52019017,"los angeles county, ca",glendale city,glendale city,11.2,1.12,11.2,1.1,1.89,0.34,...,0.065186,0.160679,-0.24002,-0.034369,glendale city,0.63172,0.732615,0.002809,0.150753,0.153562
8,52019026,"los angeles county, ca",long beach city,long beach city,13.0,1.12,14.84,1.13,2.25,0.62,...,0.069561,0.478136,-0.298424,-0.076996,long beach city,0.593626,0.655172,0.09887,0.123048,0.221919
9,52019027,"los angeles county, ca",los angeles city,los angeles city,13.88,1.15,15.18,1.16,2.24,0.65,...,0.088264,0.073395,-0.382454,-0.065153,los angeles city,0.375216,0.480253,0.13355,0.215726,0.349276


In [12]:
CA_white_percents[CA_white_percents["AREANAME"].str.contains("san francisco")]

Unnamed: 0,GISJOIN,YEAR,REGIONA,DIVISIONA,STATE,STATEA,SMSAA,COUNTYA,CTY_SUBA,PLACE,...,Spanish_Ori_White,Spanish_Ori_Black,"Spanish_Ori_American Indian, Eskimo, Aleut, and Asian and Pacific Islander",Spanish_Ori_Other,total_pop,non_spanish_orig_white_percentage,white_percentage,black_percentage,hispanic_percentage,hispanic_black_combined_percentage
1954,g0602485,1980,4,9,california,6,,,,san francisco,...,39920,1557,5513,36383,762347,0.465878,0.518243,0.113353,0.109364,0.222716
2013,g0602765,1980,4,9,california,6,,,,south san francisco,...,6445,79,599,3830,60346,0.480744,0.587545,0.032496,0.181503,0.213999


In [13]:
_city_rev_tax_rate.loc[_city_rev_tax_rate["ID"] == "052038001", "AREANAME"] = CA_white_percents.loc[CA_white_percents["GISJOIN"] == "g0602485",
                                                                                                   "AREANAME"].values[0]
_city_rev_tax_rate.loc[_city_rev_tax_rate["ID"] == "052038001", "non_spanish_orig_white_percentage"] = CA_white_percents.loc[CA_white_percents["GISJOIN"] == "g0602485",
                                                                                                   "non_spanish_orig_white_percentage"].values[0]
_city_rev_tax_rate.loc[_city_rev_tax_rate["ID"] == "052038001", "white_percentage"] = CA_white_percents.loc[CA_white_percents["GISJOIN"] == "g0602485",
                                                                                                   "white_percentage"].values[0]
_city_rev_tax_rate.loc[_city_rev_tax_rate["ID"] == "052038001", "black_percentage"] = CA_white_percents.loc[CA_white_percents["GISJOIN"] == "g0602485",
                                                                                                   "black_percentage"].values[0]
_city_rev_tax_rate.loc[_city_rev_tax_rate["ID"] == "052038001", "hispanic_percentage"] = CA_white_percents.loc[CA_white_percents["GISJOIN"] == "g0602485",
                                                                                                   "hispanic_percentage"].values[0]

_city_rev_tax_rate.loc[_city_rev_tax_rate["ID"] == "052038001", "hispanic_black_combined_percentage"] = CA_white_percents.loc[CA_white_percents["GISJOIN"] == "g0602485",
                                                                                                   "hispanic_black_combined_percentage"].values[0]


In [14]:
_city_rev_tax_rate

Unnamed: 0,ID,County,name_in_taxrate,name_in_rev,Nom_Tax_Rate_All_Res_Med_1977,Nom_Tax_Rate_All_Res_Med_1982,Nom_Tax_Rate_Res_All_SFH_Med_1977,Nom_Tax_Rate_Res_All_SFH_Med_1982,Eff_Tax_Rate_All_Res_Med_1977,Eff_Tax_Rate_All_Res_Med_1982,...,prop_tax_ratio_1982,total_rev_change_in_percentage,prop_tax_change_in_percentage,prop_tax_ratio_change,AREANAME,non_spanish_orig_white_percentage,white_percentage,black_percentage,hispanic_percentage,hispanic_black_combined_percentage
0,52001001,"alameda county, ca",alameda city,alameda city,13.52,1.19,13.52,1.19,2.31,0.75,...,0.103934,0.014849,-0.438116,-0.083787,alameda city,0.679452,0.719528,0.038616,0.077203,0.115819
1,52001003,"alameda county, ca",berkeley city,berkeley city,15.78,1.18,15.78,1.18,2.53,0.61,...,0.180371,-0.135382,-0.487442,-0.123892,berkeley city,0.607322,0.628281,0.191346,0.048081,0.239426
2,52001005,"alameda county, ca",fremont city,fremont city,12.51,1.35,12.51,1.35,1.93,0.74,...,0.175628,0.013245,-0.308168,-0.081594,fremont city,0.661617,0.734445,0.022212,0.122519,0.144732
3,52001006,"alameda county, ca",hayward city,hayward city,12.85,1.31,12.85,1.31,1.99,0.71,...,0.140055,-0.125915,-0.311561,-0.037768,hayward city,0.544092,0.626904,0.047228,0.168107,0.215334
4,52001009,"alameda county, ca",oakland city,oakland city,14.45,1.31,14.45,1.31,2.42,0.6,...,0.1377,-0.042145,-0.326094,-0.05802,oakland city,0.317587,0.348795,0.428372,0.087384,0.515756
5,52001012,"alameda county, ca",san leandro city,san leandro city,9.1,1.1,9.1,1.1,1.57,0.42,...,0.10319,0.038669,-0.40165,-0.075937,san leandro city,0.69284,0.77313,0.010557,0.115157,0.125714
6,52010005,"fresno county, ca",fresno city,fresno city,12.28,1.0,12.28,1.0,1.85,0.72,...,0.137835,0.005362,-0.205786,-0.036645,fresno city,0.507247,0.580297,0.076625,0.190918,0.267543
7,52019017,"los angeles county, ca",glendale city,glendale city,11.2,1.12,11.2,1.1,1.89,0.34,...,0.065186,0.160679,-0.24002,-0.034369,glendale city,0.63172,0.732615,0.002809,0.150753,0.153562
8,52019026,"los angeles county, ca",long beach city,long beach city,13.0,1.12,14.84,1.13,2.25,0.62,...,0.069561,0.478136,-0.298424,-0.076996,long beach city,0.593626,0.655172,0.09887,0.123048,0.221919
9,52019027,"los angeles county, ca",los angeles city,los angeles city,13.88,1.15,15.18,1.16,2.24,0.65,...,0.088264,0.073395,-0.382454,-0.065153,los angeles city,0.375216,0.480253,0.13355,0.215726,0.349276


In [15]:
_city_rev_tax_rate.to_csv("../../../data/interim/city_tax_rates_rev_77_82_with_race_80.csv", index=False)