In [158]:
import pandas as pd
import numpy as np

from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

output_width = 1000
#output_width = 80 #//*** Normal Output width

#//*** Normal Output width
pd.set_option("display.width", output_width)

pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', None)
#pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', None)


Work with the Data for Term Project

Data is downloaded using DSC540 StoneburnerKurt TermProject _ Load_PreProcess_Data
which I supposed should be added as an external library if this was a production project.

Coded is separated to keep the project more manageable

In [159]:
#//*************************************
#//*** Import stored/preprocessed data
#//*************************************

#//*** Columns to remove from imported CSVs. We should be able to kill these on import if we were cool.
#//*** But we're not, so we'll use an expedient column delete list.

del_cols = ['Unnamed: 0', '_id']
#//*** Load datframes from file, because we mess them up
covid_ethnic_df = pd.read_csv("z_covid_ethnic_df.csv")
covid_cases_df = pd.read_csv("z_covid_cases_df.csv")
pop_attrib_df = pd.read_csv("z_pop_attrib_df.csv")
covid_project_df = pd.read_csv("z_covid_project_df.csv")

#//***********************************************************************************
#//*** Remove excess columns from read_csv
#//*** Use the loop in case we need to delete columns that are not exclusive to all
#//***********************************************************************************
for x in del_cols:
    if x in covid_cases_df.columns:
        covid_cases_df.drop([x], axis=1, inplace=True)

    if x in covid_ethnic_df.columns:
        covid_ethnic_df.drop([x], axis=1, inplace=True)
    
    if x in pop_attrib_df.columns:
        pop_attrib_df.drop([x], axis=1, inplace=True)
    
    if x in covid_project_df.columns:
        covid_project_df.drop([x], axis=1, inplace=True)
        
print(covid_cases_df.head())

#//*** Drop the counties labeled as unassigned.
#//*** We don't have data on those individuals
print(f"Length Before removing Unassigned County: {len(covid_cases_df)}")

covid_cases_df = covid_cases_df[~covid_cases_df['county'].isin(['Unassigned'])]

print(f"Length After removing Unassigned County: {len(covid_cases_df)}")

#//*** Remove the 'Out Of Country' listings
print(f"Length Before removing Out Of Country County: {len(covid_cases_df)}")

covid_cases_df = covid_cases_df[~covid_cases_df['county'].isin(['Out Of Country'])]

print(f"Length After removing Out Of Country County: {len(covid_cases_df)}")


#//*** Drop the rows with Other listed as ethnicity. This reopresents less than .1 % pf the population and doesn't have an
#//*** Equivalent in the federal data.
print(f"Length Before removing Other Race: {len(covid_ethnic_df)}")

covid_ethnic_df = covid_ethnic_df[~covid_ethnic_df['race_ethnicity'].isin(['Other'])]

print(f"Length After removing Other Race: {len(covid_ethnic_df)}")



        county  totalcountconfirmed  totalcountdeaths  newcountconfirmed  newcountdeaths                 date
0  Santa Clara                151.0               6.0                151               6  2020-03-18T00:00:00
1       Plumas                  0.0               0.0                  0               0  2020-03-18T00:00:00
2    El Dorado                  0.0               0.0                  0               0  2020-03-18T00:00:00
3     Mariposa                  0.0               0.0                  0               0  2020-03-18T00:00:00
4      Trinity                  0.0               0.0                  0               0  2020-03-18T00:00:00
Length Before removing Unassigned County: 19385
Length After removing Unassigned County: 19061
Length Before removing Out Of Country County: 19061
Length After removing Out Of Country County: 18762
Length Before removing Other Race: 2384
Length After removing Other Race: 2086


In [160]:
print(covid_ethnic_df['race_ethnicity'].unique())

['Latino' 'Multiracial' 'American Indian or Alaska Native' 'Black' 'Asian'
 'Native Hawaiian or Pacific Islander' 'White'
 'Native Hawaiian and other Pacific Islander' 'Multi-Race']


In [161]:
#//*** Combine Pacific Islander and Hawaiian values into one.
#//*** Hawaiian isn't a good choice of variable name. It's kind of racist and definitely non-inclusive.
#//*** But I need this to work, before I can reconsider a different variable name

covid_ethnic_df['race_ethnicity']=covid_ethnic_df['race_ethnicity'].str.replace('Native Hawaiian or Pacific Islander','Hawaiian')
covid_ethnic_df['race_ethnicity']=covid_ethnic_df['race_ethnicity'].str.replace('Native Hawaiian and other Pacific Islander','Hawaiian')
covid_ethnic_df['race_ethnicity']=covid_ethnic_df['race_ethnicity'].str.replace('Multi-Race','Multiracial' )
covid_ethnic_df['race_ethnicity']=covid_ethnic_df['race_ethnicity'].str.replace('American Indian or Alaska Native','Native' )

In [162]:
#print(pop_attrib_df)

HTML(pop_attrib_df.to_html())

Unnamed: 0,cty_fibs,county,population,Latino,White,Asian,Black,American Indian or Alaska Native,Hawaiian,Multiracial,0rf_num,0rf_rate,0rf_err,1-2rf_num,1-2rf_rate,1-2rf_err,3plrf_num,3plrf_rate,3plrf_err
0,1,Alameda,1671329,373055,512134,529698,169954,4157,13474,68857,52135,3.16,1.99,1159312,70.37,6.98,436060,26.47,6.84
1,3,Alpine,1129,139,692,18,4,243,0,33,241,21.89,8.4,579,52.59,10.35,281,25.52,9.31
2,5,Amador,39752,5753,30742,575,994,606,83,999,7169,20.35,5.97,17817,50.59,7.16,10235,29.06,6.49
3,7,Butte,219186,37731,155415,10573,3526,3390,465,8086,44433,19.46,5.32,122804,53.79,6.88,61078,26.75,6.02
4,9,Calaveras,45905,5967,36672,719,420,562,98,1467,9940,21.93,6.38,22900,50.53,7.46,12478,27.53,6.6
5,11,Colusa,21547,13018,7344,266,220,293,73,333,4223,19.65,5.75,12343,57.45,7.1,4920,22.9,6.19
6,13,Contra Costa,1153526,300420,492393,204045,100798,3126,5379,47365,150886,13.16,3.91,710710,61.99,6.99,284932,24.85,6.49
7,15,Del Norte,27812,5596,17236,802,917,2059,42,1160,5189,20.77,6.47,13874,55.54,7.66,5915,23.68,6.47
8,17,El Dorado,192843,25378,148903,8974,1696,1500,328,6064,55253,29.06,6.34,94483,49.69,7.03,40421,21.26,5.76
9,19,Fresno,999101,537180,286049,103430,46274,5967,1437,18764,68407,6.95,3.13,612805,62.24,7.1,303388,30.81,6.79


In [163]:
#//*** Sort Time Series by date and reset index
covid_cases_df = covid_cases_df.sort_values(by='date')
covid_ethnic_df = covid_ethnic_df.sort_values(by='date')

#//*** Reset the index
#covid_ethnic_df.reset_index(inplace=True)





#//*** Get first Ethnic_df date
ethnic_start_date = covid_ethnic_df['date'].iloc[0]
print(f"Ethinic State: {ethnic_start_date}")

#//*************************************************************************************************
#//*** Get the iloc (index #) of the first covid_case_df entry to match the date in covid_ethic_df
#//*** Compound code
#//*** 1. Get the entries where the date matches ethnic start date
#//*** 2. Get the first value from the list
#//*** 3. Get the Index (name) of that entry
#//*** 4. Get the iloc value of the name entry. This is the value to slice from covid_cases_df
#//*************************************************************************************************
#//*** I hate these, but I see the appeal
#//*************************************************************************************************
covid_start_iloc = covid_cases_df.index.get_loc(covid_cases_df[ covid_cases_df['date'] == ethnic_start_date].iloc[0].name)

#print(covid_cases_df.iloc[covid_start_iloc])
#//*** Merge Time Series covid_ethnic_df - covid_cases_df
covid_cases_df = covid_cases_df.iloc[covid_start_iloc:]

#//*** Start the Bg Table DF with a subset of
#bt_df = covid_ethnic_df
#print
#print(covid_ethnic_df.head())
#print(bt_df.head())
    
    
bt_df = pd.merge(covid_ethnic_df,covid_cases_df,how='left', on='date')

bt_df = pd.merge(bt_df,pop_attrib_df,how='left', on='county')

print(len(bt_df))

for group in bt_df.groupby('date'):
    print(group[1])
    break

Ethinic State: 2020-04-13T00:00:00
120988
    race_ethnicity  cases  case_percentage  deaths  death_percentage  percent_ca_population                 date       county  totalcountconfirmed  totalcountdeaths  newcountconfirmed  newcountdeaths  cty_fibs  population  Latino   White   Asian  Black  American Indian or Alaska Native  Hawaiian  Multiracial  0rf_num  0rf_rate  0rf_err  1-2rf_num  1-2rf_rate  1-2rf_err  3plrf_num  3plrf_rate  3plrf_err
0           Latino   5276            35.99     170             28.38                   38.9  2020-04-13T00:00:00   Santa Cruz                 82.0               1.0                  2               0        87      273213   92922  155094   12950   2718                              1023       336         8170    33624     12.74     3.78     160164       60.68       7.33      70169       26.58       6.90
1           Latino   5276            35.99     170             28.38                   38.9  2020-04-13T00:00:00    Mendocino                  4.0

In [164]:
#//*** Rename Columns
g = {
    "bt_rename" : {
        "race_ethnicity" : "race",
        "case_percentage" : "case_percent",
        "death_percentage" : "death_percent",
        "percent_ca_population" : "percent_ca_pop",
        "totalcountconfirmed" : "confirmed",
        "totalcountdeaths" : "deaths",
        "newcountconfirmed" : "newconfirm",
        "newcountdeaths" : "newdeath",
        "population" : "pop",
        "American Indian or Alaska Native" : "Native"
    }
}
bt_df = bt_df.rename(columns = g["bt_rename"])

cols = list(bt_df.columns)

#//*** Verify values in race column match the attribute columns.
#//*** Double checking our renaming work. It's good to make sure we have everything aligned going in.

print(cols)
for race in bt_df['race'].unique():
    print(f"{race} {race in cols}")

#//*** After much retcon the columns and data match



['race', 'cases', 'case_percent', 'deaths', 'death_percent', 'percent_ca_pop', 'date', 'county', 'confirmed', 'deaths', 'newconfirm', 'newdeath', 'cty_fibs', 'pop', 'Latino', 'White', 'Asian', 'Black', 'Native', 'Hawaiian', 'Multiracial', '0rf_num', '0rf_rate', '0rf_err', '1-2rf_num', '1-2rf_rate', '1-2rf_err', '3plrf_num', '3plrf_rate', '3plrf_err']
Latino True
Multiracial True
Native True
Black True
Asian True
Hawaiian True
White True


In [165]:
#//*** Reduce Excess Columns.
#//*** Build ethnic_pop columns which is the population of the identified race_ethnicity for the row.
#//*** Each daily entry is by race and county. Only one race value is applicable per row.\
#//*** Build a new column that indicates the county population of that category
#//*** Build a new column that indicates the percentage of that race in that county.

#//*** Temporary lists to convert to columns
cty_ethnic_pop = []
cty_ethnic_percent = []

#//*** Loop through each row of bt_df
for index,row in bt_df.iterrows():
    try:
        #//*** get the value from the appropriate row['race'] column.
        #//*** Example if row['race'] == 'Latino'
        #//*** Get the 'Latino' population for the county, which would be row ['Latino']
        cty_ethnic_pop.append( row [ row['race'] ] )

        #//*** Build the percentage of racial population in county.
        cty_ethnic_percent.append( row [ row['race'] ] / row [ 'pop'] )

    except: 
        #//*** Stop and print failing atributes
        #//*** These should all be retcon appropriate
        print(f"{row['race']} {row['county']}")
        break

#//*** For readability, replace Latino and White columns with ethnic_pop and ethnic_percent
bt_df['Latino'] = pd.Series(data=cty_ethnic_pop)
bt_df['White'] = pd.Series(data=cty_ethnic_percent)

#//*** Remove excess columns
for col in ['cty_fibs', 'Asian', 'Black', 'Native', 'Hawaiian', 'Multiracial']:
    if col in bt_df.columns:
        bt_df.pop(col)

#//*** Rename Latino column to pop_race        
if 'Latino' in bt_df.columns:
    bt_df = bt_df.rename(columns = {'Latino' : 'pop_race'})

#//*** Rename White column to percent_cty_pop        
if 'White' in bt_df.columns:
    bt_df = bt_df.rename(columns = {'White' : 'percent_cty_pop'})



      race  cases  case_percent  deaths  death_percent  percent_ca_pop                 date         county  confirmed  deaths  newconfirm  newdeath       pop  pop_race  percent_cty_pop  0rf_num  0rf_rate  0rf_err  1-2rf_num  1-2rf_rate  1-2rf_err  3plrf_num  3plrf_rate  3plrf_err
0   Latino   5276         35.99     170          28.38            38.9  2020-04-13T00:00:00     Santa Cruz       82.0     1.0           2         0    273213     92922         0.340108    33624     12.74     3.78     160164       60.68       7.33      70169       26.58       6.90
1   Latino   5276         35.99     170          28.38            38.9  2020-04-13T00:00:00      Mendocino        4.0     0.0           0         0     86749     22379         0.257974    16030     18.43     5.23      44837       51.55       7.10      26110       30.02       6.42
2   Latino   5276         35.99     170          28.38            38.9  2020-04-13T00:00:00      San Mateo      679.0    25.0          15         0    766573

In [171]:
print(len(bt_df))
#//*** Print a few rows from each day to get a feel for the total data
for group in bt_df.groupby('date'):
    print(group[1].head(5))

    


120988
     race  cases  case_percent  deaths  death_percent  percent_ca_pop                 date      county  confirmed  deaths  newconfirm  newdeath     pop  pop_race  percent_cty_pop  0rf_num  0rf_rate  0rf_err  1-2rf_num  1-2rf_rate  1-2rf_err  3plrf_num  3plrf_rate  3plrf_err
0  Latino   5276         35.99     170          28.38            38.9  2020-04-13T00:00:00  Santa Cruz       82.0     1.0           2         0  273213     92922         0.340108    33624     12.74     3.78     160164       60.68       7.33      70169       26.58       6.90
1  Latino   5276         35.99     170          28.38            38.9  2020-04-13T00:00:00   Mendocino        4.0     0.0           0         0   86749     22379         0.257974    16030     18.43     5.23      44837       51.55       7.10      26110       30.02       6.42
2  Latino   5276         35.99     170          28.38            38.9  2020-04-13T00:00:00   San Mateo      679.0    25.0          15         0  766573    183978       

7718  Asian   4244          11.8     345           16.8            15.4  2020-05-02T00:00:00        Nevada       41.0     1.0           0         0    99755      1349         0.013523    25819     25.98     6.04      48361       48.67       6.95      25184       25.35       6.11
       race  cases  case_percent  deaths  death_percent  percent_ca_pop                 date          county  confirmed  deaths  newconfirm  newdeath      pop  pop_race  percent_cty_pop  0rf_num  0rf_rate  0rf_err  1-2rf_num  1-2rf_rate  1-2rf_err  3plrf_num  3plrf_rate  3plrf_err
8120  White   9607     25.990153     730      34.894837            36.6  2020-05-03T00:00:00            Inyo       20.0     1.0           0         0    18039     10970         0.608127     3994     22.43     5.93       8887       49.90       7.22       4929       27.68       6.43
8121  White   9607     25.990153     730      34.894837            36.6  2020-05-03T00:00:00          Tehama        1.0     1.0           0         0    650

18680  Black   4045           5.3     389           10.0             6.0  2020-05-29T00:00:00         Mono       34.0     1.0           0         0     14444       105         0.007269     3805     26.81     7.76       7519       52.99       8.87       2866       20.20       7.23
              race  cases  case_percent  deaths  death_percent  percent_ca_pop                 date  county  confirmed  deaths  newconfirm  newdeath      pop  pop_race  percent_cty_pop  0rf_num  0rf_rate  0rf_err  1-2rf_num  1-2rf_rate  1-2rf_err  3plrf_num  3plrf_rate  3plrf_err
19082  Multiracial    617           0.8      26            0.7             2.2  2020-05-30T00:00:00  Merced      285.0     7.0           0         0   277680      5051         0.018190    26224      9.72     4.19     170334       63.15       7.32      73172       27.13       6.75
19083  Multiracial    617           0.8      26            0.7             2.2  2020-05-30T00:00:00    Mono       34.0     1.0           0         0    14444

           race  cases  case_percent  deaths  death_percent  percent_ca_pop                 date           county  confirmed  deaths  newconfirm  newdeath      pop  pop_race  percent_cty_pop  0rf_num  0rf_rate  0rf_err  1-2rf_num  1-2rf_rate  1-2rf_err  3plrf_num  3plrf_rate  3plrf_err
29638  Hawaiian    881           0.6      24            0.4             0.3  2020-06-25T00:00:00            Glenn       60.0     0.0           6         0    28393        32         0.001127     6805     24.45     6.91      14119       50.73       7.93       6910       24.83       6.56
29639  Hawaiian    881           0.6      24            0.4             0.3  2020-06-25T00:00:00   San Bernardino    10617.0   249.0         406         4  2180085      6584         0.003020   208242      9.68     3.55    1390345       64.60       7.03     553507       25.72       6.61
29640  Hawaiian    881           0.6      24            0.4             0.3  2020-06-25T00:00:00  San Luis Obispo      476.0     1.0       

43040  Multiracial   2305           0.8      46            0.5             2.2  2020-07-28T00:00:00     Tehama      199.0     1.0           7         0    65084      1810         0.027810    15426     24.29     5.96      32584       51.30       6.82      15501       24.41       5.73
           race  cases  case_percent  deaths  death_percent  percent_ca_pop                 date        county  confirmed  deaths  newconfirm  newdeath      pop  pop_race  percent_cty_pop  0rf_num  0rf_rate  0rf_err  1-2rf_num  1-2rf_rate  1-2rf_err  3plrf_num  3plrf_rate  3plrf_err
43442  Hawaiian   1844           0.6      47            0.5             0.3  2020-07-29T00:00:00        Alpine        2.0     0.0           0         0     1129         0         0.000000      241     21.89     8.40        579       52.59      10.35        281       25.52       9.31
43443  Hawaiian   1844           0.6      47            0.5             0.3  2020-07-29T00:00:00      Humboldt      221.0     4.0           3       

54814  Latino  269683          59.7    5866           48.3            38.9  2020-08-26T00:00:00  Sacramento    16708.0   259.0         107         7  1552058    366764         0.236308    86851      5.68     2.56    1016722       66.47       6.99     425977       27.85       6.79
         race   cases  case_percent  deaths  death_percent  percent_ca_pop                 date        county  confirmed  deaths  newconfirm  newdeath      pop  pop_race  percent_cty_pop  0rf_num  0rf_rate  0rf_err  1-2rf_num  1-2rf_rate  1-2rf_err  3plrf_num  3plrf_rate  3plrf_err
55216  Latino  273132          59.8    5957           48.4            38.9  2020-08-27T00:00:00       Ventura    10483.0   110.0          71         3   846006    365835         0.432426    79671      9.42     3.35     535084       63.26       7.11     231114       27.32       6.76
55217  Latino  273132          59.8    5957           48.4            38.9  2020-08-27T00:00:00  Contra Costa    13379.0   173.0         131         1  1

64964  Native   1386           0.3      45            0.3             0.5  2020-09-20T00:00:00  Los Angeles   260770.0  6353.0         969        23  10039107     18763         0.001869   294048      2.94     2.00    6698377       66.87       7.15    3024086       30.19       7.03
         race  cases  case_percent  deaths  death_percent  percent_ca_pop                 date      county  confirmed  deaths  newconfirm  newdeath     pop  pop_race  percent_cty_pop  0rf_num  0rf_rate  0rf_err  1-2rf_num  1-2rf_rate  1-2rf_err  3plrf_num  3plrf_rate  3plrf_err
65366  Native   1395           0.3      45            0.3             0.5  2020-09-21T00:00:00      Merced     8749.0   137.0           4         3  277680      1157         0.004167    26224      9.72     4.19     170334       63.15       7.32      73172       27.13       6.75
65367  Native   1395           0.3      45            0.3             0.5  2020-09-21T00:00:00    Siskiyou      146.0     0.0           0         0   43539     

75926  Black  25928           4.2    1258            7.5             6.0  2020-10-17T00:00:00    San Benito     1389.0    14.0           1         0    62808       622         0.009903     5128      8.34     2.66      41001       66.66       6.98      15381       25.01       6.79
         race   cases  case_percent  deaths  death_percent  percent_ca_pop                 date           county  confirmed  deaths  newconfirm  newdeath     pop  pop_race  percent_cty_pop  0rf_num  0rf_rate  0rf_err  1-2rf_num  1-2rf_rate  1-2rf_err  3plrf_num  3plrf_rate  3plrf_err
76328  Latino  375084          61.1    8139           48.6            38.9  2020-10-18T00:00:00             Yuba     1176.0    10.0          12         0   78668     22925         0.291415    12440     16.02     5.10      46124       59.39       7.42      19094       24.59       6.50
76329  Latino  375084          61.1    8139           48.6            38.9  2020-10-18T00:00:00           Placer     3938.0    52.0          40      

           race  cases  case_percent  deaths  death_percent  percent_ca_pop                 date     county  confirmed  deaths  newconfirm  newdeath     pop  pop_race  percent_cty_pop  0rf_num  0rf_rate  0rf_err  1-2rf_num  1-2rf_rate  1-2rf_err  3plrf_num  3plrf_rate  3plrf_err
87290  Hawaiian   3923           0.5      86            0.5             0.3  2020-11-14T00:00:00   Monterey    12854.0   108.0         173         1  434061      1960         0.004515    31346      7.42     3.24     269307       63.71       7.29     122057       28.87       6.98
87291  Hawaiian   3923           0.5      86            0.5             0.3  2020-11-14T00:00:00  El Dorado     1648.0     4.0          37         0  192843       328         0.001701    55253     29.06     6.34      94483       49.69       7.03      40421       21.26       5.76
87292  Hawaiian   3923           0.5      86            0.5             0.3  2020-11-14T00:00:00     Shasta     3195.0    36.0         212         0  180080    

98256  Latino  630931          57.4    9834           47.8            38.9  2020-12-11T00:00:00     Inyo      372.0    17.0           7         0    18039      4230         0.234492     3994     22.43     5.93       8887       49.90       7.22       4929       27.68       6.43
         race   cases  case_percent  deaths  death_percent  percent_ca_pop                 date         county  confirmed  deaths  newconfirm  newdeath      pop  pop_race  percent_cty_pop  0rf_num  0rf_rate  0rf_err  1-2rf_num  1-2rf_rate  1-2rf_err  3plrf_num  3plrf_rate  3plrf_err
98658  Latino  643448          57.3    9918           47.8            38.9  2020-12-12T00:00:00         Orange   100468.0  1680.0        2111        18  3175692   1081091         0.340427   140901      4.45     2.41    2100896       66.41       6.90     921576       29.13       6.72
98659  Latino  643448          57.3    9918           47.8            38.9  2020-12-12T00:00:00           Lake     1232.0    22.0          27         0   

110030  Black  78392           4.0    1934            6.7             6.0  2021-01-09T00:00:00  Calaveras     1425.0    22.0          40         0   45905       420         0.009149     9940     21.93     6.38      22900       50.53       7.46      12478       27.53       6.60
         race  cases  case_percent  deaths  death_percent  percent_ca_pop                 date     county  confirmed  deaths  newconfirm  newdeath      pop  pop_race  percent_cty_pop  0rf_num  0rf_rate  0rf_err  1-2rf_num  1-2rf_rate  1-2rf_err  3plrf_num  3plrf_rate  3plrf_err
110432  Black  79786           4.0    1955            6.7             6.0  2021-01-10T00:00:00       Inyo      748.0    27.0          14         0    18039       184         0.010200     3994     22.43     5.93       8887       49.90       7.22       4929       27.68       6.43
110433  Black  79786           4.0    1955            6.7             6.0  2021-01-10T00:00:00     Colusa     1478.0     9.0          10         0    21547       22

In [166]:
"""

cols = list(hier_df.columns)

for x in range(0,len(cols)):
    print(x)

hier_df = bt_df.set_index([bt_df['date'],bt_df['county']])
print(hier_df)

#HTML(hier_df.to_html())
"""

"\n\ncols = list(hier_df.columns)\n\nfor x in range(0,len(cols)):\n    print(x)\n\nhier_df = bt_df.set_index([bt_df['date'],bt_df['county']])\nprint(hier_df)\n\n#HTML(hier_df.to_html())\n"

In [167]:
#//*** Let's run some correllations for funsies
pop_cols = ['population', 'Latino','White','Asian','Black','American Indian or Alaska Native', 'Hawaiian' ]
rf_cols = ['0rf_num', '1-2rf_num',  '3plrf_num']
rf_cols = ['0rf_rate', '1-2rf_rate',  '3plrf_rate']

for x in pop_cols:
    for y in rf_cols:
        print(f"{x} {y} - {pop_attrib_df[ [x,y] ].corr().iloc[0].iloc[1] }")

population 0rf_rate - -0.4567961826407012
population 1-2rf_rate - 0.45180422307614304
population 3plrf_rate - 0.1425255103467081
Latino 0rf_rate - -0.40905527183588614
Latino 1-2rf_rate - 0.3837575774517347
Latino 3plrf_rate - 0.1806483757865183
White 0rf_rate - -0.464199120421134
White 1-2rf_rate - 0.4797158976341499
White 3plrf_rate - 0.09234516802071475
Asian 0rf_rate - -0.5087228664495507
Asian 1-2rf_rate - 0.5231641998293637
Asian 3plrf_rate - 0.1079399281808924
Black 0rf_rate - -0.38279510407369116
Black 1-2rf_rate - 0.3729504695343319
Black 3plrf_rate - 0.13387953801240074
American Indian or Alaska Native 0rf_rate - -0.3905801458839876
American Indian or Alaska Native 1-2rf_rate - 0.3961581308641285
American Indian or Alaska Native 3plrf_rate - 0.09669106495836434
Hawaiian 0rf_rate - -0.5532497192513318
Hawaiian 1-2rf_rate - 0.5874851204300123
Hawaiian 3plrf_rate - 0.0700536245231904
