In [1]:
import pandas as pd
# Raise pandas' display caps so frames of up to 200 rows / 200 columns are
# rendered in full rather than being elided in cell output.
pd.set_option('display.max_rows', 200)
pd.set_option('display.max_columns', 200)

## From README:
Each row of the data represents one precinct.
* The column `prec_20` is the unique precinct identifier. It concatenates the city ward and division numbers.
* There are 122 columns of election results, structured as described earlier in the README. The column names are of the format `OFFICE_CANDIDATENAME`. The first of these is `COUNCIL AT LARGE-DEM_ALLAN DOMB` and the last is `LIEUTENANT GOVERNOR DEM - Write-in`.
* There are cluster ID values for each of the three-, four-, five-, and six-cluster groupings (`clust_3`, `clust_4`, `clust_5`, and `clust_6`). These are automatically generated numeric IDs, and they aren't consistent from grouping to grouping.
* Precinct-level racial demographic data directly from the 2020 Census, in the columns `hisp`, `black`, `white`, `aapi`, and `other`. Shares of the total population are given as `hisp_share`, `black_share`, `white_share`, `aapi_share`, and `other_share`. This considers the adult (18+) population only (table P4 in the Census).
* Raw precinct-level ACS demographics crosswalked on a population-weighted basis from census-tract-level data. These are the 6-digit fields that start with the letter "B". A dictionary describing what each of these fields represents is [available from the Census Bureau](https://www.census.gov/programs-surveys/acs/technical-documentation/table-shells.html).
* Calculated ACS demographics:
    * `median_age` is the median age of all residents
    * `mean_household` is the average number of residents per household
    * `foreign_share` is the share of residents born outside the USA (USA includes Puerto Rico and other territories)
    * `noneng_share` is the share of residents speaking a language other than English at home, irrespective of how well they speak English
    * `edu_attain` is the share of residents having earned a Bachelor's degree or higher
    * `mean_household_inc` is the average income of households
    * `pov` is the poverty rate of individuals
    * `mean_commute` is the average commute time for workers
    * `two_parent` is the share of households with children that have two parents living at home
    * `child_house` is the share of households where any number of children under 18 are resident
    * `vet_share` is the share of adults who are veterans of the US military
    * `vacancy` is the share of housing units in the precinct that are unoccupied
    * `renter_rate` is the share of households who rent rather than own their homes

In [2]:
def load_precinct_data(path='../prec_results_demos.csv'):
    """Load the precinct results/demographics table, indexed by (ward, prec).

    Parameters
    ----------
    path : str, path-like or file-like, default '../prec_results_demos.csv'
        Source handed straight to ``pandas.read_csv``. The default is the
        location this notebook has always read from, so calling the function
        with no arguments behaves as before.

    Returns
    -------
    pandas.DataFrame
        One row per precinct, sorted by a two-level index ``('ward', 'prec')``.
        The ``prec_20`` column is kept, rewritten as a zero-padded string of
        at least four characters.

    Raises
    ------
    KeyError
        If the file has no ``prec_20`` column.
    ValueError
        If a ``prec_20`` value cannot be converted to an integer (e.g. missing).
    """
    df = pd.read_csv(path)

    # Normalise the identifier to a fixed-width string: go through int so that a
    # numeric parse (which loses leading zeros) is padded back out to 4 chars.
    prec_id = df['prec_20'].astype(int).astype(str).str.zfill(4)

    df['prec_20'] = prec_id
    # First two characters are the ward, the remainder is the division.
    df['ward'] = prec_id.str[:2]
    df['prec'] = prec_id.str[2:]

    return df.set_index(['ward', 'prec']).sort_index()

In [3]:
# Build the (ward, prec)-indexed precinct table once; the cells below read from `df`.
df = load_precinct_data()
# df

In [4]:
# Partition the column names by the case of their first character:
#   vote_cols - first character is unchanged by .upper() (upper-case letters,
#               but also digits and punctuation)
#   qcols     - everything else, i.e. names starting with a lower-case letter
# NOTE(review): any ACS field whose name starts with "B" would also land in
# vote_cols under this rule - confirm that is intended.
vote_cols = [name for name in df.columns if name[0].upper() == name[0]]
qcols = [name for name in df.columns if name[0].upper() != name[0]]
# qcols

In [5]:
# vote_cols

In [6]:
import plotly.express as px
import plotly.io as pio
# Make the template named "none" the default for every plotly figure created
# after this point, replacing whatever default plotly ships with.
pio.templates.default = "none"

In [7]:
# px.scatter(
#     df,
#     x='white_share',
#     # y='mean_household_inc',
#     y='PRESIDENT OF THE UNITED STATES-DEM_BERNIE SANDERS',
#     height=800,
#     width=800,
#     color='largest_race',
# )

In [8]:
# Restrict to the numeric columns, then build their pairwise Pearson
# correlation matrix (rows and columns are both the numeric column names).
dfq = df.select_dtypes(include='number')
dfcorr = dfq.corr(method='pearson')

In [27]:
# Column names of the correlation matrix, collected once so the membership
# test below does not rebuild a set for every candidate column.
corr_cols = set(dfcorr.columns)

# Keep a column when all of the following hold:
#   * its first character is unchanged by .lower() (drops the upper-case
#     election-result columns),
#   * its prefix before the first "_" is not a cluster or precinct id,
#   * there is no "<name>_share" sibling (where one exists, the share column
#     is kept and this column is dropped).
vcols = [
    c for c in dfcorr
    if c[0] == c[0].lower()
    and c.split('_')[0] not in {'clust', 'prec'}
    and c + '_share' not in corr_cols
]

# Rows limited to the selected variables, all columns retained.
vdfcorr = dfcorr.loc[vcols]
# Square selected-vs-selected matrix, taken in a single .loc call instead of
# chained indexing.
vvdfcorr = dfcorr.loc[vcols, vcols]
vvdfcorr

Unnamed: 0,vtd,total,hisp_share,white_share,black_share,aapi_share,other_share,median_age,mean_household,foreign_share,noneng_share,edu_attain,mean_household_inc,pov,mean_commute,two_parent,child_house,vet_share,vacancy,renter_rate,largest_race_size
vtd,1.0,0.009116,0.014658,0.05893,-0.109014,0.176257,0.07807,-0.012281,-0.051224,0.341346,0.23345,-0.298622,-0.164093,-0.121962,0.124302,0.104737,0.280324,0.142544,-0.271632,-0.277951,-0.26619
total,0.009116,1.0,-0.014971,0.355213,-0.398994,0.350688,0.056104,-0.026548,-0.201539,0.274309,0.152271,0.367749,0.323989,-0.159779,-0.235846,0.291073,-0.25468,-0.150745,-0.221634,0.12048,-0.298044
hisp_share,0.014658,-0.014971,1.0,-0.212971,-0.304589,0.006245,-0.223784,0.033326,-0.008357,0.155816,0.752432,-0.345064,-0.259883,0.413581,0.0399,-0.016337,0.41878,-0.143503,-0.072501,0.016496,-0.346949
white_share,0.05893,0.355213,-0.212971,1.0,-0.828227,0.203854,0.05754,0.011649,0.013137,0.167298,-0.011583,0.638716,0.686007,-0.595988,-0.361548,0.702118,-0.370652,-0.031378,-0.404921,-0.165488,-0.123445
black_share,-0.109014,-0.398994,-0.304589,-0.828227,1.0,-0.451477,-0.031444,-0.021871,-0.014881,-0.41534,-0.481311,-0.447199,-0.517785,0.345535,0.330162,-0.693403,0.130321,0.154633,0.44954,0.106811,0.448172
aapi_share,0.176257,0.350688,0.006245,0.203854,-0.451477,1.0,0.166025,-0.020147,0.02099,0.697868,0.447715,0.192686,0.154798,-0.097428,-0.129632,0.314396,-0.047698,-0.186906,-0.22305,0.079639,-0.583686
other_share,0.07807,0.056104,-0.223784,0.05754,-0.031444,0.166025,1.0,0.003697,0.041163,0.162952,-0.05026,0.17107,0.088647,-0.07236,0.028725,0.049183,-0.040643,-0.127754,0.018069,0.192921,-0.2707
median_age,-0.012281,-0.026548,0.033326,0.011649,-0.021871,-0.020147,0.003697,1.0,0.03096,0.014021,0.024717,-0.00267,0.097838,-0.011299,0.285925,-0.02181,0.03355,-0.016189,0.00425,-0.051962,0.0012
mean_household,-0.051224,-0.201539,-0.008357,0.013137,-0.014881,0.02099,0.041163,0.03096,1.0,0.03373,0.00715,0.033762,0.087065,-0.01776,0.163494,0.054949,0.066837,0.014483,0.04598,0.069881,0.020427
foreign_share,0.341346,0.274309,0.155816,0.167298,-0.41534,0.697868,0.162952,0.014021,0.03373,1.0,0.696063,0.012558,0.057903,-0.096413,-0.017815,0.344446,0.168222,-0.171475,-0.352567,-0.012407,-0.479393


In [28]:
# Correlation of each selected variable with this candidate's column,
# ordered from most positive to most negative.
(
    vdfcorr
    .loc[:, 'PRESIDENT OF THE UNITED STATES-DEM_HILLARY CLINTON']
    .sort_values(ascending=False)
)

black_share           0.624353
pov                   0.362866
vacancy               0.320464
largest_race_size     0.311027
mean_commute          0.254514
child_house           0.240631
hisp_share            0.213689
vet_share             0.111738
renter_rate           0.048263
median_age            0.007680
mean_household        0.003304
noneng_share         -0.001900
vtd                  -0.109187
other_share          -0.171924
foreign_share        -0.193649
aapi_share           -0.347302
total                -0.347817
mean_household_inc   -0.392415
edu_attain           -0.448308
two_parent           -0.500731
white_share          -0.719399
Name: PRESIDENT OF THE UNITED STATES-DEM_HILLARY CLINTON, dtype: float64

In [29]:
# Every column label available in vdfcorr, as a plain Python list.
vdfcorr.columns.tolist()

['COUNCIL AT LARGE-DEM_ALLAN DOMB',
 'COUNCIL AT LARGE-DEM_BLONDELL REYNOLDS BROWN',
 'COUNCIL AT LARGE-DEM_CARLA M CAIN',
 'COUNCIL AT LARGE-DEM_DEREK S GREEN',
 'COUNCIL AT LARGE-DEM_ED NEILSON',
 'COUNCIL AT LARGE-DEM_FRANK RIZZO',
 'COUNCIL AT LARGE-DEM_HELEN GYM',
 'COUNCIL AT LARGE-DEM_ISAIAH THOMAS',
 'COUNCIL AT LARGE-DEM_JENNE AYERS',
 'COUNCIL AT LARGE-DEM_MARNIE AUMENT LOUGHREY',
 'COUNCIL AT LARGE-DEM_PAUL STEINKE',
 'COUNCIL AT LARGE-DEM_SHERRIE COHEN',
 'COUNCIL AT LARGE-DEM_THOMAS WYATT',
 'COUNCIL AT LARGE-DEM_W WILSON GOODE JR',
 'COUNCIL AT LARGE-DEM_WILLIAM K GREENLEE',
 'COUNCIL AT LARGE-DEM_WILSON ALEXANDER',
 'COUNCIL AT LARGE-DEM_Write In ',
 'JUSTICE OF THE SUPREME COURT-DEM_ANNE E LAZARUS',
 'JUSTICE OF THE SUPREME COURT-DEM_CHRISTINE DONOHUE',
 'JUSTICE OF THE SUPREME COURT-DEM_DAVID WECHT',
 'JUSTICE OF THE SUPREME COURT-DEM_DWAYNE D WOODRUFF',
 'JUSTICE OF THE SUPREME COURT-DEM_JOHN HENRY FORADORA',
 'JUSTICE OF THE SUPREME COURT-DEM_KEVIN M DOUGHERTY',
 'JU

In [30]:
# dfcorr

In [31]:
# pandas DataFrames have no `clust` method, so the former `dfq.clust()` call
# could only raise AttributeError. Preview the precomputed cluster-ID columns
# (names starting with "clust_") instead.
# NOTE(review): assumed intent - drop this cell if it was only a probe for a
# built-in clustering method.
dfq.filter(regex=r'^clust_').head()

In [34]:
import plotly.figure_factory as ff

# Matrix to cluster: each row of the selected-variable correlation matrix is
# passed to create_dendrogram as one observation.
figdf = vvdfcorr

# `labels` is documented as a list, so hand over the row names as a plain list
# rather than a pandas Index.
fig = ff.create_dendrogram(
    figdf,
    orientation='left',
    labels=figdf.index.tolist(),
)
fig.update_layout(height=800, width=1600)
fig

Unnamed: 0,vtd,total,hisp,white,black,aapi,other,hisp_share,white_share,black_share,aapi_share,other_share,median_age,mean_household,foreign_share,noneng_share,edu_attain,mean_household_inc,pov,mean_commute,two_parent,child_house,vet_share,vacancy,renter_rate,largest_race_size
vtd,1.0,0.009116,0.043911,-0.006548,-0.04592,0.084679,0.051398,0.014658,0.05893,-0.109014,0.176257,0.07807,-0.012281,-0.051224,0.341346,0.23345,-0.298622,-0.164093,-0.121962,0.124302,0.104737,0.280324,0.142544,-0.271632,-0.277951,-0.26619
total,0.009116,1.0,0.215336,0.692643,-0.022485,0.612681,0.648315,-0.014971,0.355213,-0.398994,0.350688,0.056104,-0.026548,-0.201539,0.274309,0.152271,0.367749,0.323989,-0.159779,-0.235846,0.291073,-0.25468,-0.150745,-0.221634,0.12048,-0.298044
hisp,0.043911,0.215336,1.0,-0.083919,-0.214224,0.111454,-0.013163,0.93409,-0.156412,-0.344523,0.089866,-0.194742,0.03045,-0.080674,0.231243,0.745821,-0.28286,-0.197373,0.35844,-0.004653,0.032958,0.396716,-0.181405,-0.143154,0.001382,-0.417368
white,-0.006548,0.692643,-0.083919,1.0,-0.579813,0.386989,0.477584,-0.203293,0.881502,-0.742165,0.252418,0.068965,0.003615,-0.038611,0.194152,0.005086,0.65804,0.654276,-0.503605,-0.356038,0.614095,-0.442622,-0.077149,-0.31655,-0.013822,-0.141383
black,-0.04592,-0.022485,-0.214224,-0.579813,1.0,-0.239568,-0.064247,-0.26061,-0.740587,0.871678,-0.339343,-0.031649,-0.047939,-0.179217,-0.309158,-0.39547,-0.376654,-0.452515,0.296803,0.256857,-0.600949,0.109564,0.10626,0.288545,0.082953,0.276637
aapi,0.084679,0.612681,0.111454,0.386989,-0.239568,1.0,0.511481,-0.017301,0.201971,-0.406929,0.884639,0.146111,-0.023931,0.007319,0.585496,0.359162,0.257351,0.174948,-0.048694,-0.181896,0.274152,-0.15185,-0.197824,-0.15034,0.181501,-0.496088
other,0.051398,0.648315,-0.013163,0.477584,-0.064247,0.511481,1.0,-0.157307,0.26607,-0.284754,0.354573,0.748796,-0.00602,-0.029507,0.313132,0.086712,0.36244,0.259933,-0.14475,-0.11475,0.228012,-0.183112,-0.183659,-0.124864,0.244656,-0.374967
hisp_share,0.014658,-0.014971,0.93409,-0.203293,-0.26061,-0.017301,-0.157307,1.0,-0.212971,-0.304589,0.006245,-0.223784,0.033326,-0.008357,0.155816,0.752432,-0.345064,-0.259883,0.413581,0.0399,-0.016337,0.41878,-0.143503,-0.072501,0.016496,-0.346949
white_share,0.05893,0.355213,-0.156412,0.881502,-0.740587,0.201971,0.26607,-0.212971,1.0,-0.828227,0.203854,0.05754,0.011649,0.013137,0.167298,-0.011583,0.638716,0.686007,-0.595988,-0.361548,0.702118,-0.370652,-0.031378,-0.404921,-0.165488,-0.123445
black_share,-0.109014,-0.398994,-0.344523,-0.742165,0.871678,-0.406929,-0.284754,-0.304589,-0.828227,1.0,-0.451477,-0.031444,-0.021871,-0.014881,-0.41534,-0.481311,-0.447199,-0.517785,0.345535,0.330162,-0.693403,0.130321,0.154633,0.44954,0.106811,0.448172
