In [1]:
import pandas as pd
import altair as alt
import json
import geopandas as gpd

In [16]:
# Debug helper: opens a Qt console attached to this kernel.
# NOTE(review): nothing in the visible cells depends on it; likely leftover from interactive work.
%qtconsole

In [6]:
def load_county_ids(fname):
    """Read a JSON object of county ids and return it as a one-column table.

    Parameters
    ----------
    fname : str
        Path to a JSON file whose top level is an object (key -> value).

    Returns
    -------
    pandas.DataFrame
        Indexed by the JSON keys converted with ``int``; the single column
        (labelled ``0``) holds the JSON values converted with ``str``.
    """
    with open(fname, 'r') as handle:
        raw_ids = json.load(handle)

    # Walk the mapping once, collecting index entries and column values side by side.
    ids, names = [], []
    for raw_id, raw_name in raw_ids.items():
        ids.append(int(raw_id))
        names.append(str(raw_name))
    return pd.DataFrame(data=names, index=ids)

# Notebook-level lookup table built from the county-id JSON file.
# load_results_from_json reads this name as a global, so this cell must run first.
cid_table = load_county_ids('nc-election/county_ids.json')

In [7]:
def load_results_from_json(fname):
    """Load per-precinct vote counts from a JSON results file into a table.

    The file must hold an object keyed by county id. Each value is a list of
    records that are read through three keys: ``'aid'`` (joined to the county
    name as ``COUNTY_aid`` to form the precinct key), ``'bnm'`` (used as the
    column name) and ``'vct'`` (converted with ``int`` and stored as the value).
    County ids are translated to names through the notebook-level ``cid_table``
    (ids in its index, names in column ``0``).

    Parameters
    ----------
    fname : str
        Path to the JSON results file.

    Returns
    -------
    pandas.DataFrame
        One row per precinct key, with columns ``'pid'``, then one column per
        ``'bnm'`` value in first-seen order, then ``'County'``. An input with
        no records yields an empty frame with columns ``['pid', 'County']``.

    Raises
    ------
    IndexError
        If a county id in the file has no entry in ``cid_table``.
    """
    with open(fname, 'r') as f:
        results_j = json.load(f)

    # Build the id -> name lookup once instead of scanning cid_table per county.
    # setdefault keeps the first name for a repeated id, like a first-match scan.
    county_by_id = {}
    for cid, name in zip(cid_table.index, cid_table[0]):
        county_by_id.setdefault(str(cid), name)

    rows = {}       # precinct key -> row dict, in first-seen order
    county_of = {}  # precinct key -> county name
    for county_id, records in results_j.items():
        if county_id not in county_by_id:
            raise IndexError(f'county id {county_id!r} not found in cid_table')
        county_name = county_by_id[county_id]
        for rec in records:
            key = county_name + '_' + rec['aid']
            row = rows.setdefault(key, {'pid': key})
            row[rec['bnm']] = int(rec['vct'])
            county_of[key] = county_name

    if not rows:
        return pd.DataFrame(columns=['pid', 'County'])

    results = pd.DataFrame(list(rows.values()))
    # County comes from the lookup rather than from re-splitting 'pid' on '_',
    # which would truncate a county name that itself contains an underscore.
    # It is added last so the vote columns keep their positions right after 'pid'.
    results['County'] = results['pid'].map(county_of)
    return results

In [14]:
def load_results_from_csv(fname):
    """Read a results CSV and index it by its first column.

    Parameters
    ----------
    fname : str or file-like
        Anything ``pandas.read_csv`` accepts.

    Returns
    -------
    pandas.DataFrame
        The parsed table with its first column moved into the index; the
        remaining columns keep their order.
    """
    frame = pd.read_csv(fname)
    first_column = frame.columns[0]
    return frame.set_index(first_column)

In [32]:
# Alternative input, currently disabled: the Senate results CSV with its 'prec_id' column dropped.
# results = load_results_from_csv('nc-election/precinct_results_senate.csv')
# results.drop(['prec_id'], axis=1, inplace=True)
# Active input: the Allen / Ervin results JSON.
results = load_results_from_json('nc-election/precinct_results_allen_ervin.json')

In [45]:
# Candidate columns are picked by position: column 1 is treated as the
# Republican and column 2 as the Democrat.
# NOTE(review): this depends on the column order produced by the loader - confirm when the input changes.
rep_name, dem_name = results.columns[1], results.columns[2]

# Democratic margin as a percentage of the two-candidate vote:
# positive when the Democrat leads, negative when the Republican leads.
results['d_margin'] = (
    results[dem_name].sub(results[rep_name])
    .div(results[dem_name].add(results[rep_name]))
    .mul(100)
)

In [40]:
# Counties whose precincts are mapped in the cells below.
county_names = ['CUMBERLAND', 'HOKE', 'HARNETT']
# Matching rows of the county-id table; not referenced again in the cells shown here.
counties = cid_table.loc[cid_table[0].isin(county_names)]

In [41]:
# Precinct shapes for every county in the file; make_geo_table filters this global by county.
# The path is handed to GeoPandas directly rather than through a text-mode file handle,
# so the reader opens the file itself and no stray handle variable is left in the namespace.
shapes_all = gpd.read_file('nc-election/Voting_Precincts.geojson')

In [49]:
def make_geo_table(results, names):
    """Join precinct shapes to precinct results for the selected counties.

    Reads the notebook-level ``shapes_all`` table, which must provide the
    columns ``'county_nam'`` and ``'prec_id'``.

    Parameters
    ----------
    results : pandas.DataFrame
        Results table with at least the columns ``'pid'`` and ``'County'``.
    names : list of str
        County names to keep, matched against ``shapes_all['county_nam']``
        and ``results['County']``.

    Returns
    -------
    DataFrame
        The filtered shapes merged with the filtered results on ``'pid'``
        (``COUNTY_precinct``). It is a right join: every selected results row
        is kept, with empty shape columns where no shape shares its ``pid``.
    """
    in_scope = shapes_all['county_nam'].isin(names)
    # Take an explicit copy: adding a column to a bare slice of shapes_all
    # triggers pandas' SettingWithCopyWarning.
    shapes = shapes_all.loc[in_scope].copy()
    shapes['pid'] = shapes['county_nam'].astype(str) + '_' + shapes['prec_id']
    selected = results.loc[results['County'].isin(names)]
    return shapes.merge(selected, how='right', on='pid')

In [50]:
# Build the shape + results table for the selected counties and preview its first rows.
orange = make_geo_table(results, county_names)
orange.head(3)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super(GeoDataFrame, self).__setitem__(key, value)


Unnamed: 0,objectid,id,prec_id,enr_desc,county_nam,of_prec_id,county_id,blockid,st_areashape,st_perimetershape,geometry,pid,Trey Allen,Sam J. Ervin IV,County,d_margin
0,1400.0,1447.0,AH49,ARRAN HILLS,CUMBERLAND,,26.0,,40447260.0,32002.060092,"POLYGON ((-78.98134 35.00998, -78.98062 35.010...",CUMBERLAND_AH49,377,829,CUMBERLAND,37.47927
1,1401.0,1448.0,AL51,ALDERMAN,CUMBERLAND,,26.0,,844025700.0,128235.936098,"POLYGON ((-78.82717 34.85070, -78.82706 34.850...",CUMBERLAND_AL51,1807,944,CUMBERLAND,-31.370411
2,1464.0,1553.0,CC01,CROSS CREEK 01,CUMBERLAND,,26.0,,87025240.0,46719.185093,"POLYGON ((-78.89773 35.04147, -78.89755 35.040...",CUMBERLAND_CC01,97,606,CUMBERLAND,72.403983


In [20]:
def format_margin(m):
    """Format a signed margin as a party-prefixed label.

    Parameters
    ----------
    m : float
        Signed margin; negative values are labelled for the Republican,
        zero and positive values for the Democrat.

    Returns
    -------
    str
        ``'R+x.x'`` for negative margins, ``'D+x.x'`` for zero or positive
        ones, and ``'N/A'`` when the margin is missing (NaN or None).
    """
    # A margin computed as 0/0 is NaN; NaN fails the `< 0` test and would
    # otherwise be labelled 'D+nan'.
    if pd.isna(m):
        return 'N/A'
    party = 'R' if m < 0 else 'D'
    # abs() on both branches also keeps a negative zero from printing as 'D+-0.0'.
    return f'{party}+{abs(m):.1f}'

In [51]:
# Largest absolute margin among the mapped precincts. The chart below does not use it
# (its colour scale is clamped to a fixed +/-50 range); the value is kept for inspection.
max_abs = max(abs(orange['d_margin'].min()), orange['d_margin'].max())
# Human-readable label ('D+12.3' / 'R+4.5') shown in the tooltip.
orange['margin_str'] = orange['d_margin'].map(format_margin)

# TODO change this to use a prettier red
alt.Chart(orange).mark_geoshape(
        stroke='black'
    ).encode(
    alt.Color('d_margin:Q', 
              scale=alt.Scale(
                  # One domain stop per range colour: -50 -> red, 0 -> grey, +50 -> blue.
                  # With only two stops the three-colour range has no explicit midpoint.
                  domain=[-50,0,50],
                  range=['#D40000','#E6E6E6','#1666CB'],
                  interpolate='rgb',
                  # Margins beyond +/-50 are drawn with the end colours.
                  clamp=True
              )
            ),
    tooltip=[
        alt.Tooltip('pid:N', title='Precinct'),
        alt.Tooltip('margin_str:N',title='Margin')
    ]
).project('identity', reflectY=True).properties(
    width=800,
    height=600
)

  for col_name, dtype in df.dtypes.iteritems():


In [59]:
# `results_ervin` is another name for `results` (same object, not a copy).
results_ervin = results
results_beasley = load_results_from_csv('nc-election/precinct_results_senate.csv')

# Inner-join the two races on 'pid', keep only the key and the two margin
# columns, and give the suffixed margins descriptive names.
results_both = (
    results_ervin
    .merge(results_beasley, on='pid')
    .loc[:, ['pid', 'd_margin_x', 'd_margin_y']]
    .rename(columns={'d_margin_x': 'sc_ervin_margin', 'd_margin_y': 'sen_margin'})
)

In [77]:
# NOTE(review): this import sits mid-notebook rather than in the first cell; this cell needs it to run.
import statsmodels.formula.api as sm

# Ordinary least squares fit of sen_margin on sc_ervin_margin;
# the cell displays the fitted intercept and slope.
sm.ols('sen_margin ~ sc_ervin_margin', data=results_both).fit().params

Intercept          0.977908
sc_ervin_margin    1.025207
dtype: float64

In [78]:
# Scatter plot with one point per row of results_both:
# sc_ervin_margin on x, sen_margin on y, 'pid' shown on hover.
chart = alt.Chart(results_both).mark_circle().encode(
    x='sc_ervin_margin:Q',
    y='sen_margin:Q',
    tooltip='pid:N'
)
# Regression line of sen_margin on sc_ervin_margin, derived from the same chart and drawn in orange.
regression = chart.transform_regression('sc_ervin_margin','sen_margin').mark_line().encode(
    color=alt.value('orange')
)
# Layer the line over the points; the layered chart is the cell's displayed output.
chart + regression

  for col_name, dtype in df.dtypes.iteritems():
