In [2]:
import pandas as pd
%load_ext autoreload
%autoreload 2

In [3]:
import django
import os

os.environ['DJANGO_SETTINGS_MODULE'] = 'dashboard.settings'
os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"
django.setup()

In [4]:
columns = [
    "voter_id",
    "contacted",
    "supporter",
    "yardisgn",
    "county_code",
    "county_name",
    "last_name",
    "first_name",
    "middle_maiden_name",
    "name_suffix",
    "race",
    "gender",
    "birthdate",
    "dob",
    "residence_house_number",
    "residence_street_name",
    "residence_apt_unit_nbr",
    "residence_city",
    "residence_zipcode",
    "county_precinct_id",
    "congressional_district",
    "senate_district",
    "house_district",
    "judicial_district",
    "commission_district",
    "landline_number",
    "cellphone_number",
    "gopdel",
    "prespref16",
    "genpri16",
    "genpri16run",
    "gen16",
    "sp18",
    "pri18",
    "pri18aory",
    "prirunoff18",
    "prirunoff18ay",
    "gen18",
    "gen18ro",
    "novmu19",
    "decmurunoff19",
    "sp19",
    "sprunoff19",
    "sp20",
    "prespref20",
    "prespref20abs",
    "prim20",
    "prim20abs",
    "prirunoff20",
    "prirunoff20abs",
    "gen20",
    "genrunoff21",
    "sp21",
    "munov21",
    "murunoff21",
    "spec22",
    "pri22",
    "pri22abs",
    "prirunoff2022",
    "prirunoff2022abs",
    "registrationaddresslatitude",
    "registrationaddresslongitude"
]

dtypes = {
    "voter_id": str,
    "contacted": str,
    "supporter": str,
    "yardisgn": str,
    "county_code": str,
    "county_name": str,
    "last_name": str,
    "first_name": str,
    "middle_maiden_name": str,
    "name_suffix": str,
    "race": str,
    "gender": str,
    "birthdate": int,
    "dob": str,
    "residence_house_number": str,
    "residence_street_name": str,
    "residence_apt_unit_nbr": str,
    "residence_city": str,
    "residence_zipcode": str,
    "county_precinct_id": str,
    "congressional_district": str,
    "senate_district": str,
    "house_district": str,
    "judicial_district": str,
    "commission_district": str,
    "landline_number": str,
    "cellphone_number": str,
    "gopdel": str,
    "prespref16": str,
    "genpri16": str,
    "genpri16run": str,
    "gen16": str,
    "sp18": str,
    "pri18": str,
    "pri18aory": str,
    "prirunoff18": str,
    "prirunoff18ay": str,
    "gen18": str,
    "gen18ro": str,
    "novmu19": str,
    "decmurunoff19": str,
    "sp19": str,
    "sprunoff19": str,
    "sp20": str,
    "prespref20": str,
    "prespref20abs": str,
    "prim20": str,
    "prim20abs": str,
    "prirunoff20": str,
    "prirunoff20abs": str,
    "gen20": str,
    "genrunoff21": str,
    "sp21": str,
    "munov21": str,
    "murunoff21": str,
    "spec22": str,
    "pri22": str,
    "pri22abs": str,
    "prirunoff2022": str,
    "prirunoff2022abs": str,
    "registrationaddresslatitude": str,
    "registrationaddresslongitude": str
}

In [5]:

wl = pd.read_csv('../resources/hd51/walk_list_2022-8-3.csv', names=columns, dtype=dtypes)

In [6]:
def fix_voter_id(voter_id):
    return f'{int(voter_id):08d}'


def fix_county_code(county_code):
    return f'{int(county_code):03d}'


def fix_float(value):
    return float(value) if len(value.strip()) > 0 else None


def strip_column(value):
    return value.strip()


wl = wl.assign(voter_id=wl.voter_id.apply(fix_voter_id),
               lat=wl.registrationaddresslatitude.apply(fix_float),
               lon=wl.registrationaddresslongitude.apply(fix_float),
               county_code=wl.county_code.apply(fix_county_code),
               residence_apt_unit_nbr=wl.residence_apt_unit_nbr.apply(strip_column))


In [52]:
import plotly.express as px
from segmentation.utils import categorize_age
import geopandas as gdf
from hse_map.models import HseMap
from voter.models import ListEdition
from datetime import datetime

In [85]:
wl = wl.assign(gen=categorize_age(wl.birthdate))
wl_gdf = gdf.GeoDataFrame(wl, geometry=gdf.points_from_xy(wl.lon, wl.lat), crs=HseMap.CRS_LAT_LON)

In [53]:
edition_date = datetime.strptime('2022-08-05', '%Y-%m-%d')
edition = ListEdition.objects.get(date=edition_date)
hd51 = HseMap.objects.get(district='051')
hd51.edition = edition
hd51_map = hd51.as_geodataframe
hd51_vtd = hd51.district_vtd_map

In [None]:
hd51_vtd.head()

In [None]:
hd51_map.sindex.valid_query_predicates

In [None]:
wl_hd51_overlay = wl_gdf.overlay(hd51_vtd, how='union', keep_geom_type=False)
wl_hd51_inside = wl_gdf.sjoin(hd51_map, how='inner')
wl_hd51_outside = wl[~wl.voter_id.isin(wl_hd51_inside.voter_id)]
print(f'There are {len(wl_hd51_outside.index)} outside HD51!')

In [None]:
wl_hd51_overlay.head()

In [97]:
def build_hover_text(row):
    return f"""
    <b>{row.county_precinct_id}</b><br>
    {row.first_name} {row.last_name}<br>
    {row.residence_house_number} {row.residence_street_name} {row.residence_apt_unit_nbr}<br>
    {row.residence_city} {row.residence_zipcode}<extra></extra>
    """

In [None]:
wl_gdf = wl_gdf.assign(hover_text=wl_gdf.apply(build_hover_text, axis=1))
wl_gdf.shape

In [104]:
from core.models import BaseFig, PartyTallyMapConfig

In [None]:
import json

fig = px.scatter_mapbox(wl_gdf,
                        lat="lat",
                        lon="lon",
                        color="county_precinct_id",
                        labels={'county_precinct_id': 'Precinct'},
                        custom_data=['hover_text'],
                        hover_data={
                            "county_precinct_id":True,
                            "hover_text": True
                        })
fig.update_layout(
    mapbox={
        'zoom': 11.5,
        "style": "open-street-map",
        "layers": [
            {
                "source": json.loads(hd51_map.geometry.to_json()),
                "below": "traces",
                "type": "line",
                "color": "purple",
                "line": {"width": 2},
                "name": 'district_boundary'
            },
            {
                "source": json.loads(hd51_vtd.geometry.to_json()),
                "below": "district_boundary",
                "type": "line",
                "color": 'black',
                "line": {"width": .75},
            }
        ],
    },
    margin={"l": 0, "r": 0, "t": 0, "b": 0})

fig.update_traces(
    hovertemplate='%{customdata[0]}',
    hoverinfo=None
)

BaseFig.add_watermark(fig)

config = PartyTallyMapConfig('../resources/fig_config/hd51/summary.json')

BaseFig.add_logo(fig, config)

In [106]:
fig.write_html('../workproducts/hd51/walklist.html')

In [None]:
wl_hd51_outside.head()

In [None]:
len(wl_hd51_outside)

In [15]:
wl_hd51_outside.drop(columns=['geometry']).to_csv('../workproducts/hd51/walk_list_lat_lon_outside_district.csv')

In [16]:
from hse_map.models import HseMap
from voter.models import ListEdition
from datetime import datetime
from segmentation.voter_segmentation import VoterSegmentation

In [17]:
edition_date = datetime.strptime('2022-08-05', '%Y-%m-%d')
edition = ListEdition.objects.get(date=edition_date)
hd51 = HseMap.objects.get(district='051')
hd51.edition = edition

In [18]:
hd51_voter_ids = [v.voter_id for v in hd51.voters]

In [19]:
wl_voter_ids = wl.voter_id.unique()

In [20]:
missing = set(wl_voter_ids) - set(hd51_voter_ids)

In [21]:
from voter.models import Voter

inactive = Voter.objects.filter(status='I', hse='051', edition=edition)
wl_inactive = wl[wl.voter_id.isin([v.voter_id for v in inactive])]
wl_inactive.to_csv('../workproducts/hd51/walk_list_inactive.csv')

In [22]:
len(wl_inactive)

528

In [23]:
missing = missing - set(wl_inactive.voter_id)
wl_not_in_district = wl[wl.voter_id.isin(missing)]
wl_not_in_district.to_csv('../workproducts/hd51/walk_list_not_in_district.csv')

In [24]:
len(wl_not_in_district)

472

In [25]:
hd51_vs = VoterSegmentation(hd51.voters)
smry = hd51_vs.history_summary()
score = hd51_vs.score_voters(smry)

Gather time: 19.8
County Info Time: 0.0
First/Last Time: 0.1
Add Missing Records Time: 0.4
Pivot Time: 3.6
Add County Time: 0.0
Load Voter History Summary Time: 0.0
Compute Ops Time: 0.9
Compute max_ballots_cast Time: 0.0
Compute ballots_cast Time: 0.0
Compute gn_max Time: 0.0
Compute pn_max Time: 0.0
Compute gn Time: 0.0
Compute rn Time: 0.0
Compute dn Time: 0.0
Compute gr Time: 0.0
Compute pr Time: 0.0
Compute ra Time: 0.0
Reorder Time: 0.0


In [26]:
wl_score = wl[['voter_id']].merge(score, on='voter_id', how='inner')

In [27]:
len(wl_score)

15326

In [28]:
sum(wl_score.ra < 0.3)

501

In [29]:
dems = wl[wl.voter_id.isin(wl_score[wl_score.ra < .4].voter_id)]
dems.to_csv('../workproducts/hd51/walk_list_probable_dems.csv')

In [30]:

dems.shape

(1133, 66)

In [31]:
wl_inactive.to_csv('../workproducts/hd51/walk_list_inactive.csv')

In [32]:
sum(wl_score.ra > 0.5)

3272

In [33]:
sum(score.ra > 0.5)

7422

In [34]:
smry.head()

Unnamed: 0,voter_id,county_code,2014-05-20,2014-11-04,2016-05-24,2016-11-08,2018-05-22,2018-11-06,2020-06-09,2020-11-03,2022-05-24
0,23890,60,XP,GG,XP,GG,RP,GG,XP,GG,RP
1,24057,60,XP,XG,XP,GG,XP,GG,XP,GG,RP
2,24542,60,XP,GG,XP,GG,XP,GG,XP,GG,RP
3,28720,60,XP,GG,RP,GG,XP,GG,XP,GG,RP
4,29643,60,XP,XG,XP,GG,XP,XG,XP,GG,XP


In [35]:
smry_r = smry[smry['2022-05-24']=='RP']

In [102]:
smry_d = smry[smry['2022-05-24']=='DP']
len(smry_d)

4340

In [103]:
wl_in_smry_d = wl[wl.voter_id.isin(smry_d.voter_id)]
len(wl_in_smry_d)

548

In [36]:
len(smry_r)

6156

In [40]:
wl_in_smry_r = wl[wl.voter_id.isin(smry_r.voter_id)]

In [41]:
len(smry_r) - len(wl_in_smry_r)

3877

In [43]:
smry_r_not_in_wl = smry_r[~smry_r.voter_id.isin(wl.voter_id)]

In [44]:
smry_r_not_in_wl.head()

Unnamed: 0,voter_id,county_code,2014-05-20,2014-11-04,2016-05-24,2016-11-08,2018-05-22,2018-11-06,2020-06-09,2020-11-03,2022-05-24
0,23890,60,XP,GG,XP,GG,RP,GG,XP,GG,RP
1,24057,60,XP,XG,XP,GG,XP,GG,XP,GG,RP
13,52890,60,XP,GG,XP,GG,XP,GG,RP,GG,RP
22,85155,60,XP,GG,RP,GG,XP,GG,RP,GG,RP
25,104980,60,XP,GG,XP,GG,XP,GG,RP,GG,RP


In [45]:
len(wl)

16766

In [46]:
N = 7
for i in range(N):
    print(2*(N-i+1)/(N*(N+1)))

0.2857142857142857
0.25
0.21428571428571427
0.17857142857142858
0.14285714285714285
0.10714285714285714
0.07142857142857142
