---

# Interacting with Data (DATA 303 Visualization Assignment 2)

---

In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
import folium
import plotly.express as px
from warnings import filterwarnings
filterwarnings(action='ignore')
%matplotlib inline

## Part 1 Task: Parallel Categories Plot
---

In [2]:
# Read the survey extract used for the parallel-categories plot and preview it.
vote_subset = pd.read_csv(filepath_or_buffer='./data/vote_subset.csv')
vote_subset.head(n=3)

Unnamed: 0,RespID,ppl_like_you,President,Congress,Supreme_Court,CDC,Election_officials,Intelligence,Media,Police,USPS,age,education,race,gender,income_cat,voter_cat
0,470001,2.0,3.0,4.0,2.0,1.0,1.0,1.0,1.0,2.0,4.0,73,College,White,Female,$75-125k,always
1,470002,2.0,2.0,3.0,2.0,2.0,2.0,2.0,3.0,2.0,2.0,90,College,White,Female,$125k or more,always
2,470003,1.0,3.0,2.0,1.0,1.0,2.0,2.0,2.0,2.0,1.0,53,College,White,Male,$125k or more,sporadic


In [3]:
# Distinct voting-frequency labels, in order of first appearance.
vote_subset.loc[:, 'voter_cat'].unique()

array(['always', 'sporadic', 'rarely/never'], dtype=object)

In [4]:
# Title-case the labels so they read well on the figure.
vote_subset['voter_cat'] = vote_subset['voter_cat'].map(str.title)

# Ordinal code used to colour the ribbons:
# 0 = Rarely/Never, 1 = Sporadic, 2 = any other label (i.e. Always).
_voter_codes = {'Rarely/Never': 0, 'Sporadic': 1}
vote_subset['voter_cat_dicot'] = vote_subset['voter_cat'].map(
    lambda label: _voter_codes.get(label, 2)
)

In [33]:
# Parallel-categories plot: voting frequency -> education -> income.

# categoryorder='array' only has a defined order when a categoryarray is
# supplied, so pin the voting-frequency order explicitly. The labels are the
# title-cased values of `voter_cat`.
cat_dim = go.parcats.Dimension(
    values=vote_subset['voter_cat'],
    categoryorder='array',
    categoryarray=['Always', 'Sporadic', 'Rarely/Never'],
    label="Voting Frequency"
)

# NOTE(review): categoryorder='array' is requested here without a
# categoryarray, so the education order is not pinned by this code. Supply the
# full ordered list of education levels once confirmed against the data.
educ_dim = go.parcats.Dimension(
    values=vote_subset['education'],
    label='Level of Education',
    categoryorder='array'
)

# Income brackets listed from highest to lowest.
income_dim = go.parcats.Dimension(
    values=vote_subset['income_cat'],
    categoryorder='array',
    categoryarray=['$125k or more', '$75-125k', '$40-75k', 'Less than $40k'],
    label="Income Category"
)

# Ribbons are coloured by the ordinal voting-frequency code.
color = vote_subset.voter_cat_dicot

fig = go.Figure(
    data=[
        go.Parcats(
            dimensions=[cat_dim, educ_dim, income_dim],
            # The colour bar is hidden on the trace's own `line` settings:
            # this trace does not use a shared layout coloraxis, so
            # fig.update_coloraxes() would have nothing to update.
            line={
                'color': color,
                'colorscale': 'purd',
                'shape': 'hspline',
                'showscale': False,
            },
            labelfont={'size': 30, 'family': 'Arial'},
            tickfont={'size': 17, 'family': 'Arial'},
        )
    ]
)

fig.show()
# Export a static image and a standalone interactive page.
fig.write_image('categories.png')
fig.write_html('categories.html')

## Part 2 Task: Bubble Chart
---

In [6]:
# Load locality-level election results and order them chronologically so the
# animation frames appear in year order.
election_data = pd.read_csv('./data/election_data.csv').sort_values(by='year')
election_data.head(n=3)

Unnamed: 0,locality,provisional_ballots,absentee_ballots,in_person_ballots,in_person_curbside_ballots,TotalVoteTurnout,ActiveRegisteredVoters,InactiveRegisteredVoters,TotalRegisteredVoters,year,region
0,ACCOMACK COUNTY,0,466,9289,0,9755,21576,2002,23578,2014,eastern
97,POWHATAN COUNTY,6,417,9320,10,9743,18612,729,19341,2014,central
96,PORTSMOUTH CITY,43,957,23683,256,24683,55720,6454,62174,2014,hampton_roads


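Add two derived variables:
- `pct_absentee`: absentee ballots as a percentage of all ballots cast, calculated as `absentee_ballots / TotalVoteTurnout * 100`
- `pct_turnout`: ballots cast as a percentage of active registered voters, calculated as `TotalVoteTurnout / ActiveRegisteredVoters * 100`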

In [7]:
# Absentee ballots as a percentage of all ballots cast.
election_data['pct_absentee'] = (
    election_data['absentee_ballots']
    .div(election_data['TotalVoteTurnout'])
    .mul(100)
)

In [8]:
# Ballots cast as a percentage of active registered voters.
election_data['pct_turnout'] = (
    election_data['TotalVoteTurnout']
    .div(election_data['ActiveRegisteredVoters'])
    .mul(100)
)

In [9]:
# Keep only rows whose turnout is at most 100% (rows above 100% and rows with
# a missing turnout are dropped). The explicit .copy() makes the result an
# independent frame, so adding or overwriting columns afterwards does not go
# through a slice of the original (SettingWithCopy hazard).
election_data = election_data.loc[election_data['pct_turnout'] <= 100].copy()

In [10]:
# Make region names presentable for the legend: title-case them, then turn
# underscores into spaces (e.g. 'hampton_roads' -> 'Hampton Roads').
def _prettify_region(raw_region):
    """Return the region label in title case with underscores as spaces."""
    return raw_region.title().replace('_', ' ')


election_data['region'] = election_data['region'].map(_prettify_region)

In [65]:
# Animated bubble chart: turnout vs. absentee share per locality, one frame per
# year. Bubble area encodes total registered voters; colour encodes region.
fig = px.scatter(
    election_data,
    x='pct_turnout',
    y='pct_absentee',
    size="TotalRegisteredVoters",
    size_max=50,
    color='region',
    # Opacity is set here rather than through update_traces(marker=...), so
    # the size mapping above stays intact and the setting is applied when the
    # figure (including its animation frames) is built.
    opacity=0.5,
    hover_name="locality",
    labels={
        'locality': 'Locality',
        'pct_turnout': 'Percent Turnout',
        'pct_absentee': 'Percent Absentee',
        'TotalRegisteredVoters': 'Total Registered Voters',
        'year': 'Year',
        'region': 'Region',
    },
    # Hide fields already shown elsewhere (hover title, legend, slider) and
    # format the two percentages.
    hover_data={
        'locality': False,
        'region': False,
        'year': False,
        'TotalRegisteredVoters': True,
        'pct_turnout': ':.2f',
        'pct_absentee': ':.0f',
    },
    animation_frame='year',
    color_discrete_sequence=["red", "green", "blue", "goldenrod", "magenta", "purple", "pink", "black"],
    height=700,
    width=1100,
)

fig.update_layout(
    title={
        'text': "Percent Turnout vs. Percent Absentee",
        'y': 0.95,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top',
    },
    font={
        'family': "arial",
        'size': 20,
        'color': "Black",
    },
)

# NOTE: the previous update_traces(marker={'size': 10, ...}) call replaced the
# per-locality size array of the initial traces with a constant, so the first
# view was no longer a bubble chart and disagreed with the animation frames.
# It has been removed on purpose.

# Fixed axis ranges keep the view stable across animation frames; the y-axis
# starts below zero so bubbles near 0% are not clipped.
fig.update_xaxes(range=[0, 100])
fig.update_yaxes(range=[-10, 100])
fig.write_html('bubble.html')
fig.write_image('bubble.png')
fig.show()

## Part 3 Task: Choropleth Map
---

In [12]:
# Build the 2022 per-locality turnout table used by the choropleth.
election_data_choro = pd.read_csv('./data/election_data.csv')

# .copy() gives an independent frame, so the column assignments below do not
# write into a slice of election_data_choro (SettingWithCopy hazard).
election_2022 = election_data_choro.loc[election_data_choro['year'] == 2022].copy()

# Ballots cast as a percentage of active registered voters.
election_2022['pct_turnout'] = (
    election_2022['TotalVoteTurnout'] / election_2022['ActiveRegisteredVoters']
) * 100

# Title-case locality names, then restore the lower-case connecting words in
# the two names that title-casing changes.
election_2022['locality'] = (
    election_2022['locality']
    .str.title()
    .replace({
        'King And Queen County': 'King and Queen County',
        'Isle Of Wight County': 'Isle of Wight County',
    })
)

In [13]:
# Replace the row labels inherited from the full table with a fresh 0..n-1
# index; the old labels are discarded rather than kept as a column.
election_2022 = election_2022.reset_index(
    drop=True,
)

In [14]:
# Inspect the computed turnout percentages.
election_2022.loc[:, 'pct_turnout']

0      55.217265
1      47.126978
2      54.196723
3      51.970775
4      56.421181
         ...    
128    53.321344
129    47.094344
130    39.203063
131    46.849207
132    58.140364
Name: pct_turnout, Length: 133, dtype: float64

In [15]:
# GeoJSON file passed to the choropleth as its geometry source.
geo_json = "./data/counties_VA.json"
# [latitude, longitude] used as the initial centre of the folium map.
US_CENTER = [
    38,
    -79.3,
]

In [67]:
# Choropleth of 2022 turnout by locality, with a hover tooltip showing the
# locality name and a click popup showing its turnout.

# tiles=None: no base tile layer is added, only the choropleth.
m = folium.Map(US_CENTER, zoom_start=7, tiles=None, height=500, width=1000)

# Rows of election_2022 are matched to GeoJSON features through
# locality <-> feature.properties.NAME.
c = folium.Choropleth(
    geo_json,
    data=election_2022,
    columns=['locality', 'pct_turnout'],
    key_on='feature.properties.NAME',
    highlight=True,
    fill_color='YlOrRd',
    bins=7,
    legend_name='Percent of Voters in 2022',
    fill_opacity=0.5,
).add_to(m)

folium.GeoJsonTooltip(fields = ['NAME'], aliases=[''],
                      style = ('background-color:grey;color:white;font-size:medium;\
                      padding-left:0px')).add_to(c.geojson)

# Attach a formatted turnout string to every GeoJSON feature so the popup can
# show it. Iterate over the features themselves (not a range derived from the
# DataFrame length) so a feature-count mismatch cannot raise IndexError or
# leave features unannotated. The first row per locality wins.
turnout_by_locality = (
    election_2022
    .drop_duplicates('locality')
    .set_index('locality')['pct_turnout']
    .to_dict()
)
for feature in c.geojson.data['features']:
    properties = feature['properties']
    turnout = turnout_by_locality.get(properties['NAME'])
    if turnout is None or pd.isna(turnout):
        # No matching row (or missing value): show a placeholder, don't crash.
        properties['pct_turnout'] = 'No data'
    else:
        properties['pct_turnout'] = str(int(round(turnout))) + '%'

folium.GeoJsonPopup(fields = ['pct_turnout'], aliases= ['Percent Turnout'],
                    style = ('background-color:grey;color:white;font-size:medium;\
                    padding-left:0px')).add_to(c.geojson)

title_text = 'Voter Turnout in Virginia Cities and Counties (2022)'
# The style attribute must not contain nested double quotes, and italics are
# set with font-style (font-weight has no 'italic' value).
title_html = f'<h1 align="center" style="font-size:26px;font-style:italic;font-family:arial">{title_text}</h1>'
m.get_root().html.add_child(folium.Element(title_html))
m.save('map.html')
m