In [None]:
# Data Analysis of 2020 US Elections

In [None]:
# Import packages
import sys
import pandas as pd
import csv
import plotly.express as px
import requests
import pandas as pd
from bs4 import BeautifulSoup
from uszipcode import SearchEngine, SimpleZipcode
import cenpy as cen

In [None]:
# Load the three input tables
#
# Election results. This data set can be scraped from
# https://www.politico.com/2020-election/results/president/ using selenium.
# The first column of the file is discarded after reading.
df1 = pd.read_csv('US_elections_2020.csv')
df1 = df1.drop(columns=df1.columns[[0]])
#
# Race and poverty percentages
df2 = pd.read_csv('US_Poverty_and_Race_Statistics_2020.csv')
#
# Number of people present in each state
df3 = pd.read_csv('US_states_and_territories.csv')

In [None]:
# Combine the three data sets into one frame (DataFrame.join aligns on the row index)
df = (
    df1.join(df2, lsuffix='_df1', rsuffix='_df2')
       .join(df3, lsuffix='_df', rsuffix='_df3')
)
# Discard the columns sitting at positions 6 and 17 of the joined frame
df = df.drop(columns=df.columns[[6, 17]])

# Narrower view for inspection: also drop positions 8 and 11 through 15
df11 = df.drop(columns=df.columns[[8, 11, 12, 13, 14, 15]])
df11.head(10)

In [None]:
import seaborn as sn
import matplotlib.pyplot as plt

# Drop three more columns before correlating. The labels are looked up by
# position in `df` (not `df11`) and then removed from `df11` by name.
df12 = df11.drop(df.columns[[3,5,10]], axis=1)

# Correlate numeric (and boolean) columns only. Since pandas 2.0, corr() no
# longer silently ignores non-numeric columns and raises if any text column is
# still present; selecting the dtypes explicitly behaves the same on old and
# new pandas versions.
corr_matrix = df12.select_dtypes(include=['number', 'bool']).corr()

# Annotated heatmap of the pairwise correlations, written to correlation.png
sn.heatmap(corr_matrix, annot=True)
plt.xticks(rotation=25, ha='right')
plt.savefig('correlation.png', bbox_inches="tight", dpi=1000)

In [None]:
# Environment check for the plotly figures in this notebook.
# kaleido is imported ahead of the fig.write_image(...) calls in the cells
# below; presumably it is the static-image export backend they need - TODO confirm.
import sys
# To install kaleido with the interpreter running this kernel, uncomment:
# !{sys.executable} -m pip install kaleido
import kaleido # required
kaleido.__version__ # 0.2.1 when this notebook was written

import plotly
plotly.__version__ # 5.5.0 when this notebook was written

# Only the last expression of a cell is displayed, so the two version lookups
# above produce no output; the cell ends with an import.
import plotly.graph_objects as go

In [None]:
# Map of each state's Biden vote share, written to biden_percent.png
fig = px.choropleth(
    data_frame=df,
    locations='State_code',
    locationmode='USA-states',
    scope='usa',
    color='Biden_percent',
    range_color=[0, 100],
    color_continuous_scale='RdBu',
    hover_name='state',
    hover_data=['Trump_votes', 'Biden_votes', 'Trump_percent', 'Biden_percent'],
    title='US 2020 Elections',
)
fig.write_image('biden_percent.png', scale=10)

In [None]:
# Map of median household income per state (vote figures shown on hover),
# written to household.png
fig = px.choropleth(
    data_frame=df,
    locations='State_code',
    locationmode='USA-states',
    scope='usa',
    color='Median Household Income',
    range_color=[30000, 100000],
    color_continuous_scale='RdBu',
    hover_name='state',
    hover_data=['Trump_votes', 'Biden_votes', 'Trump_percent', 'Biden_percent'],
    title='US 2020 Median Incomes - Embedded Voting Habits',
)
fig.write_image('household.png', scale=10)

# Looking at these two plots you can see that the states with the higher median
# incomes were less likely to vote for Trump

In [None]:
# Largest value in the 'All-age Poverty Rate' column. Series.max() skips
# missing values, whereas the builtin max() gives an order-dependent answer
# as soon as a NaN is present.
df['All-age Poverty Rate'].max()

In [None]:
# Map of the all-age poverty rate per state (vote figures shown on hover),
# written to poverty.png
fig = px.choropleth(
    data_frame=df,
    locations='State_code',
    locationmode='USA-states',
    scope='usa',
    color='All-age Poverty Rate',
    range_color=[5, 20],
    color_continuous_scale='RdBu',
    hover_name='state',
    hover_data=['Trump_votes', 'Biden_votes', 'Trump_percent', 'Biden_percent'],
    title='US 2020 Poverty Rate',
)

fig.write_image('poverty.png', scale=10)

# Compare this poverty-rate map with the election-results and median-income maps above:
# the states with the higher median incomes were less likely to vote for Trump

In [None]:
# Trump votes expressed as a fraction of each row's 2020 population
Trump_votes_scaled = df["Trump_votes"].div(df["2020 pop."])
# df4 = the election frame plus that ratio as an extra 'Trump_votes_scaled' column
df4 = df.join(
    Trump_votes_scaled.to_frame('Trump_votes_scaled'),
    lsuffix='_df',
    rsuffix='_df4',
)

# 2-D density of Biden vote share against median household income, with a
# histogram on each margin; written to heatmapB.png
fig = px.density_heatmap(
    data_frame=df4,
    x="Biden_percent",
    y="Median Household Income",
    marginal_x="histogram",
    marginal_y="histogram",
)
fig.write_image('heatmapB.png', scale=10)

# This plot suggests that the most likely Trump supporters are from mid-low median income states.

In [None]:
# 2-D density of population-scaled Trump votes against the all-age poverty
# rate, with a histogram on each margin
fig = px.density_heatmap(
    data_frame=df4,
    x="Trump_votes_scaled",
    y="All-age Poverty Rate",
    marginal_x="histogram",
    marginal_y="histogram",
)
fig.show()

# This plot seems, initially, counterintuitive given the previous one, as you would
# expect the states with poorer median income to be the ones with higher rates of unemployment

In [None]:
# 2-D density of population-scaled Trump votes against the
# 'Percentage of White' column, with a histogram on each margin
fig = px.density_heatmap(
    data_frame=df4,
    x="Trump_votes_scaled",
    y="Percentage of White",
    marginal_x="histogram",
    marginal_y="histogram",
)
fig.show()

# You can produce plots for all races but you see that the states that
# are actually causing this voting pattern are those that have high rates
# of unemployment among people who identify as white.  

In [None]:
# Let's break this down a bit further and take a look at all races given
#
race = ['Percentage of Hispanic or Latino', 'Percentage of White',
        'Percentage of Black or African American',
        'Percentage of American Indian and Alaska Native',
        'Percentage of Asian', 'Percentage of Native Hawaiian and Other Pacific Islander',
        'Percentage of Some Other Race', 'Percentage of Population of Two or More Race']

# One bar chart of Trump vote share per state for each column, coloured by
# that column. Iterate over the list itself so every entry is plotted: the
# earlier range(0, 7) stopped one short and never drew the last of the eight.
for race_column in race:
    fig = px.bar(df, x = "state", y = "Trump_percent", color = race_column)
    fig.show()
    
# Clearly there is not a vast amount of data available for some states
# for certain minorities. However, if you look particularly at the
# unemployment percentages of Caucasians and African Americans specifically,
# there is more of a pattern that white unemployment rates had an effect on
# voting for Trump than was the case for African American voters.

In [None]:
import requests
import pandas as pd
from bs4 import BeautifulSoup

# Page passed to getCodes() below to collect the zipcodes to scrape
urlMain = 'https://statisticalatlas.com/place/United-States/Overview'
# Per-zipcode educational-attainment page; `{}` is filled in with the zipcode
urlAttainment = 'https://statisticalatlas.com/zip/{}/Educational-Attainment'
# The two URLs below are not referenced anywhere else in the visible notebook
urlIncome = 'https://statisticalatlas.com/United-States/Household-Income'
urlRace = 'https://statisticalatlas.com/United-States/Race-and-Ethnicity'


def getPercentages(url, timeout=30):
    """Fetch an educational-attainment page and return three of its figures.

    The page is searched for ``<title>`` elements nested in ``<rect>`` elements
    under the element whose id is ``figure/educational-attainment``; the text of
    the 1st, 3rd and 5th matches is returned.

    Parameters
    ----------
    url : str
        Address of the page to download.
    timeout : float, optional
        Seconds to wait for the server before giving up. Without a timeout a
        stalled connection would block the notebook indefinitely.

    Returns
    -------
    list
        Three strings on success. An empty list when the request fails or the
        server answers with a status other than 200 (the problem is printed).

    Raises
    ------
    IndexError
        If the page contains fewer than five matching elements.
    """
    try:
        res = requests.get(url, timeout=timeout)
    except requests.RequestException as ex:
        # Report transport failures the same way as a bad status code so one
        # unreachable page does not abort a long scraping run.
        print(ex, url)
        return []

    if res.status_code != 200:
        print(res.status_code, url)
        return []

    soup = BeautifulSoup(res.content, "lxml")
    titles = soup.select('[id="figure/educational-attainment"] rect title')
    return [titles[0].text, titles[2].text, titles[4].text]

def getCodes(url, timeout=30):
    """Return the text of every zip link found on the page at ``url``.

    Links are the ``<a>`` elements whose ``href`` contains ``zip`` inside
    elements of class ``info-table-contents-div``.

    Parameters
    ----------
    url : str
        Address of the page to download.
    timeout : float, optional
        Seconds to wait for the server before giving up, so a stalled
        connection cannot block the notebook indefinitely.

    Returns
    -------
    list of str
        Link texts in document order. Empty when the server answers with a
        status other than 200 (the status is printed) or no link matches.
    """
    res = requests.get(url, timeout=timeout)
    if res.status_code != 200:
        # Same reporting convention as getPercentages: say what went wrong
        # instead of silently parsing an error page.
        print(res.status_code, url)
        return []

    soup = BeautifulSoup(res.content, "lxml")
    return [link.text for link in soup.select('.info-table-contents-div a[href*=zip]')]

results = []

# The helpers call requests.get themselves, so the requests.Session that used
# to be opened around this loop was never used and has been removed.
zipcodes = getCodes(urlMain)

for zipcode in zipcodes:
    attainment_url = urlAttainment.format(zipcode)
    try:
        row = getPercentages(attainment_url)
    except IndexError as ex:
        # The page did not contain the expected figures: report it and move on.
        print(ex, attainment_url)
        continue
    if not row:
        # The request failed (already reported by getPercentages). Keep the
        # zipcode but pad the missing figures so every row has four fields;
        # rows of length one make the DataFrame constructor raise when no
        # request succeeds.
        row = [None, None, None]
    results.append([zipcode] + row)

# NOTE: this rebinds `df`, which until this point held the election data.
df = pd.DataFrame(results,columns=['zipcode', 'HD', 'HS', 'NoHS'])
print(df)

In [None]:
# Preview the first five rows of the scraped attainment table
df.head(n=5)

In [None]:
# Number of entries in the zipcode column
df['zipcode'].size

In [None]:
from uszipcode import SearchEngine, SimpleZipcode
search = SearchEngine()

def zco(x):
    """Return the state recorded for zipcode ``x``, or the string 'None'.

    Depending on the uszipcode version, an unknown zipcode yields either
    ``None`` or a result whose ``state`` is empty; both cases map to 'None'
    instead of raising AttributeError on the missing result.
    """
    match = search.by_zipcode(x)
    state = getattr(match, 'state', None)
    return state if state else 'None'

# Add the state of every scraped zipcode as a new column
df['state'] = df['zipcode'].apply(zco)

In [None]:
# Preview the first five rows again, now including the `state` column
df.head(n=5)

In [None]:
# `df` was rebound to the scraped zipcode table in an earlier cell, and that
# table has none of the election columns plotted here, so a top-to-bottom run
# fails with a KeyError. `df4` still holds the election data with all of them.
# Each entry: (columns drawn as bars per state, output file).
bar_plots = [
    (['Biden_percent', 'Trump_percent'], 'fig1.png'),
    (['Percentage of White', 'Trump_percent'], 'fig2.png'),
    (['Percentage of Black or African American', 'Trump_percent'], 'fig3.png'),
]

for y_columns, filename in bar_plots:
    df4.plot.bar(x = 'state', y = y_columns, figsize=(15,5))
    plt.savefig(filename, bbox_inches="tight", dpi=1000)

In [None]:
import seaborn as sns

# Bar chart of Biden vote share per state. `df4` is used because `df` was
# rebound to the scraped zipcode table in an earlier cell and no longer has a
# 'Biden_percent' column; `df4` still holds the election data.
sns.barplot(x = 'state', y = 'Biden_percent', data = df4)