In [2]:
import scipy.stats
import matplotlib.pyplot as plt
import copy
from sklearn import linear_model

import matplotlib
from scipy import stats
from scipy.optimize import curve_fit
from ipywidgets import interact, interactive, fixed
import ipywidgets as widgets
import seaborn as sns
import json
import statsmodels.api as sm

import pandas as pd
import numpy as np
import folium
from folium.features import DivIcon
import requests
from bs4 import BeautifulSoup
import re

%matplotlib inline

# NOTE(review): the following appears to be warning output captured with the imports, not code:
#   from pandas.core import datetools


In [3]:
import sys
# Run pip through the interpreter that is executing this kernel (sys.executable),
# so squarify is installed for the Python this notebook is running under.
!{sys.executable} -m pip install squarify



In [4]:
# Load the existing results from 'FloridaResults.csv' into a DataFrame.
floridaFrame = pd.read_csv('FloridaResults.csv')
# Hard-coded list of Florida county names.
floridaCounties = ["Alachua","Baker","Bay","Bradford","Brevard","Broward","Calhoun","Charlotte","Citrus","Clay","Collier","Columbia","DeSoto","Dixie","Duval","Escambia","Flagler","Franklin","Gadsden","Gilchrist","Glades","Gulf","Hamilton","Hardee","Hendry","Hernando","Highlands","Hillsborough","Holmes","Indian River","Jackson","Jefferson","Lafayette","Lake","Lee","Leon","Levy","Liberty","Madison","Manatee","Marion","Martin","Miami-Dade","Monroe","Nassau","Okaloosa","Okeechobee","Orange","Osceola","Palm Beach","Pasco","Pinellas","Polk","Putnam","Santa Rosa","Sarasota","Seminole","St. Johns","St. Lucie","Sumter","Suwannee","Taylor","Union","Volusia","Wakulla","Walton","Washington"]

In [None]:
# Let's compute the centroids of the counties.

In [5]:
def centeroidnp(arr):
    """Return the mean of the first two columns of a 2-D point array.

    Parameters
    ----------
    arr : numpy.ndarray
        Array indexed as ``arr[:, 0]`` (x values) and ``arr[:, 1]`` (y values),
        one row per point.

    Returns
    -------
    tuple
        ``(mean_x, mean_y)``: each column's sum divided by the number of rows.
    """
    n_points = arr.shape[0]
    xs = arr[:, 0]
    ys = arr[:, 1]
    return np.sum(xs) / n_points, np.sum(ys) / n_points

In [6]:
# Pull the Florida features out of the national county GeoJSON, remember one
# boundary ring per county (used later to place labels), and write a
# Florida-only GeoJSON file for the choropleth.
counties = pd.read_json('us-counties.json')
floridacounties = []
floridaPoints = []
for county in counties['features']:
    # Feature ids are county FIPS codes; the first two characters are the
    # state code, and "12" is Florida.
    if county["id"][0:2] != "12":
        continue
    geometry = county["geometry"]
    coordinates = geometry["coordinates"]
    if geometry.get("type") == "MultiPolygon":
        # A MultiPolygon nests one level deeper (polygons -> rings -> points),
        # so take the exterior ring of its first polygon to get a flat list of
        # [x, y] points like the Polygon case.
        ring = coordinates[0][0]
    else:
        # Polygon: the first entry is the exterior ring.
        ring = coordinates[0]
    floridacounties.append(county)
    floridaPoints.append(ring)
jsonDict = {"type": "FeatureCollection", "features": floridacounties}
with open('fl-counties.json', 'w') as outfile:
    json.dump(jsonDict, outfile)

In [32]:
#construct map of florida partisan leans for election years
def constructMap(df, year):
    """Build a folium choropleth of county partisan lean for one year.

    Parameters
    ----------
    df : DataFrame-like
        Must provide a 'counties' column and a 'Partisan Lean <year>' column.
    year : int
        Year whose 'Partisan Lean <year>' column is drawn.

    Returns
    -------
    folium.Map
        The map with the choropleth layer and one text label per county.

    Notes
    -----
    Reads the globals ``geo`` (GeoJSON passed as ``geo_data``),
    ``floridaPoints`` and ``centeroidnp``. Labels are positioned by row
    position: the n-th entry of ``df['counties']`` is placed at the centroid
    of ``floridaPoints[n]``, so both must be in the same order.
    """
    leanColumn = "Partisan Lean " + str(year)
    foliumMap = folium.Map(location=(28, -81), zoom_start=6)
    foliumMap.choropleth(
        geo_data=geo,
        data=df,
        columns=['counties', leanColumn],
        key_on='feature.properties.name',
        threshold_scale=[-.10, -.05, 0, .05, .10],
        fill_color='RdBu',
        fill_opacity=0.7,
        line_opacity=0.2,
        legend_name=leanColumn
    )
    for position, county in enumerate(df['counties']):
        # Points are stored as (x, y) = (longitude, latitude); folium wants
        # [latitude, longitude].
        longitude, latitude = centeroidnp(np.array(floridaPoints[position]))
        label = DivIcon(
            icon_size=(10,8),
            icon_anchor=(0,0),
            html='<div style="font-size: 6pt">'+ county + '</div>'
        )
        folium.features.Marker(location=[latitude, longitude], icon=label).add_to(foliumMap)
    return foliumMap

In [33]:
# GeoJSON file that constructMap reads through the global `geo`.
geo = r'fl-counties.json'
slider = widgets.IntSlider(value=1976, min=1976, max=2016, step=4)
# constructMap needs the results DataFrame itself. fixed() passes it through
# unchanged without creating a widget; a Select over list(floridaFrame) would
# hand the function a column-name string instead.
# The trailing semicolon suppresses the cell's return-value display.
interact(constructMap, df=fixed(floridaFrame), year=slider);

In [None]:
def constructTreemapForYear(year, df):
    """Draw a treemap of counties sized by vote total and coloured by lean.

    Parameters
    ----------
    year : int
        Year selecting the '<year>Total' and 'Partisan Lean <year>' columns.
    df : pandas.DataFrame
        Must contain 'counties', '<year>Total' and 'Partisan Lean <year>'.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    # Imported locally so the function works on a fresh kernel even when no
    # earlier cell has imported squarify.
    import squarify

    leanColumn = 'Partisan Lean ' + str(year)
    totalColumn = str(year) + "Total"
    # Largest counties first, so squarify lays them out first.
    dfSorted = df.sort_values(totalColumn, ascending=False)

    # Fixed -1..1 colour scale: the same lean value maps to the same colour
    # regardless of which year is displayed.
    norm = matplotlib.colors.Normalize(vmin=-1, vmax=1)
    cmap = matplotlib.cm.RdBu
    colors = [cmap(norm(value)) for value in dfSorted[leanColumn]]

    # NOTE: plt.rc changes the global default font size, so it also affects
    # figures drawn after this call.
    plt.rc('font', size=22)
    squarify.plot(sizes=dfSorted[totalColumn], label=dfSorted['counties'], alpha=0.8, color=colors)
    plt.axis('off')
    fig = plt.gcf()
    fig.set_size_inches(20, 16)
    plt.show()

In [None]:
slider = widgets.IntSlider(value=1976, min=1976, max=2016, step=4)
# constructTreemapForYear needs the results DataFrame itself. fixed() passes it
# through unchanged without creating a widget; a Select over
# list(floridaFrame) would hand the function a column-name string instead.
# The trailing semicolon suppresses the cell's return-value display.
interact(constructTreemapForYear, year=slider, df=fixed(floridaFrame));

In [None]:
def plotSums(df):
    """Plot, per year, how many counties fall on each side of a zero lean.

    For every fourth year from 1976 through 2016 the 'Partisan Lean <year>'
    column is split into values strictly greater than zero (plotted in blue,
    'demList') and all remaining values (plotted in red, 'repList').

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'Partisan Lean <year>' column for each plotted year.

    Returns
    -------
    None
        The plot is drawn on a newly created matplotlib figure.
    """
    years = list(range(1976, 2020, 4))
    demCountiesSums = []
    repCountiesSums = []
    for year in years:
        leans = df['Partisan Lean ' + str(year)]
        demCount = int((leans > 0).sum())
        # One count per year, appended inside the loop so both lists end up
        # the same length as `years`. Anything not strictly positive (zero,
        # negative or NaN) is counted on the 'rep' side.
        demCountiesSums.append(demCount)
        repCountiesSums.append(len(leans) - demCount)

    sumDataframe = pd.DataFrame({'year': years, 'demList': demCountiesSums, 'repList': repCountiesSums})
    f, ax = plt.subplots(1, 1)
    # Draw explicitly on `ax` rather than relying on the current-axes state.
    sns.pointplot(x="year", y="demList", data=sumDataframe, color='blue', ax=ax)
    sns.pointplot(x="year", y="repList", data=sumDataframe, color='red', ax=ax)
    ax.set_xlabel("Year")
    ax.set_ylabel("Number of counties")
    ax.set_yticks(list(range(0, 70, 5)))
In [None]:
def plotLeans(df):
    """Plot, per year, the average lean on each side of zero.

    For every fourth year from 1976 through 2016 the 'Partisan Lean <year>'
    column is split into values strictly greater than zero (plotted in blue,
    'demList') and all remaining values (plotted in red, 'repList'), and the
    mean of each group is plotted.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'Partisan Lean <year>' column for each plotted year.

    Returns
    -------
    None
        The plot is drawn on a newly created matplotlib figure.
    """
    years = list(range(1976, 2020, 4))
    demAverageLean = []
    repAverageLean = []
    for year in years:
        leans = df['Partisan Lean ' + str(year)]
        isDem = leans > 0
        # Series.mean() yields NaN for an empty group instead of raising
        # ZeroDivisionError, and it skips NaN entries.
        demAverageLean.append(leans[isDem].mean())
        repAverageLean.append(leans[~isDem].mean())

    leanDF = pd.DataFrame({'year': years, 'demList': demAverageLean, 'repList': repAverageLean})
    f, ax = plt.subplots(1, 1)
    # Draw explicitly on `ax` rather than relying on the current-axes state.
    sns.pointplot(x="year", y="demList", data=leanDF, color='blue', ax=ax)
    sns.pointplot(x="year", y="repList", data=leanDF, color='red', ax=ax)
    ax.set_xlabel("Year")
    ax.set_ylabel("Partisan Lean")

    # 21 evenly spaced ticks from -0.20 to 0.20 (step 0.02), computed without
    # accumulating floating-point error.
    ax.set_yticks(np.linspace(-0.2, 0.2, 21))

In [None]:
def func(x, a, b):
    """Evaluate the logarithmic model ``a * ln(x) + b``.

    ``x`` may be a scalar or array-like; ``a`` is the coefficient of the
    natural logarithm and ``b`` the constant offset.
    """
    log_x = np.log(x)
    return a * log_x + b

In [None]:
def extendedMap(df):
    """Extrapolate each row's partisan lean to 2020-2040, in place.

    For every row, the values in the 'Partisan Lean <year>' columns for every
    fourth year from 1988 through 2016 are fitted with ``func``
    (``a*log(x) + b``) via ``curve_fit``. The fitted curve is then evaluated
    for 2020, 2024, ..., 2040 and stored in new or overwritten
    'Partisan Lean <year>' columns of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the 'Partisan Lean <year>' columns for 1988-2016.
        It is modified in place.

    Returns
    -------
    None
    """
    fitYears = list(range(1988, 2020, 4))
    futureYears = list(range(2020, 2044, 4))
    fitColumns = ['Partisan Lean ' + str(year) for year in fitYears]

    # Collect predictions per future year, then assign whole columns at once.
    # Rows are visited in frame order, so this does not depend on the index
    # labels being 0..n-1.
    predictions = dict((year, []) for year in futureYears)
    for partisans in df[fitColumns].values.astype(float):
        popt, pcov = curve_fit(func, fitYears, partisans, maxfev=10000)
        for year in futureYears:
            predictions[year].append(func(year, *popt))

    for year in futureYears:
        df['Partisan Lean ' + str(year)] = predictions[year]

In [None]:
# extendedMap takes the results frame (not a year) and adds the extrapolated
# 'Partisan Lean <year>' columns for 2020-2040 to it in place.
extendedMap(floridaFrame)
# Browse the observed and extrapolated years with the choropleth. fixed()
# passes the DataFrame through unchanged without creating a widget.
slider = widgets.IntSlider(value=1976, min=1976, max=2040, step=4)
# The trailing semicolon suppresses the cell's return-value display.
interact(constructMap, df=fixed(floridaFrame), year=slider);