## Importing Dependencies

In [1]:
# Importing dependencies 
import csv
import json
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pprint as pp
import requests
# from config import census_key

## Reading in the cached ACS Flows query result (JSON) and the Gazetteer county data CSVs

In [2]:
# Optional shortcut: when the Census Bureau API query has already been run and the
# notebook has been re-opened, reload the saved query result from disk so the cells
# below can run without querying the API again.
with open("../Resources/jsons/census_acs_query_result.json", mode="r") as cached_query_file:
    response_dict = json.loads(cached_query_file.read())

In [3]:
# Gazetteer county table: county names plus INTPTLAT / INTPTLONG coordinates.
county_zips_df = pd.read_csv(
    filepath_or_buffer="../Resources/great_circle/2019_Gaz_counties_national.csv"
)

In [4]:
# Lookup table linking full state names to their two-letter postal codes.
state_abbreviations_df = pd.read_csv(
    filepath_or_buffer="../Resources/great_circle/state_abbreviations.csv"
)

In [5]:
# Preview the first five rows of the Gazetteer county table.
county_zips_df.head(n=5)

Unnamed: 0,USPS,GEOID,ANSICODE,NAME,ALAND,AWATER,ALAND_SQMI,AWATER_SQMI,INTPTLAT,INTPTLONG
0,AL,1001,161526,Autauga County,1539602000.0,25706961,594.444,9.926,32.532237,-86.64644
1,AL,1003,161527,Baldwin County,4117622000.0,1132980868,1589.823,437.446,30.659218,-87.746067
2,AL,1005,161528,Barbour County,2292160000.0,50523213,885.008,19.507,31.870253,-85.405104
3,AL,1007,161529,Bibb County,1612167000.0,9602089,622.461,3.707,33.015893,-87.127148
4,AL,1009,161530,Blount County,1670104000.0,15015467,644.831,5.798,33.977358,-86.56644


In [6]:
# List the column labels to check the header row for stray whitespace
# (the INTPTLONG label is read in padded with trailing spaces).
county_zips_df.columns

Index(['USPS', 'GEOID', 'ANSICODE', 'NAME', 'ALAND', 'AWATER', 'ALAND_SQMI',
       'AWATER_SQMI', 'INTPTLAT',
       'INTPTLONG                                                                                                               '],
      dtype='object')

In [7]:
# Strip surrounding whitespace from every column label. The Gazetteer header pads
# "INTPTLONG" with trailing spaces; stripping (rather than matching the exact padded
# string) keeps working if the amount of padding changes, and re-running this cell
# is a no-op because a new frame is assigned instead of mutating in place.
county_zips_df = county_zips_df.rename(columns=str.strip)

In [8]:
# Preview the first five rows of the state name / postal code lookup table.
state_abbreviations_df.head(n=5)

Unnamed: 0,State,Abbrev,Code
0,Alabama,Ala.,AL
1,Alaska,Alaska,AK
2,Arizona,Ariz.,AZ
3,Arkansas,Ark.,AR
4,California,Calif.,CA


## Creating a data frame of origin-to-destination county routes from the ACS Flows query results

In [9]:
# Flatten the query results into one [origin_county, origin_state, destination_county,
# destination_state] row per record. response_dict maps each key (a state name, going
# by the original variable naming) to a list of records, and each record is read
# positionally: [0] destination county, [1] origin county, [2] destination state,
# [3] origin state.
# No records are skipped: a record that is too short or not indexable raises here.
route_list = [
    [record[1], record[3], record[0], record[2]]
    for state_records in response_dict.values()
    for record in state_records
]
route_df = pd.DataFrame(
    route_list,
    columns=["origin_county", "origin_state", "destination_county", "destination_state"],
)
# Discard the row labelled 0, i.e. the very first record that was read.
# NOTE(review): presumably this is the API's header row — confirm, and check whether
# the result lists of the remaining keys each carry a header row of their own.
route_df = route_df.drop([0])

In [10]:
# Preview the first five origin/destination route rows.
route_df.head(n=5)

Unnamed: 0,origin_county,origin_state,destination_county,destination_state
1,,Africa,Alameda County,California
2,,Asia,Alameda County,California
3,,Central America,Alameda County,California
4,,Caribbean,Alameda County,California
5,,Europe,Alameda County,California


## Creating a new data frame of counties with their full state names and lat/lng coordinates

In [11]:
# Attach the full state name to every Gazetteer county by matching the county's
# USPS code against the lookup table's Code column. A left join keeps every county
# row even when no matching code is found.
gazetter_plus_abbr_df = county_zips_df.merge(
    right=state_abbreviations_df,
    how="left",
    left_on="USPS",
    right_on="Code",
)

In [12]:
# Preview the merged county / state-name table.
gazetter_plus_abbr_df.head(n=5)

Unnamed: 0,USPS,GEOID,ANSICODE,NAME,ALAND,AWATER,ALAND_SQMI,AWATER_SQMI,INTPTLAT,INTPTLONG,State,Abbrev,Code
0,AL,1001,161526,Autauga County,1539602000.0,25706961,594.444,9.926,32.532237,-86.64644,Alabama,Ala.,AL
1,AL,1003,161527,Baldwin County,4117622000.0,1132980868,1589.823,437.446,30.659218,-87.746067,Alabama,Ala.,AL
2,AL,1005,161528,Barbour County,2292160000.0,50523213,885.008,19.507,31.870253,-85.405104,Alabama,Ala.,AL
3,AL,1007,161529,Bibb County,1612167000.0,9602089,622.461,3.707,33.015893,-87.127148,Alabama,Ala.,AL
4,AL,1009,161530,Blount County,1670104000.0,15015467,644.831,5.798,33.977358,-86.56644,Alabama,Ala.,AL


In [13]:
# Keep only the columns needed for the route joins: county name, state name,
# state code and the county's latitude / longitude.
gazetter_plus_abbr_df = gazetter_plus_abbr_df.loc[
    :, ["NAME", "State", "Code", "INTPTLAT", "INTPTLONG"]
]

In [14]:
# Preview the trimmed county table.
gazetter_plus_abbr_df.head(n=5)

Unnamed: 0,NAME,State,Code,INTPTLAT,INTPTLONG
0,Autauga County,Alabama,AL,32.532237,-86.64644
1,Baldwin County,Alabama,AL,30.659218,-87.746067
2,Barbour County,Alabama,AL,31.870253,-85.405104
3,Bibb County,Alabama,AL,33.015893,-87.127148
4,Blount County,Alabama,AL,33.977358,-86.56644


In [15]:
# Preview route_df again ahead of the merges below.
route_df.head(n=5)

Unnamed: 0,origin_county,origin_state,destination_county,destination_state
1,,Africa,Alameda County,California
2,,Asia,Alameda County,California
3,,Central America,Alameda County,California
4,,Caribbean,Alameda County,California
5,,Europe,Alameda County,California


## Merging gazetter_plus_abbr_df and route_df so that the resulting data frame displays the origin and destination counties with their lat/lng coordinates

In [26]:
# Join county coordinates onto each route's ORIGIN by (county name, state name).
# The outer join keeps unmatched rows from both sides; they carry NaN in the
# columns of the side that had no match.
great_circle_df = gazetter_plus_abbr_df.merge(
    right=route_df,
    how="outer",
    left_on=["NAME", "State"],
    right_on=["origin_county", "origin_state"],
)

In [27]:
# Preview the routes with origin coordinates attached.
great_circle_df.head(n=5)

Unnamed: 0,NAME,State,Code,INTPTLAT,INTPTLONG,origin_county,origin_state,destination_county,destination_state
0,Autauga County,Alabama,AL,32.532237,-86.64644,Autauga County,Alabama,Santa Clara County,California
1,Baldwin County,Alabama,AL,30.659218,-87.746067,Baldwin County,Alabama,Alameda County,California
2,Baldwin County,Alabama,AL,30.659218,-87.746067,Baldwin County,Alabama,Collin County,Texas
3,Baldwin County,Alabama,AL,30.659218,-87.746067,Baldwin County,Alabama,Wayne County,Michigan
4,Barbour County,Alabama,AL,31.870253,-85.405104,Barbour County,Alabama,King County,Washington


In [28]:
# Keep the route columns plus the coordinates just joined, and label those
# coordinates as belonging to the origin county.
great_circle_df = (
    great_circle_df
    .loc[:, ["origin_county", "origin_state", "INTPTLAT",
             "INTPTLONG", "destination_county", "destination_state"]]
    .rename(columns={"INTPTLAT": "origin_lat", "INTPTLONG": "origin_lng"})
)

In [29]:
# Join county coordinates a second time, now onto each route's DESTINATION.
# The right join keeps every row of the route table (the right-hand frame).
great_circle_df = gazetter_plus_abbr_df.merge(
    right=great_circle_df,
    how="right",
    left_on=["NAME", "State"],
    right_on=["destination_county", "destination_state"],
)

In [30]:
# Keep the route and origin-coordinate columns plus the coordinates just joined,
# and label those coordinates as belonging to the destination county.
great_circle_df = (
    great_circle_df
    .loc[:, ["origin_county", "origin_state", "origin_lat",
             "origin_lng", "destination_county", "destination_state",
             "INTPTLAT", "INTPTLONG"]]
    .rename(columns={"INTPTLAT": "destination_lat", "INTPTLONG": "destination_lng"})
)

In [31]:
# Build a readable label for each route:
# "<origin county>, <origin state> - <destination county>, <destination state>".
great_circle_df = great_circle_df.assign(
    route_name=lambda routes: (
        routes["origin_county"] + ", " + routes["origin_state"]
        + " - "
        + routes["destination_county"] + ", " + routes["destination_state"]
    )
)

In [32]:
# Reduce the frame to the four coordinates and the route label, in that order.
great_circle_df = great_circle_df.loc[
    :,
    ["origin_lat", "origin_lng", "destination_lat", "destination_lng", "route_name"],
]

In [33]:
# Preview the finished route table.
great_circle_df.head(n=5)

Unnamed: 0,origin_lat,origin_lng,destination_lat,destination_lng,route_name
0,30.659218,-87.746067,37.647139,-121.912488,"Baldwin County, Alabama - Alameda County, Cali..."
1,33.770516,-85.827909,37.647139,-121.912488,"Calhoun County, Alabama - Alameda County, Cali..."
2,33.553444,-86.896536,37.647139,-121.912488,"Jefferson County, Alabama - Alameda County, Ca..."
3,34.764238,-86.55108,37.647139,-121.912488,"Madison County, Alabama - Alameda County, Cali..."
4,34.309564,-86.321668,37.647139,-121.912488,"Marshall County, Alabama - Alameda County, Cal..."


In [34]:
# Drop every route that has a missing value in any column (for example a county
# that found no coordinates in either merge).
complete_great_circle_df = great_circle_df.dropna(axis=0, how="any")

In [35]:
# Save the complete routes for the KML step. The row index is written as the first
# (unnamed) CSV column, so the data columns start at position 1 in the saved file.
complete_great_circle_df.to_csv(
    path_or_buf="../Resources/great_circle/complete_great_circle_data.csv",
    index=True,
)

## Using script from geodesiccalc.py to create a .kml file to use in Tableau 

In [37]:
# -*- coding: utf-8 -*-
"""
Created on Sun Aug  6 20:52:38 2017
@author: @framseger
"""

# import packages:
from geographiclib.geodesic import Geodesic
import pandas as pd
from lxml import etree as ET

# set up kml structure:
kml = ET.Element('kml')
document = ET.SubElement(kml, 'Document')

# read data
df = pd.read_csv('../Resources/great_circle/complete_great_circle_data.csv')

# loop through routes, calculate waypoints:
# Columns are read by NAME rather than by position, so the loop does not depend on
# whether the CSV carries a leading index column, and the row label is never used
# as a row position.
for row in df.itertuples():
    x = row.Index
    # Plain Python floats are passed on so that repr() below always yields a bare
    # number; NumPy scalars print as "np.float64(...)" under NumPy >= 2, which
    # would corrupt the coordinate text.
    olat = float(row.origin_lat)
    olon = float(row.origin_lng)
    dlat = float(row.destination_lat)
    dlon = float(row.destination_lng)
    route = row.route_name

    # Solve the inverse geodesic problem for the route, then build the geodesic
    # line leaving the origin along the initial azimuth.
    p = Geodesic.WGS84.Inverse(olat, olon, dlat, dlon)
    l = Geodesic.WGS84.Line(p['lat1'], p['lon1'], p['azi1'])

    # p['s12'] is the route length (metres, per the geographiclib documentation).
    # Routes of 1,000,000 or more get one segment per 100,000 units of length;
    # shorter routes always get 10 segments.
    if p['s12'] >= 1000000:
        num = int(p['s12'] / 100000)  # number of waypoints depending on length
    else:
        num = 10

    # Evaluate num + 1 equally spaced points (both endpoints included) and format
    # each as "lon,lat,0 ". LONG_UNROLL keeps longitude continuous along the line
    # instead of wrapping it.
    waypoints = []
    for i in range(num + 1):
        b = l.Position(i * p['s12'] / num, Geodesic.STANDARD | Geodesic.LONG_UNROLL)
        waypoints.append(repr(b['lon2']) + "," + repr(b['lat2']) + ",0 ")
    output = ''.join(waypoints)
    #print(str(x)+"/"+str(len(df))) #progress counter

    # inner part of kml file:
    placemark = ET.SubElement(document, 'Placemark')
    name = ET.SubElement(placemark, 'name')
    name.text = route
    description = ET.SubElement(placemark, 'description')
    description.text = 'route ID: ' + str(x) + '; distance: ' + str(p['s12'])
    linestring = ET.SubElement(placemark, 'LineString')
    coordinates = ET.SubElement(linestring, 'coordinates')
    coordinates.text = output


# complete kml file and save:
tree = ET.ElementTree(kml)
tree.write('../Resources/great_circle/acs_great_circle.kml', pretty_print=True, xml_declaration=True,   encoding="utf-8")