## 1. Transit Score

In [193]:
import pandas as pd
from pandas.io.json import json_normalize
import requests #might need to install this

**Transit Score Documentation:** https://www.walkscore.com/professional/public-transit-api.php <br>
**Google Sheet of Supported Places:** https://docs.google.com/spreadsheets/d/1pz8GtZKEPISjq-pbYzfRJRD59nLI-_VGgLZiWFAv4gw/edit?hl=en&hl=en#gid=0 <br>
**Requests Documentation:** https://pypi.org/project/requests/

In [276]:
def get_data(lat, lng):
    '''
    Gets transit score data for one location.
    
    Inputs:
        lat, lng (str or number): latitude and longitude of the location
    Returns:
        (dict) the parsed JSON body of the response, or None when the API
        does not answer with HTTP 200 or the body is not valid JSON
    '''
    import os  # local import: the notebook's import cell does not import os

    # NOTE(review): an API key is committed in source here. Setting the
    # WALKSCORE_API_KEY environment variable overrides it; the literal is kept
    # only as a fallback so existing runs keep working.
    key = os.environ.get('WALKSCORE_API_KEY', '4c4eb18a1eea25128110eaf683aefab4')
    url = 'https://transit.walkscore.com/transit/score/'

    # Let requests build and escape the query string; the timeout keeps one
    # stalled call from hanging a whole batch of lookups.
    r = requests.get(url, params={'lat': lat, 'lon': lng, 'wsapikey': key}, timeout=30)

    # status_code is never None, so compare against 200 explicitly.
    if r.status_code != 200:
        return None

    try:
        return r.json()
    except ValueError:
        # The body was not JSON (requests' JSON decode errors are ValueErrors).
        return None

In [277]:
#Example for Logan Square
get_data('41.9231', '-87.7093')

In [233]:
#Example with a lat, long pair expected to be invalid for the API, to see what get_data returns in that case
get_data('23', '-56')

In [278]:
def create_datalist(df):
    '''
    Collects the transit score data for every place in a dataframe.
    
    Inputs:
        df (pandas Dataframe): one row per place, with "Place_Name", "lat"
            and "lng" columns
    Returns:
        list with one get_data result per place, each tagged with a
        "Place_Name" key; places for which get_data returns None are skipped
    '''
    datalist = []
    
    for row in df.itertuples():
        data = get_data(row.lat, row.lng)
        if data is not None:
            # Tag the record so it can be joined back to the place table.
            data["Place_Name"] = row.Place_Name
            datalist.append(data)
    
    return datalist

In [None]:
def create_dataframe(data_list):
    '''
    Given the list of json objects containing transit score data, create dataframe.

    Inputs:
        data_list (list of dict): transit score records, e.g. the list
            returned by create_datalist
    Returns:
        (pandas Dataframe) one row per record, without the 'help_link' and
        'logo_url' columns, plus 'nearby_routes', 'bus', 'rail' and 'other'
        columns holding the first four numbers found in 'summary' (kept as
        strings; missing when 'summary' is absent or has fewer than four numbers)
    '''
    count_cols = ['nearby_routes', 'bus', 'rail', 'other']

    # pandas >= 1.0 exposes json_normalize at top level; older releases only
    # provide it under pandas.io.json.
    normalize = getattr(pd, 'json_normalize', None)
    if normalize is None:
        from pandas.io.json import json_normalize as normalize

    df = normalize(data_list)

    # Not every record is guaranteed to carry these keys, so do not fail on absence.
    df = df.drop(columns=['help_link', 'logo_url'], errors='ignore')

    if 'summary' in df.columns:
        # e.g. "9 nearby routes: 7 bus, 2 rail, 0 other" -> '9', '7', '2', '0'
        counts = df['summary'].str.extract(r'(\d+)\D+(\d+)\D+(\d+)\D+(\d+)')
        for name, position in zip(count_cols, counts.columns):
            df[name] = counts[position]
    else:
        # No summaries at all (for example an empty data_list): keep the schema stable.
        for name in count_cols:
            df[name] = pd.Series(index=df.index, dtype=object)

    return df

In [201]:
#This was an old example to show the end product. Quota may be met for today, so do not rerun this code.
#Note that my code should produce this dataframe with a column for "Place_Name" as well. It does not show here
#because I met my daily quota.
#NOTE(review): chi_neighborhoods is not defined in the visible part of this notebook, so this call raises
#NameError on a fresh kernel unless that list is created elsewhere first.
create_dataframe(chi_neighborhoods)

Unnamed: 0,description,summary,transit_score,ws_link,nearby_routes,bus,rail,other
0,Excellent Transit,"9 nearby routes: 7 bus, 2 rail, 0 other",70,https://www.walkscore.com/score/loc/lat=41.923...,9,7,2,0
1,Good Transit,"10 nearby routes: 9 bus, 1 rail, 0 other",65,https://www.walkscore.com/score/loc/lat=41.794...,10,9,1,0


## 2. Loading Census Data

In [124]:
import censusdata

#Download the raw "acsse" 2018 data for every place in state 17
acsse = censusdata.download(
    "acsse",
    2018,
    censusdata.censusgeo([("state", "17"), ("place", "*")]),
    ["K200101_001E",  "GEO_ID"],
).reset_index()

#Friendlier column names, and the censusgeo objects as plain strings
acsse = acsse.rename(columns={"K200101_001E": "Population", "index": "censusgeo"})
acsse["censusgeo"] = acsse["censusgeo"].astype(str)

#Everything before the first comma of censusgeo, e.g. "Wheeling village"
acsse["Location"] = acsse["censusgeo"].str.extract(r'^(.+?),', expand=False)

#The last word of Location is the place type; whatever precedes it is the place name
acsse["Place_Name"] = acsse["Location"].str.extract(r'(.+)\b\w+$', expand=False).str.strip()
acsse["Place_Type"] = acsse["Location"].str.extract(r'\b(\w+)$', expand=False).str.strip()

In [125]:
#(rows, columns) of the census table
acsse.shape

(128, 6)

In [126]:
#Preview the first rows of the census table, including the derived Location, Place_Name and Place_Type columns
acsse.head()

Unnamed: 0,censusgeo,Population,GEO_ID,Location,Place_Name,Place_Type
0,"Wheeling village, Illinois: Summary level: 160...",38877,1600000US1781087,Wheeling village,Wheeling,village
1,"Glen Ellyn village, Illinois: Summary level: 1...",27558,1600000US1729756,Glen Ellyn village,Glen Ellyn,village
2,"Normal town, Illinois: Summary level: 160, sta...",55152,1600000US1753234,Normal town,Normal,town
3,"DeKalb city, Illinois: Summary level: 160, sta...",43888,1600000US1719161,DeKalb city,DeKalb,city
4,"Collinsville city, Illinois: Summary level: 16...",25400,1600000US1715599,Collinsville city,Collinsville,city


## 3. Linking Census "Places" with Longitude and Latitude

**Link to US Cities Database**: https://simplemaps.com/data/us-cities <br>
In my directory, it is entitled "uscities.csv". I've added "uscities.csv" to our repo.

In [60]:
#Read the US cities table from the repo copy of the CSV
cities = pd.read_csv("uscities.csv")
#Keep only the rows whose state_id is Illinois
il_cities = cities.loc[cities['state_id'].eq('IL')]

In [67]:
#(rows, columns) of the Illinois subset of the cities table
il_cities.shape

(1364, 19)

In [79]:
#Preview the first rows of the Illinois cities table
il_cities.head()

Unnamed: 0,city,city_ascii,state_id,state_name,county_fips,county_name,county_fips_all,county_name_all,lat,lng,population,density,source,military,incorporated,timezone,ranking,zips,id
15521,Trenton,Trenton,IL,Illinois,17027,Clinton,17027,Clinton,38.607,-89.6844,2638.0,599.0,polygon,False,True,America/Chicago,3,62293,1840010764
15522,Strawn,Strawn,IL,Illinois,17105,Livingston,17105,Livingston,40.6537,-88.3999,95.0,71.0,polygon,False,True,America/Chicago,3,61775,1840011789
15523,Tilton,Tilton,IL,Illinois,17183,Vermilion,17183,Vermilion,40.0941,-87.6398,2584.0,311.0,polygon,False,True,America/Chicago,3,61834 61833 61832,1840012188
15524,Bluford,Bluford,IL,Illinois,17081,Jefferson,17081,Jefferson,38.3256,-88.7366,670.0,171.0,polygon,False,True,America/Chicago,3,62814,1840012910
15525,Gillespie,Gillespie,IL,Illinois,17117,Macoupin,17117,Macoupin,39.1258,-89.8173,5089.0,817.0,polygon,False,True,America/Chicago,3,62033,1840008489


Notice that the "Place_Name" column in acsse dataframe matches with "city" column in il_cities dataframe regardless of whether the "Place_Type" is "city" or "village".

In [128]:
#Spot check: il_cities row(s) whose city equals the Place_Name of the acsse row labelled 0
il_cities[il_cities["city"]==acsse["Place_Name"][0]]

Unnamed: 0,city,city_ascii,state_id,state_name,county_fips,county_name,county_fips_all,county_name_all,lat,lng,population,density,source,military,incorporated,timezone,ranking,zips,id
16279,Wheeling,Wheeling,IL,Illinois,17031,Cook,17031,Cook,42.1308,-87.924,38562.0,1716.0,polygon,False,True,America/Chicago,3,60090 60078,1840011334


In [148]:
#Attach the il_cities columns to each census place by matching Place_Name against city
merged = acsse.merge(il_cities, how="inner", left_on="Place_Name", right_on="city")

In [149]:
#Same number of rows as acsse (128), which suggests every place found a match.
#NOTE(review): with an inner join this is not proof of a one-to-one match -- unmatched places are dropped
#and a Place_Name that matches several city rows adds rows, so the two effects can cancel out.
merged.shape

(128, 25)

In [151]:
#Preview the first two merged rows
merged.head(2)

Unnamed: 0,censusgeo,Population,GEO_ID,Location,Place_Name,Place_Type,city,city_ascii,state_id,state_name,...,lng,population,density,source,military,incorporated,timezone,ranking,zips,id
0,"Wheeling village, Illinois: Summary level: 160...",38877,1600000US1781087,Wheeling village,Wheeling,village,Wheeling,Wheeling,IL,Illinois,...,-87.924,38562.0,1716.0,polygon,False,True,America/Chicago,3,60090 60078,1840011334
1,"Glen Ellyn village, Illinois: Summary level: 1...",27558,1600000US1729756,Glen Ellyn village,Glen Ellyn,village,Glen Ellyn,Glen Ellyn,IL,Illinois,...,-88.0629,28045.0,1595.0,polygon,False,True,America/Chicago,3,60137 60138,1840011405


In [159]:
#Keep only the location-related columns: everything dropped here comes from il_cities,
#whose data is one year behind
df = merged.drop(
    columns=[
        'city', 'city_ascii', 'county_fips_all', 'county_name_all',
        'population', 'density', 'source', 'military', 'incorporated',
        'timezone', 'ranking',
    ]
)

In [166]:
#Preview the first two rows after dropping the non-location columns
df.head(2)

Unnamed: 0,censusgeo,Population,GEO_ID,Location,Place_Name,Place_Type,state_id,state_name,county_fips,county_name,lat,lng,zips,id
0,"Wheeling village, Illinois: Summary level: 160...",38877,1600000US1781087,Wheeling village,Wheeling,village,IL,Illinois,17031,Cook,42.1308,-87.924,60090 60078,1840011334
1,"Glen Ellyn village, Illinois: Summary level: 1...",27558,1600000US1729756,Glen Ellyn village,Glen Ellyn,village,IL,Illinois,17043,DuPage,41.8667,-88.0629,60137 60138,1840011405


In [263]:
#This cannot be run today because the API quota is met, but the intended sequence is:
#fetch the transit data per place, tabulate it, then join it back onto the place table.
data_list = create_datalist(df.loc[:, ['Place_Name', 'lat', 'lng']])
ts = create_dataframe(data_list)
tsdf = ts.merge(df, how="inner", on="Place_Name")