# Chicago Bike Infrastructure Project
<h3>Capstone Project for Data Analytics Certificate<br>
University of Texas<br><br>
Samantha Goodman
<Br>December 2021</h3><br>
This notebook is part 1 of a 5 part series.<br>
    <b>1 - Bike Shops from FourSquare API<br></b>
        2 - Bike Infrastructure<br>
        3 - Background information about neighborhoods<br>
        4 - Analysis<br>
        5 - Model Building and Predictions<br><br>
    Questions this project aims to answer:<br>
    <ul><li>Which community areas (neighborhoods) have the most bike infrastructure, and which have the least?</li>
<li>Are there areas that show an unmet demand for bike infrastructure (higher rates of Divvy trips, but lower rates of bike lanes and repair shops)?</li>
<li>Can I predict bike infrastructure levels based on demographic or community health data?

In [5]:
# Import the necessary libraries
import pandas as pd
import numpy as np
# requests for fetching html of website
import requests

In [6]:
# Foursquare API Login Info
CLIENT_ID = 'V3B13NWETEWPBCDTZN0ZV0DWICYSVMSNIXJ5YVPFM4ILS51M' # my Foursquare ID
CLIENT_SECRET = '2JSJLEWXWFCVC1LEEGYK5TXYPTQMNRZDEFZ1EX0TVHXXWRBK' # my Foursquare Secret
VERSION = '20211204' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: V3B13NWETEWPBCDTZN0ZV0DWICYSVMSNIXJ5YVPFM4ILS51M
CLIENT_SECRET:2JSJLEWXWFCVC1LEEGYK5TXYPTQMNRZDEFZ1EX0TVHXXWRBK


In [7]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [8]:
# function that returns venues from FourSquare, if given a zip code and category
def get_my_venues(zip_code, cat_name, cat_code, rad):
    LIMIT = 1000
    radius = rad
    url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&near={}&v={}&radius={}&limit={}&categoryId={}'.format(CLIENT_ID, CLIENT_SECRET, zip_code, VERSION, radius, LIMIT, cat_code)
    results = requests.get(url).json()
    venues = results['response']['groups'][0]['items']
    
    nearby_venues = pd.json_normalize(venues) # flatten JSON

    # filter columns
    filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng', 'venue.location.address', 'venue.location.city']
    nearby_venues =nearby_venues.loc[:, filtered_columns]

    # filter the category for each row
    nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

    # clean columns
    nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

    return nearby_venues




In [9]:
# Initializes new dataframe to store venues
our_venues = pd.DataFrame()

In [10]:
# Gets csv of zip codes in Chicago - cleaned from csv from City of Chicago Data Portal
#url = 'https://github.com/sjegoodman/walkable-winters/raw/main/Zip_Codes.csv'
chi_zips = pd.read_csv("Zip_Codes.csv")
zip_list = chi_zips.values.tolist()

In [11]:
# All my applicable category names - this time only bike shops
my_categories = {
    # FourSquare API documentation has new codes, but they don't work
    "Bike Shop" : "4bf58dd8d48988d115951735",
}

In [12]:
# For each category and zip code, calls the FourSquare API and appends the results to a our_venues dataframe
for cat, code in my_categories.items():
    # Calls function with each of Chicago's zip codes
    for i in range(len(zip_list)):
        try:
            my_venues = get_my_venues(zip_list[i][0], cat, code, 4000)
            # add it to our_venues df
            our_venues = our_venues.append(my_venues, ignore_index = True)
            print(cat + " added to list, size = " + str(our_venues.shape[0]) + " zipcode: " + str(zip_list[i][0]))
        except:
            print("Empty for this zip code: " + str(zip_list[i][0]))

Bike Shop added to list, size = 36 zipcode: 60647
Bike Shop added to list, size = 50 zipcode: 60639
Bike Shop added to list, size = 62 zipcode: 60707
Bike Shop added to list, size = 110 zipcode: 60622
Bike Shop added to list, size = 128 zipcode: 60651
Bike Shop added to list, size = 156 zipcode: 60611
Bike Shop added to list, size = 183 zipcode: 60638
Bike Shop added to list, size = 205 zipcode: 60652
Bike Shop added to list, size = 223 zipcode: 60626
Bike Shop added to list, size = 233 zipcode: 60615
Bike Shop added to list, size = 268 zipcode: 60621
Bike Shop added to list, size = 291 zipcode: 60645
Bike Shop added to list, size = 309 zipcode: 60643
Bike Shop added to list, size = 327 zipcode: 60660
Bike Shop added to list, size = 356 zipcode: 60640
Bike Shop added to list, size = 400 zipcode: 60614
Bike Shop added to list, size = 481 zipcode: 60631
Bike Shop added to list, size = 490 zipcode: 60646
Bike Shop added to list, size = 505 zipcode: 60628
Bike Shop added to list, size = 54

In [13]:
our_venues.shape

(1765, 6)

In [14]:
# Remove duplicate venues
our_venues.drop_duplicates(inplace=True, ignore_index=True)

In [16]:
our_venues.head()

Unnamed: 0,name,categories,lat,lng,address,city
0,The Bike Lane,Bike Shop,41.919867,-87.692609,2130 N Milwaukee Ave,Chicago
1,Boulevard Bikes,Bike Shop,41.93157,-87.71118,2769 N Milwaukee Ave,Chicago
2,BFF Bikes,Bike Shop,41.91936,-87.67778,2056 N Damen Ave,Chicago
3,Smart Bike Parts Inc,Bike Shop,41.917484,-87.703692,3031 W Armitage Ave,Chicago
4,Heritage Bicycles,Coffee Shop,41.93576,-87.66283,2959 N Lincoln Avenue,Chicago


In [17]:
our_venues.tail()

Unnamed: 0,name,categories,lat,lng,address,city
157,Village CycleSport,Bike Shop,42.026652,-87.986725,45 Arlington Heights Rd,Elk Grove Village
158,fast track,Bike Shop,41.98016,-87.904443,,Des Plaines
159,Good Speed Cycles,Bike Shop,41.557136,-87.669159,2551 183rd St,Homewood
160,Goodspeed Cycles - Homewood,Bike Shop,41.557005,-87.668746,2125 183rd St,Homewood
161,Speed Demon Powersports,Bike Shop,41.575878,-87.736801,4921 173rd St,Country Club Hills


In [18]:
# Initiate new dataframe to store Chicago only venues
chi_venues = pd.DataFrame()

In [19]:
# Remove non-Chicago venues
for i in range(len(our_venues)):
    if our_venues.loc[i, 'city'] == 'Chicago':
      chi_venues = chi_venues.append(our_venues.loc[i,:])

In [20]:
chi_venues.shape

(111, 6)

In [39]:
# Saves CSV of the venues
chi_venues.to_csv('bike_venues.csv')