In [1]:
import json
import os
import re
import time
import warnings

import pandas as pd
import requests
from tqdm import tqdm
# Load the CTA rail station table; later cells read its 'LONGNAME' and 'the_geom' columns.
cta = pd.read_csv('CTA_RAIL.csv')

# Get Station Reviews

In [2]:
# Read the Google API key from the environment so the secret is never stored
# in the notebook itself. Export GOOGLE_API_KEY before starting the kernel.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
if not GOOGLE_API_KEY:
    # Warn right away: every request below sends this value as its "key" parameter.
    warnings.warn("GOOGLE_API_KEY is not set; the Google Places requests below need it.")

In [107]:
# Build one record per station row. The two decimal numbers found in the
# 'the_geom' text are read as (longitude, latitude), in that order.
stations = []
for long_name, geom_text in zip(cta['LONGNAME'], cta['the_geom']):
    lon, lat = map(float, re.findall(r"[-+]?\d*\.\d+", geom_text))
    stations.append(dict(station_name=long_name, longitude=lon, latitude=lat))

In [108]:
def get_station_id(station, radius=1000):
    """Look up the Google Places ``place_id`` for one station.

    Runs a Nearby Search centred on the station's coordinates with the station
    name as keyword, first for ``train_station`` places and then for
    ``subway_station`` places. The search stops at the first place type that
    returns anything, so no further requests are spent once a match is found.

    Args:
        station: Mapping with ``station_name``, ``latitude`` and ``longitude``.
        radius: Value sent as the API's ``radius`` parameter. Defaults to 1000.

    Returns:
        The ``place_id`` of the top result of the first non-empty search, or
        ``None`` when no search returns a result.

    Raises:
        requests.HTTPError: If a request comes back with an HTTP error status.
        RuntimeError: If the API payload reports a status other than ``OK`` or
            ``ZERO_RESULTS`` (an API error must not be mistaken for
            "station not found").
    """
    url = "https://maps.googleapis.com/maps/api/place/nearbysearch/json"
    keyword, lat, lng = station['station_name'], station['latitude'], station['longitude']

    # Query with types that commonly represent CTA/rail stops
    for place_type in ["train_station", "subway_station"]:
        params = {
            "key": GOOGLE_API_KEY,
            "location": f"{lat},{lng}",
            "radius": radius,
            "type": place_type,
            "keyword": keyword,
        }
        r = requests.get(url, params=params, timeout=30)
        r.raise_for_status()
        data = r.json()

        status = data.get('status')
        if status not in ('OK', 'ZERO_RESULTS'):
            raise RuntimeError(f"Nearby Search error: {status} - {data.get('error_message')}")

        results = data.get('results', [])
        if results:
            # First hit of the first non-empty search wins.
            return results[0]['place_id']

    return None

In [110]:
def get_station_reviews(station_id):
    """Fetch the rating summary and reviews for one station from Place Details.

    Args:
        station_id: Google Places ``place_id`` of the station.

    Returns:
        A dict with:
          * ``station_rating``: the place's ``rating``, or ``None`` if absent.
          * ``station_rater_num``: the place's ``user_ratings_total``, or 0 if absent.
          * ``station_reviews``: list of ``{'text', 'rating', 'time'}`` dicts, where
            ``time`` is the review's ``relative_time_description``; empty if
            the place has no reviews.

    Raises:
        requests.HTTPError: If the request comes back with an HTTP error status.
        RuntimeError: If the API payload reports a status other than ``OK``.
    """
    url = 'https://maps.googleapis.com/maps/api/place/details/json'
    fields = 'name,place_id,rating,user_ratings_total,formatted_address,reviews'
    params = {
        "key": GOOGLE_API_KEY,
        "place_id": station_id,
        "fields": fields
    }
    r = requests.get(url, params=params, timeout=30)
    r.raise_for_status()
    raw_reviews = r.json()
    # Check the payload just fetched (the original consulted an undefined `data`).
    if raw_reviews.get('status') != 'OK':
        raise RuntimeError(
            f"Place Details error: {raw_reviews.get('status')} - {raw_reviews.get('error_message')}"
        )

    result = raw_reviews.get('result', {})
    # Places without any ratings omit these keys, so fall back to empty defaults.
    station_reviews = [
        {'text': review['text'], 'rating': review['rating'], 'time': review['relative_time_description']}
        for review in result.get('reviews', [])
    ]

    return {
        'station_rating': result.get('rating'),
        'station_rater_num': result.get('user_ratings_total', 0),
        'station_reviews': station_reviews,
    }

In [112]:
# Collect one review payload per station, in the same order as `stations`.
# Stations that cannot be resolved to a place get an empty dict placeholder
# so the list stays aligned with the station table.
stations_reviews = []

for station in tqdm(stations):
    place_id = get_station_id(station)
    if place_id is None:
        print(f"No location found for station {station['station_name']}")
        stations_reviews.append({})
        continue

    stations_reviews.append(get_station_reviews(place_id))

 42%|█████████████████▋                        | 61/145 [01:08<01:33,  1.12s/it]

No location found for station 51st


100%|█████████████████████████████████████████| 145/145 [02:46<00:00,  1.15s/it]


In [129]:
# Attach the scraped review payloads (serialized as JSON text) and the parsed
# coordinates to the station table, then write the enriched table to disk.
cta['reviews'] = list(map(json.dumps, stations_reviews))
for coord in ('longitude', 'latitude'):
    cta[coord] = [entry[coord] for entry in stations]

cta.to_csv('cta_train_stations.csv', index=False)

# Get Park Reviews

In [146]:
# Load the events table; later cells read its 'geocoding query', 'latitude' and 'longitude' columns.
events = pd.read_csv('event_dataset.csv')

In [139]:
# Distinct 'geocoding query' strings in the events table. This must read the
# `events` DataFrame: `event` is not defined at this point in a fresh kernel.
chicago_parks = events['geocoding query'].unique()

In [170]:
def get_park_id(park, radius=1000):
    """Look up the Google Places ``place_id`` for one park.

    Runs a Nearby Search centred on the park's coordinates with the park name
    as keyword, trying the ``park``, ``swimming_pool`` and
    ``tourist_attraction`` place types in that order. The search stops at the
    first type that returns anything, so no further requests are spent once a
    match is found.

    Args:
        park: Mapping with ``park_name``, ``latitude`` and ``longitude``.
        radius: Value sent as the API's ``radius`` parameter. Defaults to 1000.

    Returns:
        The ``place_id`` of the top result of the first non-empty search, or
        ``None`` when no search returns a result.

    Raises:
        requests.HTTPError: If a request comes back with an HTTP error status.
        RuntimeError: If the API payload reports a status other than ``OK`` or
            ``ZERO_RESULTS`` (an API error must not be mistaken for
            "park not found").
    """
    url = "https://maps.googleapis.com/maps/api/place/nearbysearch/json"
    keyword, lat, lng = park['park_name'], park['latitude'], park['longitude']

    # Query with the place types tried for park facilities, in priority order.
    for place_type in ["park", "swimming_pool", "tourist_attraction"]:
        params = {
            "key": GOOGLE_API_KEY,
            "location": f"{lat},{lng}",
            "radius": radius,
            "type": place_type,
            "keyword": keyword,
        }
        r = requests.get(url, params=params, timeout=30)
        r.raise_for_status()
        data = r.json()

        status = data.get('status')
        if status not in ('OK', 'ZERO_RESULTS'):
            raise RuntimeError(f"Nearby Search error: {status} - {data.get('error_message')}")

        results = data.get('results', [])
        if results:
            # First hit of the first non-empty search wins.
            return results[0]['place_id']

    return None

In [174]:
def get_park_reviews(park_id):
    """Fetch the rating summary and reviews for one park from Place Details.

    Args:
        park_id: Google Places ``place_id`` of the park.

    Returns:
        A dict with:
          * ``park_rating``: the place's ``rating``, or ``None`` if absent.
          * ``park_rater_num``: the place's ``user_ratings_total``, or 0 if absent.
          * ``park_reviews``: list of ``{'text', 'rating', 'time'}`` dicts, where
            ``time`` is the review's ``relative_time_description``; empty if
            the place has no reviews.

    Raises:
        requests.HTTPError: If the request comes back with an HTTP error status.
        RuntimeError: If the API payload reports a status other than ``OK``.
    """
    url = 'https://maps.googleapis.com/maps/api/place/details/json'
    fields = 'name,place_id,rating,user_ratings_total,formatted_address,reviews'
    params = {
        "key": GOOGLE_API_KEY,
        "place_id": park_id,
        "fields": fields
    }
    r = requests.get(url, params=params, timeout=30)
    r.raise_for_status()
    raw_reviews = r.json()
    # Check the payload just fetched (the original consulted an undefined `data`).
    if raw_reviews.get('status') != 'OK':
        raise RuntimeError(
            f"Place Details error: {raw_reviews.get('status')} - {raw_reviews.get('error_message')}"
        )

    result = raw_reviews['result']
    # Places without any ratings omit these keys, so fall back to empty defaults.
    park_reviews = [
        {'text': review['text'], 'rating': review['rating'], 'time': review['relative_time_description']}
        for review in result.get('reviews', [])
    ]

    return {
        'park_rating': result.get('rating'),
        'park_rater_num': result.get('user_ratings_total', 0),
        'park_reviews': park_reviews,
    }

In [155]:
# One candidate record per event row: the event's 'geocoding query' is used as
# the park name, together with that row's coordinates.
parks = []
events_parks = []

for _, event in events.iterrows():
    parks.append(dict(
        park_name=event['geocoding query'],
        latitude=event['latitude'],
        longitude=event['longitude'],
    ))

# Keep only the first record seen for each park name, preserving first-seen order.
cleaned_parks = {}
for park in parks:
    cleaned_parks.setdefault(park['park_name'], park)
parks = list(cleaned_parks.values())

In [175]:
# Collect one review payload per park, in the same order as `parks`.
# Parks that cannot be resolved to a place get an empty dict placeholder.
parks_reviews = []

for park in tqdm(parks):
    place_id = get_park_id(park)
    if place_id is None:
        print(f"No location found for park {park['park_name']}")
        parks_reviews.append({})
    else:
        parks_reviews.append(get_park_reviews(place_id))

    # Short pause after every park, whether or not it was found.
    time.sleep(.2)

  2%|▉                                         | 12/553 [00:25<18:48,  2.09s/it]

No location found for park hayes gymnasium, chicago, il, usa


  5%|█▉                                        | 25/553 [00:56<22:10,  2.52s/it]

No location found for park montrose park, chicago, il, usa


  5%|██▎                                       | 30/553 [01:06<18:16,  2.10s/it]

No location found for park chicago park district park 100, chicago, il, usa


  7%|███                                       | 41/553 [01:29<18:24,  2.16s/it]

No location found for park the park, chicago, il, usa


  9%|███▊                                      | 50/553 [01:47<17:20,  2.07s/it]

No location found for park park no. park, chicago, il, usa


 10%|████▎                                     | 56/553 [02:00<17:18,  2.09s/it]

No location found for park burnham park, chicago, il, usa


 10%|████▎                                     | 57/553 [02:01<16:19,  1.97s/it]

No location found for park calumet e park, chicago, il, usa


 20%|████████▎                                | 112/553 [03:59<15:19,  2.09s/it]

No location found for park oak street park, chicago, il, usa


 23%|█████████▎                               | 125/553 [04:27<14:53,  2.09s/it]

No location found for park lincoln-2011 n. park, chicago, il, usa


 26%|██████████▌                              | 142/553 [05:02<13:57,  2.04s/it]

No location found for park lincoln lawn, chicago, il, usa


 28%|███████████▋                             | 157/553 [05:34<13:51,  2.10s/it]

No location found for park ward park, chicago, il, usa


 32%|█████████████▎                           | 179/553 [06:20<13:34,  2.18s/it]

No location found for park burroughs park, chicago, il, usa


 46%|██████████████████▉                      | 255/553 [08:59<09:45,  1.96s/it]

No location found for park brown park, chicago, il, usa


 49%|████████████████████▏                    | 273/553 [09:36<10:06,  2.17s/it]

No location found for park park. no. park, chicago, il, usa


 50%|████████████████████▍                    | 276/553 [09:43<10:16,  2.23s/it]

No location found for park chicago park district park 4, chicago, il, usa


 50%|████████████████████▌                    | 278/553 [09:48<09:55,  2.17s/it]

No location found for park south park, chicago, il, usa


 68%|███████████████████████████▉             | 377/553 [13:15<06:40,  2.28s/it]

No location found for park abbott junior park, chicago, il, usa


 77%|███████████████████████████████▋         | 427/553 [14:59<04:00,  1.91s/it]

No location found for park palmer (bertha park, chicago, il, usa


 99%|████████████████████████████████████████▍| 546/553 [19:03<00:14,  2.02s/it]

No location found for park the prairie park, chicago, il, usa


100%|█████████████████████████████████████████| 553/553 [19:18<00:00,  2.10s/it]
