# All material ©2019, Alex Siegman

---

## Welcome to 'Projects in Programming & Data Science' – we're going to jump right in to the mix. 

### Today we are going to leverage the CitiBike API to populate a MySQL database at regular intervals. Consider this your warm-up for the semester. 

---

### https://streamdata.io/developers/api-gallery/new-york-citibike-api/

In [169]:
# first, let's request the json from the CitiBike API URL

import json 
import urllib.request

with urllib.request.urlopen("https://gbfs.citibikenyc.com/gbfs/en/station_status.json") as url:
    data = json.loads(url.read().decode())
    print(data)

{'last_updated': 1564507469, 'ttl': 10, 'data': {'stations': [{'station_id': '168', 'num_bikes_available': 11, 'num_ebikes_available': 0, 'num_bikes_disabled': 1, 'num_docks_available': 35, 'num_docks_disabled': 0, 'is_installed': 1, 'is_renting': 1, 'is_returning': 0, 'last_reported': 1564507245, 'eightd_has_available_keys': False, 'eightd_active_station_services': [{'id': 'bedaaf2b-8664-469e-8681-26ff8059765b'}]}, {'station_id': '281', 'num_bikes_available': 4, 'num_ebikes_available': 0, 'num_bikes_disabled': 2, 'num_docks_available': 59, 'num_docks_disabled': 1, 'is_installed': 1, 'is_renting': 1, 'is_returning': 0, 'last_reported': 1564507332, 'eightd_has_available_keys': True, 'eightd_active_station_services': [{'id': '32461582-cd1e-4ecf-a5ea-563593fa7009'}]}, {'station_id': '285', 'num_bikes_available': 0, 'num_ebikes_available': 0, 'num_bikes_disabled': 1, 'num_docks_available': 0, 'num_docks_disabled': 0, 'is_installed': 1, 'is_renting': 0, 'is_returning': 0, 'last_reported': 1

In [170]:
stations = data['data']['stations']

In [171]:
import pandas as pd

df_stations = pd.DataFrame(stations)
df_stations.head()

Unnamed: 0,eightd_active_station_services,eightd_has_available_keys,is_installed,is_renting,is_returning,last_reported,num_bikes_available,num_bikes_disabled,num_docks_available,num_docks_disabled,num_ebikes_available,station_id
0,[{'id': 'bedaaf2b-8664-469e-8681-26ff8059765b'}],False,1,1,0,1564507245,11,1,35,0,0,168
1,[{'id': '32461582-cd1e-4ecf-a5ea-563593fa7009'}],True,1,1,0,1564507332,4,2,59,1,0,281
2,,True,1,0,0,1564507226,0,1,0,0,0,285
3,[{'id': 'a58d9e34-2f28-40eb-b4a6-c8c01375657a'}],True,1,1,0,1564507453,10,1,22,0,0,304
4,[{'id': '8ec29d39-9642-466a-9a20-aad1e5c4788a'}],False,1,1,0,1564507063,7,1,29,0,0,337


In [163]:
import MySQLdb

In [164]:
db = MySQLdb.connect(passwd="BigBernie3",db="citibike")

In [165]:
c = db.cursor()

In [174]:
# create table

c.execute("CREATE TABLE IF NOT EXISTS Practice1 (station_id int, num_ebikes_available int, num_docks_disabled int, num_docks_available int, num_bikes_disabled int, num_bikes_available int, last_reported varchar(250), is_returning int, is_renting int, is_installed int, eightd_has_available_keys bool);")

c.close()
db.commit()

In [175]:
from datetime import datetime

# populate table

# we fetch for now just the time-invariant data. Notice that we have the INSERT IGNORE so that even when we add the same entry
# again, we do not get an error that the line exists. We do get warnings, but this is expected

c = db.cursor()

query_template = """INSERT IGNORE INTO Practice1(station_id, num_bikes_available) VALUES (%s, %s);"""

# num_docks_disabled, num_docks_available, num_bikes_disabled, num_bikes_available, last_reported, is_returning, is_installed, eightd_has_available_keys, eightd_active_station_services) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)"

for entry in stations:
    station_id = int(entry['station_id'])
    
    #num_ebikes_available = int(entry['num_ebikes_available'])
    # num_docks_disabled = int(entry['num_docks_disabled'])
    # num_docks_available = int(entry['num_docks_available'])
    # num_bikes_disabled = int(entry['num_bikes_disabled'])
    
    num_bikes_available = int(entry['num_bikes_available'])
    
    # last_reported = str(entry['last_reported'])
    # is_returning = int(entry['is_returning'])
    # is_renting = int(entry['is_renting'])
    # is_installed = int(entry['is_installed'])
    # eightd_has_available_keys = bool(entry['eightd_has_available_keys'])
                           
    print("Inserting station", station_id, num_bikes_available) # , num_ebikes_available, num_docks_disabled, num_docks_available,num_bikes_disabled, num_bikes_available, last_reported, is_returning, is_renting, is_installed, eightd_has_available_keys)
    
    query_parameters = (station_id, num_bikes_available) # , num_ebikes_available, num_docks_disabled, num_docks_available, num_bikes_disabled, num_bikes_available, last_reported, is_returning, is_renting, is_installed, eightd_has_available_keys)
   
    c.execute(query_template, query_parameters)

c.close()
db.commit()

Inserting station 168 11
Inserting station 281 4
Inserting station 285 0
Inserting station 304 10
Inserting station 337 7
Inserting station 347 13
Inserting station 359 13
Inserting station 377 20
Inserting station 388 13
Inserting station 402 6
Inserting station 426 15
Inserting station 484 16
Inserting station 491 10
Inserting station 520 1
Inserting station 3092 0
Inserting station 3233 6
Inserting station 3443 4
Inserting station 3459 7
Inserting station 3461 15
Inserting station 3467 11
Inserting station 72 3
Inserting station 79 10
Inserting station 82 1
Inserting station 83 20
Inserting station 116 33
Inserting station 119 13
Inserting station 120 2
Inserting station 127 0
Inserting station 128 29
Inserting station 143 18
Inserting station 144 14
Inserting station 146 12
Inserting station 150 0
Inserting station 151 30
Inserting station 157 4
Inserting station 161 3
Inserting station 164 41
Inserting station 167 33
Inserting station 173 1
Inserting station 174 19
Inserting stati

Inserting station 3338 29
Inserting station 3339 0
Inserting station 3340 19
Inserting station 3341 18
Inserting station 3342 21
Inserting station 3343 2
Inserting station 3344 19
Inserting station 3345 19
Inserting station 3346 3
Inserting station 3347 15
Inserting station 3348 18
Inserting station 3349 0
Inserting station 3350 0
Inserting station 3351 3
Inserting station 3352 20
Inserting station 3354 3
Inserting station 3355 4
Inserting station 3356 5
Inserting station 3357 0
Inserting station 3358 0
Inserting station 3359 6
Inserting station 3360 0
Inserting station 3361 2
Inserting station 3362 2
Inserting station 3363 14
Inserting station 3364 5
Inserting station 3365 1
Inserting station 3366 0
Inserting station 3367 30
Inserting station 3368 9
Inserting station 3369 0
Inserting station 3370 1
Inserting station 3372 9
Inserting station 3373 25
Inserting station 3374 10
Inserting station 3375 1
Inserting station 3376 2
Inserting station 3377 25
Inserting station 3378 6
Inserting s

In [184]:
c = db.cursor()

c.execute("SELECT * FROM Practice1 LIMIT1;")
rows = c.fetchall()
print(rows)

c.close()
db.commit()

((168, None, None, None, None, 11, None, None, None, None, None), (281, None, None, None, None, 4, None, None, None, None, None), (285, None, None, None, None, 0, None, None, None, None, None), (304, None, None, None, None, 10, None, None, None, None, None), (337, None, None, None, None, 7, None, None, None, None, None), (347, None, None, None, None, 13, None, None, None, None, None), (359, None, None, None, None, 13, None, None, None, None, None), (377, None, None, None, None, 20, None, None, None, None, None), (388, None, None, None, None, 13, None, None, None, None, None), (402, None, None, None, None, 6, None, None, None, None, None), (426, None, None, None, None, 15, None, None, None, None, None), (484, None, None, None, None, 16, None, None, None, None, None), (491, None, None, None, None, 10, None, None, None, None, None), (520, None, None, None, None, 1, None, None, None, None, None), (3092, None, None, None, None, 0, None, None, None, None, None), (3233, None, None, None, None