# All material ©2019, Alex Siegman

---

## Welcome to Projects in Programming & Data Science. Today we're going to jump right into the mix and leverage the CitiBike API to populate a SQLite database at regular intervals. Consider this your warm-up for the semester!

---

In [1]:
import sqlite3

In [2]:
# Open a connection to the SQLite file; sqlite3.connect creates the file if it does not exist yet.
con = sqlite3.connect('citibikeData.db') # create our database

## Now, let's check out the API we'll be working with:

### https://streamdata.io/developers/api-gallery/new-york-citibike-api/

#### First, we'll request the json from the CitiBike API URL and just print it out to get a quick glimpse

In [3]:
import json 
import urllib.request # https://docs.python.org/3/library/urllib.request.html

# Download the live station feed, then parse the JSON text and print the whole
# structure so we can see what the API gives us.
with urllib.request.urlopen("https://feeds.citibikenyc.com/stations/stations.json") as response:
    raw_payload = response.read()

data = json.loads(raw_payload.decode())
print(data)

{'executionTime': '2019-09-09 02:30:36 PM', 'stationBeanList': [{'id': 168, 'stationName': 'W 18 St & 6 Ave', 'availableDocks': 34, 'totalDocks': 47, 'latitude': 40.73971301, 'longitude': -73.99456405, 'statusValue': 'In Service', 'statusKey': 1, 'availableBikes': 11, 'stAddress1': 'W 18 St & 6 Ave', 'stAddress2': '', 'city': '', 'postalCode': '', 'location': '', 'altitude': '', 'testStation': False, 'lastCommunicationTime': '2019-09-09 02:29:10 PM', 'landMark': ''}, {'id': 281, 'stationName': 'Grand Army Plaza & Central Park S', 'availableDocks': 64, 'totalDocks': 66, 'latitude': 40.7643971, 'longitude': -73.97371465, 'statusValue': 'In Service', 'statusKey': 1, 'availableBikes': 0, 'stAddress1': 'Grand Army Plaza & Central Park S', 'stAddress2': '', 'city': '', 'postalCode': '', 'location': '', 'altitude': '', 'testStation': False, 'lastCommunicationTime': '2019-09-09 02:29:10 PM', 'landMark': ''}, {'id': 285, 'stationName': 'Broadway & E 14 St', 'availableDocks': 48, 'totalDocks': 5

In [4]:
stations = data['stationBeanList'] # pull the list of station records out of the parsed JSON (no iteration happens here)

In [5]:
import pandas as pd # we'll use pandas just to visualize our data, NOT to query it

df_stations = pd.DataFrame(stations)
df_stations.head() # check the first five station entries

# you'll note that 'altitude' shows up as the first column: the columns are listed alphabetically here.
# the index is just the default row number (0, 1, 2, ...); if we were working only in Pandas we'd probably
# want to set a more meaningful index (e.g. 'id'), but we can leave it alone for now

Unnamed: 0,altitude,availableBikes,availableDocks,city,id,landMark,lastCommunicationTime,latitude,location,longitude,postalCode,stAddress1,stAddress2,stationName,statusKey,statusValue,testStation,totalDocks
0,,11,34,,168,,2019-09-09 02:29:10 PM,40.739713,,-73.994564,,W 18 St & 6 Ave,,W 18 St & 6 Ave,1,In Service,False,47
1,,0,64,,281,,2019-09-09 02:29:10 PM,40.764397,,-73.973715,,Grand Army Plaza & Central Park S,,Grand Army Plaza & Central Park S,1,In Service,False,66
2,,3,48,,285,,2019-09-09 02:30:02 PM,40.734546,,-73.990741,,Broadway & E 14 St,,Broadway & E 14 St,1,In Service,False,53
3,,3,32,,298,,2019-09-09 02:28:28 PM,40.686832,,-73.979677,,3 Ave & Schermerhorn St,,3 Ave & Schermerhorn St,1,In Service,False,35
4,,12,20,,304,,2019-09-09 02:29:34 PM,40.704633,,-74.013617,,Broadway & Battery Pl,,Broadway & Battery Pl,1,In Service,False,33


In [6]:
# Create the table that stores one row per station reading.
# The UNIQUE constraint gives the "INSERT OR IGNORE" statements used later something to ignore:
# a reading whose station_id and lastCommunicationTime match a stored row is skipped instead of
# being inserted again on every poll. Without any constraint, OR IGNORE never skips a row.
# NOTE(review): this assumes a station's values do not change unless lastCommunicationTime
# changes too -- confirm against the feed.
# NOTE: because of IF NOT EXISTS, a StationsData table that already exists in the database file
# keeps its old definition (no constraint); drop the table or delete the file to pick this up.
sql = "CREATE TABLE IF NOT EXISTS StationsData (station_id int, stationName varchar(250), availableDocks int, totalDocks int, latitude float, longitude float, statusValue varchar(250), statusKey int, availableBikes int, stAddress1 varchar(250), stAddress2 varchar(250), city varchar(250), postalCode varchar(250), location varchar(250), altitude varchar(250), testStation bool, lastCommunicationTime date, landMark varchar(250), UNIQUE (station_id, lastCommunicationTime));"

con.execute(sql)
con.commit()

In [7]:
from datetime import datetime # import the datetime library 

In [8]:
# Parameterized INSERT: one "?" placeholder per StationsData column, in column order.
query_template = """INSERT OR IGNORE INTO StationsData(station_id, stationName, availableDocks, totalDocks, latitude, \
longitude, statusValue, statusKey, availableBikes, stAddress1, stAddress2, city, postalCode, location, altitude, \
testStation, lastCommunicationTime, landMark) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?);"""

# (JSON key, converter) pairs, listed in the same order as the columns above.
# A converter of None means the value from the feed is stored exactly as received.
field_converters = (
    ('id', int),
    ('stationName', str),
    ('availableDocks', int),
    ('totalDocks', int),
    ('latitude', str),
    ('longitude', str),
    ('statusValue', str),
    ('statusKey', int),
    ('availableBikes', int),
    ('stAddress1', str),
    ('stAddress2', str),
    ('city', str),
    ('postalCode', str),
    ('location', str),
    ('altitude', str),
    ('testStation', bool),
    ('lastCommunicationTime', None),
    ('landMark', str),
)

for entry in stations: # for every station entry in the json
    # build the row for this station, converting each field as we go
    query_parameters = tuple(
        entry[key] if convert is None else convert(entry[key])
        for key, convert in field_converters
    )

    print("Inserting Station:", *query_parameters)

    con.execute(query_template, query_parameters)

# a single commit once every station has been queued
con.commit()

Inserting Station: 168 W 18 St & 6 Ave 34 47 40.73971301 -73.99456405 In Service 1 11 W 18 St & 6 Ave      False 2019-09-09 02:29:10 PM 
Inserting Station: 281 Grand Army Plaza & Central Park S 64 66 40.7643971 -73.97371465 In Service 1 0 Grand Army Plaza & Central Park S      False 2019-09-09 02:29:10 PM 
Inserting Station: 285 Broadway & E 14 St 48 53 40.73454567 -73.99074142 In Service 1 3 Broadway & E 14 St      False 2019-09-09 02:30:02 PM 
Inserting Station: 298 3 Ave & Schermerhorn St 32 35 40.68683208 -73.9796772 In Service 1 3 3 Ave & Schermerhorn St      False 2019-09-09 02:28:28 PM 
Inserting Station: 304 Broadway & Battery Pl 20 33 40.70463334 -74.01361706 In Service 1 12 Broadway & Battery Pl      False 2019-09-09 02:29:34 PM 
Inserting Station: 337 Old Slip & Front St 37 37 40.7037992 -74.00838676 In Service 1 0 Old Slip & Front St      False 2019-09-09 02:27:31 PM 
Inserting Station: 347 Greenwich St & W Houston St 29 35 40.728846 -74.008591 In Service 1 5 Greenwich St &

Inserting Station: 343 Clinton Ave & Flushing Ave 3 23 40.69794 -73.96986848 In Service 1 19 Clinton Ave & Flushing Ave      False 2019-09-09 02:29:43 PM 
Inserting Station: 344 Monroe St & Bedford Ave 23 23 40.6851443 -73.95380904 In Service 1 0 Monroe St & Bedford Ave      False 2019-09-09 02:28:25 PM 
Inserting Station: 346 Bank St & Hudson St 22 27 40.73652889 -74.00618026 In Service 1 4 Bank St & Hudson St      False 2019-09-09 02:27:23 PM 
Inserting Station: 349 Rivington St & Ridge St 57 61 40.71850211 -73.98329859 In Service 1 1 Rivington St & Ridge St      False 2019-09-09 02:29:26 PM 
Inserting Station: 350 Clinton St & Grand St 25 28 40.71559509 -73.9870295 In Service 1 3 Clinton St & Grand St      False 2019-09-09 02:30:18 PM 
Inserting Station: 351 Front St & Maiden Ln 3 39 40.70530954 -74.00612572 In Service 1 34 Front St & Maiden Ln      False 2019-09-09 02:27:10 PM 
Inserting Station: 353 S Portland Ave & Hanson Pl 24 27 40.68539567 -73.97431458 In Service 1 3 S Portlan

Inserting Station: 514 12 Ave & W 40 St 51 52 40.76087502 -74.00277668 In Service 1 0 12 Ave & W 40 St      False 2019-09-09 02:30:04 PM 
Inserting Station: 515 W 43 St & 10 Ave 35 35 40.76009437 -73.99461843 In Service 1 0 W 43 St & 10 Ave      False 2019-09-09 02:28:00 PM 
Inserting Station: 516 E 47 St & 1 Ave 7 31 40.75206862 -73.96784384 In Service 1 24 E 47 St & 1 Ave      False 2019-09-09 02:30:17 PM 
Inserting Station: 517 Pershing Square South 58 69 40.751581 -73.97791 In Service 1 9 Pershing Square South      False 2019-09-09 02:29:32 PM 
Inserting Station: 518 E 39 St & 2 Ave 20 39 40.74780373 -73.9734419 In Service 1 19 E 39 St & 2 Ave      False 2019-09-09 02:29:50 PM 
Inserting Station: 519 Pershing Square North 68 69 40.751873 -73.977706 In Service 1 1 Pershing Square North      False 2019-09-09 02:27:37 PM 
Inserting Station: 520 W 52 St & 5 Ave 11 41 40.75992262 -73.97648516 In Service 1 30 W 52 St & 5 Ave      False 2019-09-09 02:30:05 PM 
Inserting Station: 522 E 51 

Inserting Station: 3179 Park Ave & Marcus Garvey Blvd 12 23 40.698617 -73.941342 In Service 1 11 Park Ave & Marcus Garvey Blvd      False 2019-09-09 02:28:28 PM 
Inserting Station: 3182 Yankee Ferry Terminal 17 42 40.686931 -74.016966 In Service 1 24 Yankee Ferry Terminal      False 2019-09-09 02:28:01 PM 
Inserting Station: 3184 Paulus Hook 6 14 40.7141454 -74.0335519 In Service 1 6 Paulus Hook      False 2019-09-09 02:27:33 PM 
Inserting Station: 3185 City Hall 19 22 40.7177325 -74.043845 In Service 1 3 City Hall      False 2019-09-09 02:26:42 PM 
Inserting Station: 3186 Grove St PATH 18 42 40.71958611647166 -74.04311746358871 In Service 1 23 Grove St PATH      False 2019-09-09 02:29:06 PM 
Inserting Station: 3187 Warren St 16 22 40.7211236 -74.03805095 In Service 1 5 Warren St      False 2019-09-09 02:28:07 PM 
Inserting Station: 3191 Union St 3 18 40.7182113 -74.0836394 In Service 1 14 Union St      False 2019-09-09 02:29:22 PM 
Inserting Station: 3192 Liberty Light Rail 21 21 40.7

Inserting Station: 3374 Central Park North & Adam Clayton Powell Blvd 30 36 40.799484 -73.955613 In Service 1 4 Central Park North & Adam Clayton Powell Blvd      False 2019-09-09 02:28:45 PM 
Inserting Station: 3375 3 Ave & E 72 St 33 35 40.7699426 -73.96060712 In Service 1 2 3 Ave & E 72 St      False 2019-09-09 02:26:53 PM 
Inserting Station: 3376 E 65 St & 2 Ave 37 39 40.76471851944339 -73.96222069859505 In Service 1 2 E 65 St & 2 Ave      False 2019-09-09 02:29:58 PM 
Inserting Station: 3377 Carroll St & Bond St 11 25 40.6786115 -73.99037292 In Service 1 14 Carroll St & Bond St      False 2019-09-09 02:27:32 PM 
Inserting Station: 3378 E 76 St & Park Ave 28 28 40.773763 -73.96222088 In Service 1 0 E 76 St & Park Ave      False 2019-09-09 02:27:37 PM 
Inserting Station: 3379 E 103 St & Lexington Ave 3 35 40.7903051 -73.94755757 In Service 1 32 E 103 St & Lexington Ave      False 2019-09-09 02:29:08 PM 
Inserting Station: 3381 3 St & Hoyt St 2 21 40.6777287 -73.99364123 In Service 1

Inserting Station: 3569 Franklin Ave & St Marks Ave 29 29 40.6758324 -73.9561677 In Service 1 0 Franklin Ave & St Marks Ave      False 2019-09-09 02:30:33 PM 
Inserting Station: 3570 35 Ave & 37 St 29 31 40.7557327 -73.9236611 In Service 1 2 35 Ave & 37 St      False 2019-09-09 02:27:32 PM 
Inserting Station: 3571 Bedford Ave & Bergen St 24 24 40.676368 -73.952918 In Service 1 0 Bedford Ave & Bergen St      False 2019-09-09 02:28:20 PM 
Inserting Station: 3572 34 Ave & 38 St 31 33 40.756913 -73.921631 In Service 1 2 34 Ave & 38 St      False 2019-09-09 02:30:22 PM 
Inserting Station: 3573 35 St & 34 Ave 24 25 40.7580583 -73.9242751 In Service 1 1 35 St & 34 Ave      False 2019-09-09 02:28:28 PM 
Inserting Station: 3574 Prospect Pl & Underhill Ave 22 23 40.6769694 -73.96579 In Service 1 0 Prospect Pl & Underhill Ave      False 2019-09-09 02:27:30 PM 
Inserting Station: 3575 Crescent St & 34 Ave 18 19 40.76108 -73.930562 In Service 1 1 Crescent St & 34 Ave      False 2019-09-09 02:28:27 

Inserting Station: 3776 Central Ave & Starr Street 21 23 40.700003 -73.92834 In Service 1 2 Central Ave & Starr Street      False 2019-09-09 02:29:41 PM 
Inserting Station: 3777 Stockholm St & Wilson Ave 9 23 40.699304 -73.923044 In Service 1 14 Stockholm St & Wilson Ave      False 2019-09-09 02:28:26 PM 
Inserting Station: 3778 Irving Ave & DeKalb Ave 2 27 40.7027 -73.92095 In Service 1 25 Irving Ave & DeKalb Ave      False 2019-09-09 02:30:21 PM 
Inserting Station: 3779 Hart St & Wyckoff Ave 16 24 40.704876 -73.919911 In Service 1 8 Hart St & Wyckoff Ave      False 2019-09-09 02:29:08 PM 
Inserting Station: 3781 Greene Av & Myrtle Av 13 30 40.698568 -73.918877 In Service 1 17 Greene Av & Myrtle Av      False 2019-09-09 02:28:28 PM 
Inserting Station: 3782 Brooklyn Bridge Park - Pier 2 10 31 40.698458 -73.997178 In Service 1 21 Brooklyn Bridge Park - Pier 2      False 2019-09-09 02:27:55 PM 
Inserting Station: 3783 Cliff St & Fulton St 2 37 40.70838 -74.00495 In Service 1 35 Cliff St 

In [9]:
# Read a handful of rows back out of the database to confirm the inserts landed.
preview_query = "SELECT * FROM StationsData LIMIT 5"
check = pd.read_sql(preview_query, con=con)
check

Unnamed: 0,station_id,stationName,availableDocks,totalDocks,latitude,longitude,statusValue,statusKey,availableBikes,stAddress1,stAddress2,city,postalCode,location,altitude,testStation,lastCommunicationTime,landMark
0,168,W 18 St & 6 Ave,35,47,40.739713,-73.994564,In Service,1,11,W 18 St & 6 Ave,,,,,,0,2019-09-06 03:34:46 PM,
1,281,Grand Army Plaza & Central Park S,46,66,40.764397,-73.973715,In Service,1,18,Grand Army Plaza & Central Park S,,,,,,0,2019-09-06 03:32:58 PM,
2,285,Broadway & E 14 St,39,53,40.734546,-73.990741,In Service,1,13,Broadway & E 14 St,,,,,,0,2019-09-06 03:33:18 PM,
3,298,3 Ave & Schermerhorn St,27,35,40.686832,-73.979677,In Service,1,6,3 Ave & Schermerhorn St,,,,,,0,2019-09-06 03:34:52 PM,
4,304,Broadway & Battery Pl,28,33,40.704633,-74.013617,In Service,1,5,Broadway & Battery Pl,,,,,,0,2019-09-06 03:35:23 PM,


## Last but not least, let's set things up so that our database automatically updates every 15 seconds. 

In [None]:
import time 
import json 
import urllib.request
from datetime import datetime

# Poll the CitiBike station feed forever and add every station reading to the
# StationsData table, pausing 15 seconds between polls. Interrupt the kernel to stop it.

# The statement never changes, so build it once instead of on every pass.
query_template = """INSERT OR IGNORE INTO StationsData(station_id, stationName, availableDocks, totalDocks, latitude, \
longitude, statusValue, statusKey, availableBikes, stAddress1, stAddress2, city, postalCode, location, altitude, \
testStation, lastCommunicationTime, landMark) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?);"""

# Open a single connection for the whole run rather than reconnecting on every pass
# (which left the previous connection unclosed each time). It is deliberately left open
# so that `con` can still be used after the loop is interrupted.
con = sqlite3.connect('citibikeData.db') # connect to our db

while True:

    with urllib.request.urlopen("https://feeds.citibikenyc.com/stations/stations.json") as url:
        data = json.loads(url.read().decode())

    stations = data['stationBeanList'] # the list of station records in the feed

    # `with con` commits once the whole batch has been inserted and rolls the batch
    # back if any insert raises, so a failed poll never leaves a half-written snapshot.
    with con:
        for entry in stations: # for every station entry in the json
            station_id = int(entry['id']) # find and set station_id
            stationName = str(entry['stationName'])
            availableDocks = int(entry['availableDocks'])
            totalDocks = int(entry['totalDocks'])
            latitude = str(entry['latitude'])
            longitude = str(entry['longitude'])
            statusValue = str(entry['statusValue'])
            statusKey = int(entry['statusKey'])
            availableBikes = int(entry['availableBikes'])
            stAddress1 = str(entry['stAddress1'])
            stAddress2 = str(entry['stAddress2'])
            city = str(entry['city'])
            postalCode = str(entry['postalCode'])
            location = str(entry['location'])
            altitude = str(entry['altitude'])
            testStation = bool(entry['testStation'])
            lastCommunicationTime = entry['lastCommunicationTime'] # stored exactly as the feed sends it
            landMark = str(entry['landMark'])

            print("Inserting Station:", station_id, stationName, availableDocks, totalDocks, latitude, longitude, statusValue, statusKey, availableBikes, stAddress1, stAddress2, city, postalCode, location, altitude, testStation, lastCommunicationTime, landMark)
            query_parameters = (station_id, stationName, availableDocks, totalDocks, latitude, longitude, statusValue, statusKey, availableBikes, stAddress1, stAddress2, city, postalCode, location, altitude, testStation, lastCommunicationTime, landMark)

            con.execute(query_template, query_parameters)

    time.sleep(15) # wait before polling the feed again

---

## I hope this has helped you find your programming legs! Next week we'll get back to descriptive analytics using Python and Pandas. For now, take time to refresh yourself on the content covered in "Introduction to Programming". 

## If you need a refresher on your SQL skills, check out the "Supplementary Info" directory in the class repo.