## Welcome to Projects in Programming & Data Science. 

## Today we're going to jump right into the mix and leverage the CitiBike API to populate a SQLite database at regular intervals. Consider this your warm-up for the semester!

---

In [None]:
import sqlite3

In [None]:
# Open a connection to the SQLite database file; sqlite3.connect() creates
# the file if it does not already exist.
con = sqlite3.connect('citibikeData.db') # create (or reopen) our database

## Now, let's check out the API we'll be working with:

### https://streamdata.io/developers/api-gallery/new-york-citibike-api/

#### First, we'll request the json from the CitiBike API URL and just print it out to get a quick glimpse

In [None]:
import json 
import urllib.request # https://docs.python.org/3/library/urllib.request.html

# Download the current station feed, parse the body as JSON, and print the
# whole payload so we can get a first look at its structure.
with urllib.request.urlopen("https://feeds.citibikenyc.com/stations/stations.json") as response:
    raw_body = response.read()

data = json.loads(raw_body.decode())
print(data)

In [None]:
# Pull the station records out of the payload's top-level 'stationBeanList' key.
stations = data['stationBeanList'] # the per-station entries from the feed

In [None]:
import pandas as pd # we'll use pandas just to visualize our data, NOT to query it

# Load the station records into a DataFrame purely for a quick visual check.
df_stations = pd.DataFrame(stations)
df_stations.head() # check the first five station entries

# NOTE(review): an earlier version of this note said 'altitude' had been set as
# our index. pandas normally assigns a default integer index when none is given,
# so 'altitude' is presumably just the first column shown -- confirm against the
# displayed output. Either way, we can leave it alone for now.

In [None]:
# Create the table that stores one row per station snapshot.
#
# The UNIQUE constraint is what gives the "INSERT OR IGNORE" statements used
# for loading their meaning: OR IGNORE only skips a row when inserting it
# would violate a constraint. Without any constraint on the table, every poll
# of the feed would append a full set of rows again, even for stations that
# have not reported anything new since the previous poll.
#
# NOTE: because of IF NOT EXISTS, a StationsData table created earlier keeps
# its old schema; delete citibikeData.db (or drop the table) to pick up the
# constraint.
sql = """CREATE TABLE IF NOT EXISTS StationsData (
    station_id int,
    stationName varchar(250),
    availableDocks int,
    totalDocks int,
    latitude float,
    longitude float,
    statusValue varchar(250),
    statusKey int,
    availableBikes int,
    stAddress1 varchar(250),
    stAddress2 varchar(250),
    city varchar(250),
    postalCode varchar(250),
    location varchar(250),
    altitude varchar(250),
    testStation bool,
    lastCommunicationTime date,
    landMark varchar(250),
    UNIQUE (station_id, lastCommunicationTime)
);"""

con.execute(sql)
con.commit()

In [None]:
from datetime import datetime # import the datetime library 

In [None]:
# Parameterized INSERT: one "?" placeholder per column, bound from a tuple.
query_template = """INSERT OR IGNORE INTO StationsData(station_id, stationName, availableDocks, totalDocks, latitude, \
longitude, statusValue, statusKey, availableBikes, stAddress1, stAddress2, city, postalCode, location, altitude, \
testStation, lastCommunicationTime, landMark) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?);"""


def _as_is(value):
    """Return *value* unchanged (for fields that are stored without conversion)."""
    return value


# (feed key, converter) pairs, listed in the same order as the columns named
# in query_template so the resulting tuple lines up with the placeholders.
field_converters = (
    ('id', int),
    ('stationName', str),
    ('availableDocks', int),
    ('totalDocks', int),
    ('latitude', str),
    ('longitude', str),
    ('statusValue', str),
    ('statusKey', int),
    ('availableBikes', int),
    ('stAddress1', str),
    ('stAddress2', str),
    ('city', str),
    ('postalCode', str),
    ('location', str),
    ('altitude', str),
    ('testStation', bool),
    ('lastCommunicationTime', _as_is),
    ('landMark', str),
)

for entry in stations: # one row per station entry in the json
    query_parameters = tuple(convert(entry[key]) for key, convert in field_converters)

    # Echo the row being written so progress is visible in the notebook output.
    print("Inserting Station:", *query_parameters)

    con.execute(query_template, query_parameters)

# Commit once after the whole batch rather than once per row.
con.commit()

In [None]:
# Read up to five rows back out of StationsData to confirm the inserts landed;
# leaving `check` as the last expression makes the notebook display it.
check = pd.read_sql("SELECT * FROM StationsData LIMIT 5", con=con)
check

## Last but not least, let's set things up so that our database automatically updates every 15 seconds. 

In [None]:
import time 
import json 
import urllib.request
from datetime import datetime

# Poll the CitiBike feed every 15 seconds and write each station entry to the
# StationsData table. Runs until interrupted (e.g. Kernel > Interrupt).

FEED_URL = "https://feeds.citibikenyc.com/stations/stations.json"
POLL_SECONDS = 15           # pause between polls of the feed
FETCH_TIMEOUT_SECONDS = 30  # give up on a stalled request instead of hanging forever

# The statement never changes, so build it once instead of on every pass.
query_template = """INSERT OR IGNORE INTO StationsData(station_id, stationName, availableDocks, totalDocks, latitude, \
    longitude, statusValue, statusKey, availableBikes, stAddress1, stAddress2, city, postalCode, location, altitude, \
    testStation, lastCommunicationTime, landMark) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?);"""


def _station_row(entry):
    """Convert one feed entry into the tuple of values bound to query_template.

    The values are returned in the same order as the columns named in the
    INSERT statement. Raises KeyError if the entry lacks an expected key.
    """
    return (
        int(entry['id']),
        str(entry['stationName']),
        int(entry['availableDocks']),
        int(entry['totalDocks']),
        str(entry['latitude']),
        str(entry['longitude']),
        str(entry['statusValue']),
        int(entry['statusKey']),
        int(entry['availableBikes']),
        str(entry['stAddress1']),
        str(entry['stAddress2']),
        str(entry['city']),
        str(entry['postalCode']),
        str(entry['location']),
        str(entry['altitude']),
        bool(entry['testStation']),
        entry['lastCommunicationTime'],
        str(entry['landMark']),
    )


# Open ONE connection for the whole polling session. Connecting inside the
# loop would create a new, never-closed connection every 15 seconds.
poll_con = sqlite3.connect('citibikeData.db') # connect to our db
try:
    while True:
        try:
            with urllib.request.urlopen(FEED_URL, timeout=FETCH_TIMEOUT_SECONDS) as url:
                data = json.loads(url.read().decode())
        except (OSError, ValueError) as err:
            # URLError and socket timeouts are OSError subclasses; malformed
            # JSON or undecodable bytes raise ValueError subclasses. A single
            # failed poll should not stop the collector, so report and retry.
            print("Could not fetch station feed, retrying in", POLL_SECONDS, "seconds:", err)
            time.sleep(POLL_SECONDS)
            continue

        stations = data['stationBeanList'] # the per-station entries from the feed

        for entry in stations: # for every station entry in the json
            query_parameters = _station_row(entry)
            print("Inserting Station:", *query_parameters)
            poll_con.execute(query_template, query_parameters)

        # Commit once per poll so each snapshot is written as a single batch.
        poll_con.commit()

        time.sleep(POLL_SECONDS)
finally:
    # Runs when the loop is interrupted or fails: release the database handle.
    poll_con.close()

---

## I hope this has helped you find your programming legs! Next week we'll get back to descriptive analytics using Python and Pandas. For now, take time to refresh yourself on the content covered in "Introduction to Programming". 

## If you need a refresher on your SQL skills, check out the "Supplementary Info" directory in the class repo.