In [133]:
import requests, json, os
import psycopg2
from datetime import datetime
from dateutil import tz
import pandas as pd

In [134]:
requests.__version__

'2.22.0'

In [135]:
pd.__version__

'0.24.2'

# Make Connection to Google cloud function

In [136]:
import urllib.request

In [140]:
QUERY_FUNC = 'https://us-central1-divvy-bike-shari-1562130131708.cloudfunctions.net/query_from_divvy_cloudsql'

In [257]:
%%time
reslst = requests.post(QUERY_FUNC, json={'stationid': '35, 192'}).content.decode('utf-8')

CPU times: user 40.7 ms, sys: 59.4 ms, total: 100 ms
Wall time: 599 ms


In [258]:
def to_dataframe(raw):
    lst_of_lst = [v.split(',') for v in raw.split('\n')]
    df = pd.DataFrame(lst_of_lst, columns=['timestamp', 'stationid', 'bikes_avail', 'docks_avail'])
    return df

In [259]:
reslst = requests.post(QUERY_FUNC, json={'stationid': '35,192,100,2'}).content.decode('utf-8')

In [260]:
df = to_dataframe(reslst)

In [261]:
df.shape

(5638, 4)

In [262]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5638 entries, 0 to 5637
Data columns (total 4 columns):
timestamp      5638 non-null object
stationid      5638 non-null object
bikes_avail    5638 non-null object
docks_avail    5638 non-null object
dtypes: object(4)
memory usage: 176.3+ KB


In [394]:
tmp = df[df.stationid == '35'].copy()

In [395]:
tmp['timeindex'] = pd.to_datetime(df['timestamp']).dt.tz_localize('utc').dt.tz_convert('US/Central')
tmp['month'] = tmp.timeindex.apply(lambda x: x.month)
tmp['day'] = tmp.timeindex.apply(lambda x: x.day)
tmp['hour'] = tmp.timeindex.apply(lambda x: x.hour)

In [396]:
tmp.bikes_avail = tmp.bikes_avail.astype('int')
tmp.docks_avail = tmp.docks_avail.astype('int')

In [397]:
min_df = tmp.groupby(['month', 'day', 'hour'])[['timeindex', 'bikes_avail', 'docks_avail']].min()\
        .reset_index().rename(columns={"bikes_avail": "min_bikes", "docks_avail": "min_docks"})

In [398]:
max_df = tmp.groupby(['month', 'day', 'hour'])[['bikes_avail', 'docks_avail']].max()\
        .reset_index().rename(columns={"bikes_avail": "max_bikes", "docks_avail": "max_docks"})

In [399]:
ave_df = tmp.groupby(['month', 'day', 'hour'])[['bikes_avail', 'docks_avail']].mean()\
        .reset_index().rename(columns={"bikes_avail": "ave_bikes", "docks_avail": "ave_docks"})

In [400]:
min_df.merge(max_df, on=['month', 'day', 'hour']).merge(ave_df, on=['month', 'day', 'hour'])

Unnamed: 0,month,day,hour,timeindex,min_bikes,min_docks,max_bikes,max_docks,ave_bikes,ave_docks
0,8,8,18,2019-08-08 18:04:37.221301-05:00,11,19,24,33,17.250000,26.500000
1,8,8,19,2019-08-08 19:00:00.747401-05:00,0,24,20,44,8.583333,35.416667
2,8,8,20,2019-08-08 20:00:01.058098-05:00,0,35,9,44,3.333333,40.583333
3,8,8,21,2019-08-08 21:00:01.128349-05:00,8,30,14,36,9.833333,34.166667
4,8,8,22,2019-08-08 22:00:01.232417-05:00,6,29,15,38,10.416667,33.583333
5,8,8,23,2019-08-08 23:00:01.174269-05:00,7,35,9,37,8.833333,35.166667
6,8,9,0,2019-08-09 00:00:00.864880-05:00,9,32,12,35,9.750000,34.250000
7,8,9,1,2019-08-09 01:00:01.159534-05:00,12,32,12,32,12.000000,32.000000
8,8,9,2,2019-08-09 02:00:00.216699-05:00,12,32,12,32,12.000000,32.000000
9,8,9,3,2019-08-09 03:00:00.537010-05:00,12,32,12,32,12.000000,32.000000


In [232]:
# df.bikes_avail.rolling(12).min()

# Visualize data

In [151]:
import plotly
from plotly.offline import iplot, plot

In [152]:
plotly.__version__

'3.10.0'

# Query Live Station Status (Up to now)

In [237]:
DIVVY_STATION_URL = 'https://gbfs.divvybikes.com/gbfs/en/station_information.json'

In [254]:
res = requests.get(DIVVY_STATION_URL)
jsonres = res.json()

station_json = json.dumps(jsonres['data']['stations'])
station_status_df = pd.read_json(station_json)

In [255]:
cleaned_stationdata = station_status_df[['capacity', 'lat', 'lon', 'station_id', 'name', 'short_name']]
cleaned_stationdata.to_csv('station.csv')

In [256]:
cleaned_stationdata.head(5)

Unnamed: 0,capacity,lat,lon,station_id,name,short_name
0,39,41.86943,-87.622391,2,Buckingham Fountain (Temp),T316041805
1,55,41.867226,-87.615355,3,Shedd Aquarium,LF-001
2,23,41.856268,-87.613348,4,Burnham Harbor,KA1504000163
3,23,41.874053,-87.627716,5,State St & Harrison St,TA1305000036
4,39,41.886976,-87.612813,6,Dusable Harbor,KA1503000064


# Make Connection to postgres

## Initialize connection

In [41]:
gcp_sql_username = os.environ.get('gcp_sql_username')
gcp_sql_password = os.environ.get('gcp_sql_password')

conn = psycopg2.connect(user=gcp_sql_username, password=gcp_sql_password,
                        host='localhost', port='5432')

## Query data 

In [42]:
%%time
DISPLAY_ROWS = 10000

cur = conn.cursor()
cur.execute('SELECT * FROM divvylivedata WHERE stationid = %s;' %('192'))
print("Total rows: {}\nDisplayed rows: {}\n".format(cur.rowcount, DISPLAY_ROWS))

row_counter = 1
row = cur.fetchone()
while row is not None and row_counter <= DISPLAY_ROWS:
#     print(','.join([str(v) for v in row]))
    row = cur.fetchone()
    row_counter += 1
print(row_counter)
cur.close()

Total rows: 296
Displayed rows: 10000

297
CPU times: user 3.53 ms, sys: 3.01 ms, total: 6.55 ms
Wall time: 232 ms


### Convert unix timestamps into timestamps and consider timezone

In [165]:
utc_timestamp = datetime.utcfromtimestamp(1565246835).strftime('%Y-%m-%d %H:%M:%S')
print(utc_timestamp)

2019-08-08 06:47:15


In [167]:
# METHOD 1: Hardcode zones:
from_zone = tz.gettz('UTC')
to_zone = tz.gettz('America/Chicago')

# # METHOD 2: Auto-detect zones:
# from_zone = tz.tzutc()
# to_zone = tz.tzlocal()

# utc = datetime.utcnow()

utc = datetime.strptime(utc_timestamp, '%Y-%m-%d %H:%M:%S')

# Tell the datetime object that it's in UTC time zone since 
# datetime objects are 'naive' by default
utc = utc.replace(tzinfo=from_zone)

# Convert time zone
central = utc.astimezone(to_zone)

print("Local time in Chicago: ", central)

Local time in Chicago:  2019-08-08 01:47:15-05:00


## Close connection

In [50]:
if conn:
    conn.close()