In [1]:
# Standard tools
import numpy as np
import pandas as pd
import datetime as dt
from collections import Counter

import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import axes3d, Axes3D # VERY important to have capitalization
import seaborn as sns
import re

# Turn off warnings
import warnings
warnings.filterwarnings('ignore')

# Pandas options
pd.set_option("max_rows", 30)
pd.set_option("max_columns", None)
pd.set_option("precision", 3)

import geocoder
import gmaps
import gmaps.datasets

with open(r'/home/harrisonized/Desktop/gmaps_apikey.txt') as f: # Grab API Key
    api_key = f.readline()
    f.close
    
gmaps.configure(api_key=api_key) # Fill in API Key

# For opening and closing files
import glob

In [2]:
filenames_list = glob.glob("*_proc.csv") # Grab a list of filenames
filenames_list = sorted(filenames_list)

# Separating into two groups based on timestamps
filenames_list_norm = filenames_list[0:32] + filenames_list[49:] 
turnstile_proc_norm = [pd.read_csv(filename) for filename in filenames_list_normal] # Put the dataframes into a list that can be called by index

filenames_list_dst = filenames_list[32:49]
turnstile_proc_dst = [pd.read_csv(filename) for filename in filenames_list_dst] # Put the dataframes into a list that can be called by index

In [5]:
# Show that the import worked for normal files
for i in range(0, len(filenames_list_norm)):
    print(i, filenames_list_norm[i])
    print(turnstile_proc_norm[i].iloc[:, 1:5].head())

0 turnstile_180331_proc.csv
       StationName  Latitude  Longitude  2018-03-24 00:00:00
0  34 ST-HERALD SQ    40.749    -73.989               5290.0
1   TIMES SQ-42 ST    40.755    -73.987               7404.0
2   34 ST-PENN STA    40.751    -73.990               3990.0
3   59 ST COLUMBUS    40.768    -73.982               3229.0
4            86 ST    40.780    -73.956               1766.0
1 turnstile_180407_proc.csv
       StationName  Latitude  Longitude  2018-03-31 00:00:00
0  34 ST-HERALD SQ    40.749    -73.989               4532.0
1   TIMES SQ-42 ST    40.755    -73.987               7059.0
2   34 ST-PENN STA    40.751    -73.990               4174.0
3   59 ST COLUMBUS    40.768    -73.982               2739.0
4            86 ST    40.780    -73.956               1819.0
2 turnstile_180414_proc.csv
       StationName  Latitude  Longitude  2018-04-07 00:00:00
0  34 ST-HERALD SQ    40.749    -73.989               6049.0
1   TIMES SQ-42 ST    40.755    -73.987               7202.0
2

In [6]:
# Show that the import worked for dst files
for i in range(0, len(filenames_list_dst)):
    print(i, filenames_list_dst[i])
    print(turnstile_proc_dst[i].iloc[:, 1:5].head())

0 turnstile_181117_proc.csv
       StationName  Latitude  Longitude  2018-11-10 03:00:00
0  34 ST-HERALD SQ    40.749    -73.989               3085.0
1   TIMES SQ-42 ST    40.755    -73.987               2930.0
2   34 ST-PENN STA    40.751    -73.990               3953.0
3   59 ST COLUMBUS    40.768    -73.982               1930.0
4            86 ST    40.780    -73.956               2600.0
1 turnstile_181124_proc.csv
       StationName  Latitude  Longitude  2018-11-17 03:00:00
0  34 ST-HERALD SQ    40.749    -73.989               3484.0
1   TIMES SQ-42 ST    40.755    -73.987               3024.0
2   34 ST-PENN STA    40.751    -73.990               3947.0
3   59 ST COLUMBUS    40.768    -73.982               1958.0
4            86 ST    40.780    -73.956               1807.0
2 turnstile_181201_proc.csv
       StationName  Latitude  Longitude  2018-11-24 03:00:00
0  34 ST-HERALD SQ    40.749    -73.989               2981.0
1   TIMES SQ-42 ST    40.755    -73.987               2940.0
2

In [None]:
# Formatting stuff for norm
turnstile_proc_norm_form = list(map(lambda x: x.set_index("StationName").drop(columns = {'Unnamed: 0', "Latitude", "Longitude"}), turnstile_proc_norm))
turnstile_proc_norm_form_cat = pd.concat([turnstile_proc_norm_form[i] for i in range(0, len(turnstile_proc_norm_form))], axis=1, sort=False)
#turnstile_proc_norm_form_cat.transpose().to_csv("turnstile_norm_cat.csv")

In [None]:
# Formatting stuff for dst
turnstile_proc_dst_form = list(map(lambda x: x.set_index("StationName").drop(columns = {'Unnamed: 0', "Latitude", "Longitude"}), turnstile_proc_dst))
turnstile_proc_dst_form_cat = pd.concat([turnstile_proc_dst_form[i] for i in range(len(turnstile_proc_dst_form))], axis=1, sort=False)
#turnstile_proc_dst_form_cat.transpose().to_csv("turnstile_dst_cat.csv")

In [None]:
turnstile_norm = pd.read_csv(r"turnstile_norm_cat.csv")
turnstile_norm = turnstile_norm.rename(columns = {"Unnamed: 0" : "DateTime"}).set_index("DateTime") # Formatting
turnstile_norm # Preview

In [None]:
turnstile_dst = pd.read_csv(r"turnstile_dst_cat.csv")
turnstile_dst = turnstile_dst.rename(columns = {"Unnamed: 0" : "DateTime"}).set_index("DateTime") # formatting
turnstile_dst # Preview

In [None]:
turnstile_norm[['34 ST-HERALD SQ','TIMES SQ-42 ST']].iloc[:]

In [None]:
from matplotlib.font_manager import FontProperties

In [None]:
turnstile_norm.iloc[:42, 0:10].plot(ylim = (0, 100000), xticks=None)
plt.xticks(rotation=90)

In [None]:
turnstile_norm[['34 ST-HERALD SQ','TIMES SQ-42 ST']].iloc[:42].plot(ylim = (0, 100000), xticks=None)
plt.xticks(rotation=90)

In [None]:
turnstile_norm.iloc[42*3-3:42*3-3, 0:3].plot(ylim = (0, 100000))

In [9]:
turnstile_proc_norm[0]

Unnamed: 0.1,Unnamed: 0,StationName,Latitude,Longitude,2018-03-24 00:00:00,2018-03-24 04:00:00,2018-03-24 08:00:00,2018-03-24 12:00:00,2018-03-24 16:00:00,2018-03-24 20:00:00,2018-03-25 00:00:00,2018-03-25 04:00:00,2018-03-25 08:00:00,2018-03-25 12:00:00,2018-03-25 16:00:00,2018-03-25 20:00:00,2018-03-26 00:00:00,2018-03-26 04:00:00,2018-03-26 08:00:00,2018-03-26 12:00:00,2018-03-26 16:00:00,2018-03-26 20:00:00,2018-03-27 00:00:00,2018-03-27 04:00:00,2018-03-27 08:00:00,2018-03-27 12:00:00,2018-03-27 16:00:00,2018-03-27 20:00:00,2018-03-28 00:00:00,2018-03-28 04:00:00,2018-03-28 08:00:00,2018-03-28 12:00:00,2018-03-28 16:00:00,2018-03-28 20:00:00,2018-03-29 00:00:00,2018-03-29 04:00:00,2018-03-29 08:00:00,2018-03-29 12:00:00,2018-03-29 16:00:00,2018-03-29 20:00:00,2018-03-30 00:00:00,2018-03-30 04:00:00,2018-03-30 08:00:00,2018-03-30 12:00:00,2018-03-30 16:00:00
0,0,34 ST-HERALD SQ,40.749,-73.989,5290.0,4989.0,22641.0,39258.0,45497.0,25397.0,4959.0,3329.0,16726.0,31871.0,34838.0,16971.0,1858.0,17148.0,62569.0,42941.0,88409.0,26544.0,2340.0,17795.0,65179.0,47345.0,88398.0,27214.0,2575.0,17400.0,62974.0,47270.0,88507.0,28918.0,2943.0,17181.0,64696.0,48543.0,87693.0,29395.0,3266.0,12885.0,50122.0,56559.0,75342.0
1,1,TIMES SQ-42 ST,40.755,-73.987,7404.0,4979.0,18779.0,37793.0,40275.0,27322.0,6354.0,2913.0,13452.0,25116.0,27860.0,16582.0,2937.0,13347.0,49565.0,32745.0,66188.0,23314.0,3570.0,13950.0,51849.0,36639.0,70119.0,26104.0,3798.0,14090.0,52127.0,38770.0,71745.0,28870.0,4420.0,13476.0,52249.0,38587.0,70562.0,30440.0,5325.0,9816.0,39354.0,40861.0,57168.0
2,2,34 ST-PENN STA,40.751,-73.990,3990.0,5309.0,22804.0,30399.0,33446.0,18063.0,3497.0,3416.0,16053.0,24811.0,28150.0,14909.0,1595.0,25158.0,62176.0,37169.0,75612.0,20705.0,2072.0,25863.0,63002.0,40014.0,78705.0,20714.0,2462.0,26203.0,62836.0,38062.0,80069.0,23034.0,2573.0,24961.0,64108.0,42632.0,79866.0,23661.0,2812.0,16748.0,46412.0,47915.0,54609.0
3,3,59 ST COLUMBUS,40.768,-73.982,3229.0,3309.0,22478.0,42805.0,28805.0,12849.0,2509.0,2168.0,12332.0,21229.0,20694.0,8401.0,1257.0,10272.0,33128.0,28937.0,43451.0,14545.0,1584.0,10775.0,34560.0,30623.0,44318.0,15979.0,1665.0,10761.0,33722.0,30340.0,44614.0,17137.0,1935.0,10311.0,33604.0,29899.0,44163.0,17285.0,2370.0,6967.0,24660.0,28503.0,31920.0
4,4,86 ST,40.780,-73.956,1766.0,3356.0,13295.0,22862.0,22002.0,10008.0,1776.0,2249.0,10261.0,16902.0,14871.0,6084.0,842.0,12685.0,29276.0,24297.0,36283.0,11046.0,998.0,12818.0,31155.0,26191.0,37662.0,11292.0,1084.0,12642.0,31233.0,25856.0,36681.0,12311.0,1264.0,12374.0,30704.0,26770.0,36601.0,11948.0,1361.0,8303.0,26068.0,27647.0,29492.0
5,5,GRD CNTRL-42 ST,40.752,-73.977,2730.0,3683.0,15202.0,24757.0,22941.0,10941.0,2419.0,1952.0,10943.0,17077.0,16543.0,7285.0,966.0,15609.0,43756.0,25124.0,49517.0,12508.0,1755.0,16064.0,45588.0,28746.0,51521.0,13283.0,1806.0,15859.0,45170.0,28039.0,50827.0,13981.0,1883.0,14928.0,45630.0,29779.0,51213.0,14002.0,2128.0,9958.0,32606.0,31718.0,37371.0
6,6,59 ST,40.642,-74.018,2384.0,4598.0,13538.0,21086.0,20090.0,9112.0,2018.0,2935.0,9227.0,14293.0,13096.0,6028.0,1174.0,14007.0,32222.0,24413.0,40408.0,11152.0,1465.0,14043.0,34940.0,26814.0,41718.0,11709.0,1571.0,13656.0,34447.0,27268.0,41455.0,12813.0,1772.0,13444.0,33622.0,26184.0,40537.0,12275.0,1804.0,8566.0,24344.0,25073.0,27831.0
7,7,FLUSHING-MAIN,40.760,-73.830,2193.0,5716.0,16460.0,18321.0,19640.0,11571.0,2074.0,3259.0,12537.0,14876.0,14804.0,8709.0,1231.0,17233.0,24518.0,18736.0,29758.0,13556.0,1610.0,17202.0,24955.0,18568.0,29575.0,13542.0,1476.0,17391.0,24397.0,18773.0,30080.0,14024.0,1691.0,16986.0,24978.0,18722.0,29959.0,14970.0,1876.0,11761.0,20845.0,18334.0,25519.0
8,8,ATL AV-BARCLAY,40.684,-73.979,1903.0,2097.0,8979.0,15873.0,19182.0,11949.0,1902.0,1519.0,8488.0,19413.0,14080.0,5406.0,806.0,8589.0,21175.0,16446.0,30686.0,11215.0,922.0,8532.0,21924.0,16510.0,27449.0,8214.0,923.0,8999.0,20723.0,16505.0,27773.0,9050.0,1118.0,8711.0,20182.0,17031.0,26802.0,9075.0,1236.0,5316.0,15019.0,18085.0,26243.0
9,9,23 ST,40.741,-73.989,1740.0,1563.0,9100.0,16071.0,17400.0,8169.0,1500.0,1025.0,6425.0,12129.0,11999.0,5116.0,564.0,8027.0,35725.0,25603.0,48160.0,14146.0,946.0,8600.0,37734.0,27543.0,50870.0,15369.0,1069.0,8356.0,37037.0,28032.0,49580.0,15903.0,1231.0,8085.0,30817.0,23273.0,50508.0,16685.0,1545.0,5483.0,27789.0,27724.0,38174.0


In [None]:
"""

keys = turnstile_190330_proc_df["StationName"][20:0:-1]
vals = turnstile_190330_proc_df.iloc[:,6][20:0:-1]

plt.barh(keys, np.divide(list(vals), sum(vals)))

plt.ylabel('Station Name')
plt.xlabel('Value Counts (millions)')

plt.show()

"""

In [23]:
locations = turnstile_proc_norm[0][['Latitude', 'Longitude']].iloc[0:30]
locations

Unnamed: 0,Latitude,Longitude
0,40.749,-73.989
1,40.755,-73.987
2,40.751,-73.99
3,40.768,-73.982
4,40.78,-73.956
5,40.752,-73.977
6,40.642,-74.018
7,40.76,-73.83
8,40.684,-73.979
9,40.741,-73.989


In [52]:
#Run this after ready

#Configuring the dimensions
figure_layout = {'width': '800px', 'height': '600px','padding': '1px', 'margin': '0 auto 0 auto'}

#Get the locations from the data set
locations = turnstile_proc_norm[0][['Latitude', 'Longitude']].iloc[0:20]

#Get the weights from the data
weights = turnstile_proc_norm[0]['2018-03-24 00:00:00'].iloc[0:20]

#Set up the map
fig = gmaps.figure(layout=figure_layout)
fig.add_layer(gmaps.heatmap_layer(locations, weights=weights))
gmaps.heatmap_layer.max_intensity = 30
gmaps.heatmap_layer.min_intensity = 5
fig

Figure(layout=FigureLayout(height='600px', margin='0 auto 0 auto', padding='1px', width='800px'))

In [27]:
tech_hub_locations_df = pd.read_csv("TechHubLocations.csv")

In [30]:
tech_hub_locations_df

Unnamed: 0,location,latitude,longitude
0,1776 (Startup Inncubator),40.703,-73.97
1,Facebook NY,40.731,-73.991
2,Angelpad,40.736,-73.994
3,Google,40.741,-74.002
4,NYU,40.73,-73.996


In [53]:
#Configuring the dimensions
figure_layout = {'width': '800px', 'height': '600px','padding': '1px', 'margin': '0 auto 0 auto'}

#Get the locations from the data set
locations = tech_hub_locations_df[['latitude', 'longitude']].iloc[0:5]

#Get the weights from the data
weights = tech_hub_locations_df['latitude'].iloc[0:5]

#Get the weights from the data
location_label = tech_hub_locations_df['location'].iloc[0:5]

#Set up the map
fig = gmaps.figure(layout=figure_layout)
fig.add_layer(gmaps.heatmap_layer(locations, weights=weights))
gmaps.heatmap_layer.max_intensity = 30
gmaps.heatmap_layer.min_intensity = 5
fig

Figure(layout=FigureLayout(height='600px', margin='0 auto 0 auto', padding='1px', width='800px'))

In [28]:
marker_locations = [(10.0, 10.0), (20.0, 30.0)]
info_boxes = [gmaps.InfoBox('South Carolina'), gmaps.InfoBox('Vermont')]
markers = [
    gmaps.Marker(location=location, info_box=info_box)
    for location, info_box in zip(marker_locations, info_boxes)
]
marker_layer = gmaps.Markers(markers=markers)

m = gmaps.Map()
m.add_layer(marker_layer)

AttributeError: module 'gmaps' has no attribute 'InfoBox'