In [1]:
import folium 
import geojson 
import geopandas as gpd 
import numpy as np 
import pandas as pd 
from matplotlib import pyplot as plt
import datetime as dt 
from shapely.geometry import Point, Polygon
import sys
import glob 
import os 
from scipy import stats 
from sklearn.linear_model import LogisticRegression 
from sklearn.model_selection import train_test_split 
from sklearn import preprocessing
import seaborn as sns
import copy

# Folder scanned for the trip-data CSV files.
# NOTE(review): absolute, machine-specific path — consider a configurable DATA_DIR.
files_path = "C:/Users/Michael Ip/TDI/Capstone_Project/citibike_tripdata/2015_04_to_2015_05"

# glob gives no ordering guarantee; sorting keeps the concatenated row index
# (and therefore every index shown in later outputs) stable between runs.
read_files = sorted(glob.glob(os.path.join(files_path, "*.csv")))
if not read_files:
    # Without this guard pd.concat fails with an unhelpful
    # "No objects to concatenate" further down.
    raise FileNotFoundError("No CSV files found in {}".format(files_path))

# One DataFrame per CSV, first line of each file used as the header.
li = [pd.read_csv(csv_path, index_col=None, header=0) for csv_path in read_files]

# Stack all files into a single frame with a fresh 0..n-1 index.
df = pd.concat(li, axis=0, ignore_index=True)

# Shapefile loaded into a GeoDataFrame (not used by the cells visible here).
gpd_df = gpd.read_file("C:/Users/Michael Ip/TDI/Capstone_Project/citibike_tripdata/Neighborhood Tabulation Areas/geo_export_4f2d36a9-dd43-452a-9f81-a97957389244.shp")

# Base map; note that a later cell rebinds `m` to a new map before saving.
m = folium.Map(location = [40.720737, -73.917217], 
               tiles = 'Stamen Toner')

In [2]:
# Rider age relative to 2015: rsub(2015) evaluates 2015 - birth year element-wise,
# so rows with a missing birth year keep a missing age.
df["age"] = df["birth year"].rsub(2015)

In [3]:
# Group every trip by its start station name. The key is passed as a Series
# (not a column label) and sort=True, the default, is spelled out: groups are
# iterated in sorted station-name order.
start_stations = df.groupby(by=df["start station name"], sort=True)

In [7]:
# Map each start station name to a one-column ('age') DataFrame of its trips,
# with rows lacking an age removed. Key order follows the groupby iteration order.
indiv_station_dict = {
    station_name: trips[["age"]].dropna()
    for station_name, trips in start_stations
}

In [10]:
# Spot-check one station: the 'age' column (missing values already dropped)
# for trips that started at '1 Ave & E 15 St'.
indiv_station_dict['1 Ave & E 15 St']

Unnamed: 0,age
32,56.0
220,26.0
441,28.0
532,26.0
958,33.0
...,...
1614161,20.0
1614287,32.0
1614297,27.0
1614306,26.0


In [5]:
import altair as alt 

# Disable Altair's row limit so stations with many trips can still be charted.
alt.data_transformers.enable('default', max_rows=None)


def _age_histogram_json(station, frame):
    """Build a bar chart of trip counts per 5-year age bin and return it as JSON.

    `station` is used only for the chart title; `frame` must contain an 'age'
    column. Bins span the fixed extent 18 to 90.
    """
    age_axis = alt.X('age', bin=alt.Bin(extent=[18,90], step=5))
    histogram = alt.Chart(frame).mark_bar().encode(age_axis, y='count()')
    histogram = histogram.properties(width=450, height=250, title='{}'.format(station))
    return histogram.to_json()


# One JSON chart spec per station, in the iteration order of indiv_station_dict.
vis = [
    _age_histogram_json(station, frame)
    for station, frame in indiv_station_dict.items()
]

In [5]:
# Distinct rows of the columns at positions 4, 5 and 6 of the trip table.
# NOTE(review): selected by position — presumably start station name followed
# by its two coordinates; verify against the CSV header.
stations = (
    df.iloc[:, [4, 5, 6]]
    .drop_duplicates()
    .reset_index(drop=True)
)

# Bin edges 31, 32, ..., 44 (14 edges -> 13 one-year bins).
age_bin = np.linspace(31, 44, 14)

# Mean rider age per start station, as a two-column frame.
groups = (
    df.groupby("start station name")["age"]
    .mean()
    .reset_index(name="mean age")
)
# Mean ages outside the bin edges get a missing bin.
groups["age bins"] = pd.cut(groups["mean age"], bins=age_bin)

# Attach the per-station statistics to the station rows.
stations = groups.merge(stations, how='outer', on='start station name')


# Quantiles of the per-station mean age (min, 10%, 25%, median, 75%, 90%, max).
# Only the 25% and 75% cut-offs are read by colorfunc_age below.
(
    quantile_age_0,
    quantile_age_10,
    quantile_age_25,
    quantile_age_50,
    quantile_age_75,
    quantile_age_90,
    quantile_age_1,
) = (
    stations["mean age"].quantile(level)
    for level in (0, 0.10, 0.25, 0.50, 0.75, 0.90, 1)
)

def colorfunc_age(row): 
    """Pick a marker colour for a station from its mean rider age.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide a "mean age" entry.

    Returns
    -------
    str
        "blue"  - mean age below the 25% quantile (quantile_age_25);
        "red"   - from the 25% quantile up to, but excluding, the 75% quantile;
        "green" - at or above the 75% quantile (quantile_age_75);
        "gray"  - mean age is missing.
    """
    mean_age = row["mean age"]

    # A missing mean age fails every comparison; without this guard it would
    # fall through to the last branch and be coloured like the oldest group.
    if pd.isna(mean_age):
        return "gray"

    if mean_age < quantile_age_25:
        return "blue"
    if mean_age < quantile_age_75:
        return "red"
    return "green"

# Colour each station row by running colorfunc_age over the rows.
stations["age bin color"] = stations.apply(colorfunc_age, axis="columns")

In [6]:
# Plain list-of-lists view of the station table (one inner list per row),
# then show the first row as a sanity check of the column order.
station_list = stations.to_numpy().tolist()
print(station_list[0])

['1 Ave & E 15 St', 35.6789207022505, Interval(35.0, 36.0, closed='right'), 40.73221853, -73.98165557, 'blue']


In [8]:
# Fresh map for the station markers.
# NOTE(review): recent folium releases may no longer bundle the Stamen
# tilesets — confirm 'Stamen Terrain' still resolves in the installed version.
m = folium.Map(location = [40.729135, -73.992330], 
               tiles = 'Stamen Terrain',
               zoom_start=12)

# `vis` was built by iterating indiv_station_dict, so the two line up
# one-to-one. Refuse to continue if they have drifted apart (e.g. cells run
# out of order), because the name -> chart lookup below would be wrong.
if len(vis) != len(indiv_station_dict):
    raise ValueError("vis and indiv_station_dict are out of sync; re-run the chart cell")
histogram_by_station = dict(zip(indiv_station_dict, vis))

for station in station_list: 
    # Row layout: [name, mean age, age bin, coord, coord, colour].
    name, lat, lon, color = station[0], station[3], station[4], station[5]

    # Look the chart up by station name instead of by list position: the
    # station table can hold several rows for one name (one per distinct
    # coordinate pair), which would shift every later popup if paired by zip.
    histogram = histogram_by_station.get(name)
    popup = None
    if histogram is not None:
        popup = folium.Popup(max_width=500)
        popup.add_child(folium.VegaLite(histogram, width=500, height=250))

    folium.CircleMarker([lat, lon], 
                        radius=1.5, 
                        color=color, 
                        tooltip='{}'.format(name),
                        popup=popup).add_to(m)
m.save("map.html")