In [None]:
############################################
# 
# Marcus Bischof
# Divvy EDA : Chicago
#
############################################

# Operations
import pandas as pd
import numpy as np

# Custom functions
from functions_for_eda import *

# Data viz
from matplotlib import pyplot as plt
import seaborn as sns

# Maps
import folium

# Jupyter display: let pandas show up to 500 rows and 500 columns before truncating.
pd.options.display.max_rows = 500
pd.options.display.max_columns = 500

# One-off preprocessing switch: flip to True to load the raw .csv
# and create a single memory efficient .pkl.
CREATE_SMALL_MEMORY_SET = False
if CREATE_SMALL_MEMORY_SET:
    create_memory_efficient_pkl()

# One-off preprocessing switch: flip to True to break up the 860+mb
# memory efficient .pkl into 10 slices.
CREATE_SLICES_OF_MEMORY_EFFICIENT_PKL = False
if CREATE_SLICES_OF_MEMORY_EFFICIENT_PKL:
    create_slices_of_memory_efficient_pkl()

# Work on one slice of the trips data for the rest of the notebook.
df = pd.read_pickle('../data/interim/df_0_1000000.pkl')

In [None]:
# Create a DataFrame of unique stations, and their point (point == (latitude, longitude)),
# assuming stations always contain the same lat & long: the coordinates of the first
# trip seen for each station are taken as that station's location.
#
# drop_duplicates keeps the first row per station in a single pass, instead of
# re-filtering the whole trips frame twice for every station. Rows without a
# station name are dropped, since there is nothing to key them on.
stations = (
    df[['from_station_name', 'latitude_start', 'longitude_start']]
    .drop_duplicates(subset='from_station_name', keep='first')
    .dropna(subset=['from_station_name'])
    .rename(columns={
        'from_station_name': 'station',
        'latitude_start': 'lat',
        'longitude_start': 'long',
    })
    .reset_index(drop=True)
)

In [None]:
# Wicker Park is a nicely defined square.
# Its (approximate) corners below were read off the Google Maps overlay seen below,
# with latitudes and longitudes taken from https://www.latlong.net/.
# Each corner is a (latitude, longitude) tuple.
wicker_tr = (41.914238, -87.667908)
wicker_tl = (41.913950, -87.687312)
wicker_bl = (41.903032, -87.687017)
wicker_br = (41.903354, -87.667525)

# The corners are not perfectly axis-aligned, so a station must satisfy the
# tighter of the two corners on every side.
lat_floor = max(wicker_br[0], wicker_bl[0])
lat_ceiling = min(wicker_tr[0], wicker_tl[0])
long_floor = max(wicker_tl[1], wicker_bl[1])
long_ceiling = min(wicker_tr[1], wicker_br[1])

# Get the Wicker Park stations (bounds are inclusive on both ends).
wicker_park_stations = stations[
    stations['lat'].between(lat_floor, lat_ceiling) &
    stations['long'].between(long_floor, long_ceiling)
]

In [None]:
# Base map for all markers added later in the notebook.
# Starting latitude and longitude taken from:
# https://alysivji.github.io/getting-started-with-folium.html
m = folium.Map(
    location=[41.8781, -87.6298],
    tiles="CartoDB dark_matter",
    zoom_start=11,
)

In [None]:
# Plot the stations: one red marker per station, with the station name as the popup.
# Fields are read by name rather than by position. Positional access depends on the
# DataFrame's column order, and a frame built from a list of dicts keeps the dicts'
# key order on current pandas, which would pass the station name as the latitude.
for station_row in stations.itertuples(index=False):
    folium.Marker(
        [station_row.lat, station_row.long],
        popup=station_row.station,
        icon=folium.Icon(color='red'),
    ).add_to(m)

In [None]:
# Display the map with the markers added so far (Jupyter renders a cell's last expression).
m

In [None]:
# Create a column that tracks whether a trip ended at the station it started at.
# The two name columns are compared element-wise instead of with a row-wise apply:
# this avoids positional x[0] / x[1] lookups on a label-indexed row and a Python-level
# loop over every trip. Casting to object first keeps the comparison valid even if
# the columns are categoricals with different category sets. Missing names compare
# as not equal, so such trips are flagged False.
df['same_station_trip'] = (
    df['from_station_name'].astype(object) == df['to_station_name'].astype(object)
)

In [None]:
# Plot the start and end stations of the first 100 trips.
# We want two different colors for stations where a journey originates, and stations
# where a journey ends. folium.Icon only honours its fixed list of named colors
# (hex codes are not accepted), so named colors are used; both differ from the
# red used for the plain station markers.
START_MARKER_COLOR = 'green'
END_MARKER_COLOR = 'purple'

trip_sample = df[[
    'from_station_name', 'latitude_start', 'longitude_start',
    'to_station_name', 'latitude_end', 'longitude_end',
]].head(100)

for trip in trip_sample.itertuples(index=False):
    endpoints = (
        (trip.from_station_name, trip.latitude_start, trip.longitude_start, START_MARKER_COLOR),
        (trip.to_station_name, trip.latitude_end, trip.longitude_end, END_MARKER_COLOR),
    )
    for name, lat, long, color in endpoints:
        folium.Marker([lat, long], popup=name, icon=folium.Icon(color=color)).add_to(m)


In [None]:
# Add square encircling wicker to the map.
# The outline is built from the four corner tuples, walking around the perimeter;
# the first corner is repeated at the end so the line closes into a full outline.
wicker_points = [wicker_tl, wicker_tr, wicker_br, wicker_bl, wicker_tl]
folium.PolyLine(wicker_points, color='blue').add_to(m)

In [None]:
# Display the map again, now including everything added to it since the last render.
m