# Map plots for the Datathon

In [1]:
import folium
import geopandas
import pandas as pd
import os

In [2]:
# Read the GeoJSON file into a GeoDataFrame and preview its first rows.
geo_json_path = '../Dataset/geo_json.json'
df = geopandas.read_file(geo_json_path)
df.head()

Unnamed: 0,OBJECTID,BoroCode,BoroName,CountyFIPS,NTACode,NTAName,Shape__Area,Shape__Length,geometry
0,1,3,Brooklyn,47,BK88,Borough Park,54005020.0,39247.228074,"POLYGON ((-73.9760507905698 40.6312841471042, ..."
1,2,4,Queens,81,QN51,Murray Hill,52488280.0,33266.904811,"POLYGON ((-73.8037916164017 40.7756183875692, ..."
2,3,4,Queens,81,QN27,East Elmhurst,19726950.0,19816.684513,"POLYGON ((-73.8610986495631 40.7636727481715, ..."
3,4,4,Queens,81,QN07,Hollis,22887770.0,20976.335837,"POLYGON ((-73.7572580842358 40.7181468677945, ..."
4,5,1,Manhattan,61,MN06,Manhattanville,10647080.0,17040.686548,"POLYGON ((-73.94607972197851 40.821271495794, ..."


In [3]:
def embed_map(m, path = '../Dataset/map.html', width = '100%', height = '750px'):
    """Save a map to an HTML file and return an IFrame that displays that file.

    Parameters
    ----------
    m : object
        Map to render; it only needs a ``save(path)`` method (e.g. a
        ``folium.Map``).
    path : str, optional
        Where the HTML is written, and the source the IFrame points at.
        Every call that uses the default writes to the same file, so an
        earlier frame can end up showing a later map once it is reloaded;
        pass a distinct path per map to keep them apart.
    width, height : str or int, optional
        Size forwarded unchanged to ``IPython.display.IFrame``.

    Returns
    -------
    IPython.display.IFrame
        Frame whose source is ``path``.
    """
    # Imported locally so the helper only needs IPython when it is called.
    from IPython.display import IFrame

    m.save(path)
    return IFrame(path, width = width, height = height)

In [4]:
# Folder that holds one "<name>.csv" file per dataset.
data_folder = '../Dataset'

dataset_names = [
    'demographics',
    'geographic',
    'green_trips',
    'mta_trips',
    'uber_trips_2014',
    'uber_trips_2015',
    'weather',
    'yellow_trips',
    'zones',
]

# Maps each dataset name to the DataFrame read from its CSV file.
datasets = {}
for dataset_name in dataset_names:
    filename = f"{dataset_name}.csv"
    filepath = os.path.join(data_folder, filename)

    if dataset_name == 'mta_trips':
        # Read line_name as str instead of letting pandas infer its dtype.
        datasets[dataset_name] = pd.read_csv(filepath, dtype = { 'line_name': str })
    else:
        datasets[dataset_name] = pd.read_csv(filepath)

    print(f"loaded: {filepath}")

print('All datasets loaded!')

loaded: ../Dataset/demographics.csv
loaded: ../Dataset/geographic.csv
loaded: ../Dataset/green_trips.csv
loaded: ../Dataset/mta_trips.csv
loaded: ../Dataset/uber_trips_2014.csv
loaded: ../Dataset/uber_trips_2015.csv
loaded: ../Dataset/weather.csv
loaded: ../Dataset/yellow_trips.csv
loaded: ../Dataset/zones.csv
All datasets loaded!


# Maps comparing the pickup points of Uber rides, green taxis, and yellow taxis

In [5]:
# Marker colour used for the pickup points of each trip dataset that is mapped.
trips_with_coordinates = {
    'uber_trips_2014': { 'color': 'black' },
    'yellow_trips': { 'color': 'yellow' },
    'green_trips': { 'color': 'green' }
}

# Datasets whose drop-off coordinates are drawn as well (in blue).
dropoffs = ['yellow_trips', 'green_trips']

# Upper bound on the trips drawn per map, and a fixed seed so that re-running
# the cell draws the same sample.
SAMPLE_SIZE = 10000
RANDOM_SEED = 42

# One (latitude, longitude) pair per station: the first non-null value of each
# column. Stations left without a complete pair are dropped because folium
# rejects NaN locations.
mta_stations_df = (
    datasets['mta_trips']
    .groupby('station')[['latitude', 'longitude']]
    .first()
    .dropna()
)


def _add_circle_marker(target_map, latitude, longitude, **marker_kwargs):
    """Add one filled, radius-3 CircleMarker at (latitude, longitude) to target_map."""
    folium.CircleMarker(
        location = [latitude, longitude], radius = 3, fill = True, **marker_kwargs
    ).add_to(target_map)


# One folium map per dataset, keyed by dataset name.
maps = {}
for dataset_name, value in trips_with_coordinates.items():

    # NOTE(review): recent folium releases may no longer bundle the Stamen tile
    # sets - confirm 'Stamen Terrain' still resolves in the installed version.
    folium_map = folium.Map(location = [40.738, -73.98], zoom_start = 12, tiles = 'Stamen Terrain')

    # NOTE: this rebinds `df`, which held the GeoJSON frame loaded earlier.
    df = datasets[dataset_name]

    has_dropoffs = dataset_name in dropoffs
    coordinate_columns = ['pickup_latitude', 'pickup_longitude']
    if has_dropoffs:
        coordinate_columns += ['dropoff_latitude', 'dropoff_longitude']

    # Keep only rows whose plotted coordinates are all present, then sample at
    # most SAMPLE_SIZE of them; sample() raises if asked for more rows than exist.
    plottable_trips = df.dropna(subset = coordinate_columns)
    trips_sample = plottable_trips[coordinate_columns].sample(
        min(SAMPLE_SIZE, len(plottable_trips)), random_state = RANDOM_SEED
    )

    # Pickup first, then the same trip's drop-off, so markers are added in the
    # same interleaved order as a row-by-row pass.
    for trip in trips_sample.itertuples(index = False):
        _add_circle_marker(
            folium_map, trip.pickup_latitude, trip.pickup_longitude,
            color = value['color'], stroke = False
        )
        if has_dropoffs:
            _add_circle_marker(
                folium_map, trip.dropoff_latitude, trip.dropoff_longitude,
                color = 'blue', stroke = False
            )

    # MTA stations are added last, in red with an outline.
    for station in mta_stations_df.itertuples(index = False):
        _add_circle_marker(
            folium_map, station.latitude, station.longitude,
            color = 'red', stroke = True, weight = 2
        )

    maps[dataset_name] = folium_map

In [9]:
# Display the uber_trips_2014 map (pickups in black, MTA stations in red).
embed_map(maps['uber_trips_2014'])

In [10]:
# Display the yellow_trips map (pickups in yellow, drop-offs in blue, MTA stations in red).
embed_map(maps['yellow_trips'])

In [8]:
# Display the green_trips map (pickups in green, drop-offs in blue, MTA stations in red).
embed_map(maps['green_trips'])

# TODO: Also plot the drop-off points for the yellow and green taxis to see what they show; that may be more interesting.