In [1]:
%matplotlib inline

import os

import pandas as pd
import numpy as np

from ipyleaflet import Map, Marker, GeoJSON, Circle
from matplotlib.colors import LogNorm, rgb2hex
from geojson import Polygon, Feature, Point
from matplotlib.cm import hot_r
from tqdm import tqdm 

PATH_TO_DATA = '../data'

#### 1. Загрузите агрегированные данные о поездках в мае 2016. Просуммируйте общее количество поездок такси из каждой географической зоны и посчитайте количество ячеек, из которых в мае не было совершено ни одной поездки.

In [2]:
data = pd.read_csv(os.path.join(PATH_TO_DATA, 'aggregated_yellow_tripdata_2016-05.csv'), index_col=0)
regions = pd.read_csv(os.path.join(PATH_TO_DATA, 'regions.csv'), sep=';', index_col=0)

In [3]:
data.head()

Unnamed: 0,2016-05-01 00:00:00,2016-05-01 01:00:00,2016-05-01 02:00:00,2016-05-01 03:00:00,2016-05-01 04:00:00,2016-05-01 05:00:00,2016-05-01 06:00:00,2016-05-01 07:00:00,2016-05-01 08:00:00,2016-05-01 09:00:00,...,2016-05-31 14:00:00,2016-05-31 15:00:00,2016-05-31 16:00:00,2016-05-31 17:00:00,2016-05-31 18:00:00,2016-05-31 19:00:00,2016-05-31 20:00:00,2016-05-31 21:00:00,2016-05-31 22:00:00,2016-05-31 23:00:00
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [4]:
sums = data.sum(axis=1)
sums = sums[sums == 0]
sums.shape

(1283,)

И того подучается 1283 региона где нет ни одной поездки.

#### 2. Нарисуйте статическую карту Нью-Йорка. Поставьте на карте точку там, где находится Эмпайр-Стейт-Билдинг.

In [4]:
m = Map(center=[40.748817, -73.985428], zoom=10)

mark = Marker(location=[40.748817, -73.985428])
mark.visible
m += mark

m

#### 3. Поверх статической карты Нью-Йорка визуализируйте данные о поездках из каждой ячейки так, чтобы цветовая шкала, в которую вы окрашиваете каждую ячейку, показывала суммарное количество поездок такси из неё.

In [5]:
def create_rectangle(row, color, opacity):
    x1, x2, y1, *y2 = row
    coordinates = [[[x1, y1],[x1, y2],[x2, y2],[x2, y1]]]
    poly = Polygon(coordinates)
    prop = {"style":{"color":'black', "fillColor":color, "fillOpacity":opacity, "opacity":0.5, "weight":1}}
    feature = Feature(geometry=poly, properties=prop)
    return feature

In [6]:
def get_heat_map(keys):
    colors =[rgb2hex(d[0:3]) for d in hot_r(LogNorm(vmin=1, vmax=1000000)(keys.unique()))]
    color_dict = {k: v for k, v in zip(keys.unique(), colors)}
    
    m = Map(center=[40.708817, -73.985428], zoom=10)
    
    for region in tqdm(range(1, 2501)):
        row = regions.loc[[region]].values[0]
        
        if region in keys.index:
            color = color_dict[keys[region]]
            opacity = 0.0 if keys[region] == 0 else 0.5
        else:
            color = '#FFFFFF'
            opacity = 0.0
            
        feature = create_rectangle(row, color, opacity)
        m += GeoJSON(data=feature)
        
    return m
    

In [7]:
get_heat_map(data.sum(axis=1))

100%|█████████████████████████████████████████████████████████████████████████████████████████████| 2500/2500 [01:40<00:00, 24.92it/s]


#### 4. Вставьте интерактивную карту Нью-Йорка — такую, которую можно прокручивать и увеличивать. Поставьте метку там, где находится статуя свободы.

In [8]:
m3 = Map(center=[40.708817, -73.985428], zoom=10)
m3 += Circle(location=[40.689249, -74.044500])
m3

#### 5. Нарисуйте на интерактивной карте Нью-Йорка ячейки так, чтобы их цвет показывал среднее за месяц количество поездок такси в час из этой зоны.

In [9]:
get_heat_map(data.mean(axis=1))

100%|█████████████████████████████████████████████████████████████████████████████████████████████| 2500/2500 [01:34<00:00, 16.22it/s]


#### 6. Чтобы не выбирать из всех 2500 ячеек вручную, отфильтруйте ячейки, из которых в мае совершается в среднем меньше 5 поездок в час. Посчитайте количество оставшихся. Проверьте на карте, что среди этих ячеек нет таких, из которых поездки на самом деле невозможны.

In [10]:
means = data.mean(axis=1)
means = means[means >= 5]
get_heat_map(means)

100%|█████████████████████████████████████████████████████████████████████████████████████████████| 2500/2500 [01:34<00:00, 13.03it/s]


In [11]:
means.shape

(102,)

И того, получается что только в 102 районах средрее количество поездок больше или равно 5.
Если посмотреть на крту то видно что все эти районы выглядят как вполне возможные точки старта.