In [408]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime as dt
import folium
import folium.plugins as plugins
from sklearn.cluster import KMeans
import seaborn as sns; sns.set()
import geopy.distance

In [410]:
pd.options.display.max_rows = 100

In [238]:
file_name = "./data/2019_vy.csv"

#get DateTime on the correct format (in accordance with excel file)
def load_file(file_name):
    df = pd.read_csv(file_name)
    
    # change date format
    df['DateTime'] = pd.TimedeltaIndex(df['DateTime'], unit='d') + dt.datetime(1899,12,30)
    
    # reformat coordinates to dot-separated floats
    df['Long'] = df['Long'].apply(lambda x: float(str(x).replace(',','.')))
    df['Lat'] = df['Lat'].apply(lambda x: float(str(x).replace(',','.')))
    
    # remove outliers
    df = df[(df['Long'] <= 11) & (df['Long'] >= 10.62)]
    # only keep trip if it has a start and an end
    df = df[df.duplicated('Trip ID', keep=False)]
    return df

## Assign zone to each row

In [405]:
data = load_file(file_name)
data_copy = data[:100].copy()

lower_left = [59.855331, 10.601628]
upper_right = [59.973287, 10.950989]

zones = get_geojson_grid(upper_right, lower_left, lat_dim=26, lon_dim=40)

for zone in zones:
    upper_left = zone['features'][0]['geometry']['coordinates'][0][0]    
    upper_right = zone['features'][0]['geometry']['coordinates'][0][1]    
    lower_right = zone['features'][0]['geometry']['coordinates'][0][2]    
    lower_left = zone['features'][0]['geometry']['coordinates'][0][3]
    
    
    data_copy.loc[
        (data_copy.Long < float(upper_right[0]))
        & (data_copy.Long > float(upper_left[0]))
        & (data_copy.Lat > float(lower_left[1]))
        & (data_copy.Lat < float(upper_left[1])),'Zone'] = float(zone['Number'])
    

In [411]:
data_copy.tail(1000)

Unnamed: 0,DateTime,Trip ID,TripStage,Long,Lat,Car ID,Trip duration (min),Zone
0,2018-12-31 11:39:21.888000000,1,Start,10.73382,59.92934,V5a15A3226E5F7,1403.35,656.0
1,2019-01-01 11:02:43.008000000,1,End,10.749,59.91063,V5a15A3226E5F7,1403.35,497.0
2,2019-01-01 08:18:08.352000000,2,Start,10.73561,59.93056,V5a15A3226E89D,24.716667,656.0
3,2019-01-01 08:42:50.976000000,2,End,10.80917,59.94132,V5a15A3226E89D,24.716667,744.0
4,2019-01-01 14:34:39.360000000,3,Start,10.72983,59.92513,V5a15A3226E999,1067.866667,615.0
5,2019-01-02 08:22:31.008000000,3,End,10.72983,59.92514,V5a15A3226E999,1067.866667,615.0
6,2019-01-01 14:57:48.672000000,4,Start,10.75916,59.94318,V5a15A3226E87F,11.733333,779.0
7,2019-01-01 15:09:32.832000000,4,End,10.76536,59.9344,V5a15A3226E87F,11.733333,699.0
8,2019-01-01 15:09:29.376000000,5,Start,10.77465,59.91238,V5a15A3226CA4C,28.183333,500.0
9,2019-01-01 15:37:40.224000000,5,End,10.74751,59.92604,V5a15A3226CA4C,28.183333,617.0


In [329]:
data.head(10)

Unnamed: 0,DateTime,Trip ID,TripStage,Long,Lat,Car ID,Trip duration (min)
0,2018-12-31 11:39:21.888,1,Start,10.73382,59.92934,V5a15A3226E5F7,1403.35
1,2019-01-01 11:02:43.008,1,End,10.749,59.91063,V5a15A3226E5F7,1403.35
2,2019-01-01 08:18:08.352,2,Start,10.73561,59.93056,V5a15A3226E89D,24.716667
3,2019-01-01 08:42:50.976,2,End,10.80917,59.94132,V5a15A3226E89D,24.716667
4,2019-01-01 14:34:39.360,3,Start,10.72983,59.92513,V5a15A3226E999,1067.866667
5,2019-01-02 08:22:31.008,3,End,10.72983,59.92514,V5a15A3226E999,1067.866667
6,2019-01-01 14:57:48.672,4,Start,10.75916,59.94318,V5a15A3226E87F,11.733333
7,2019-01-01 15:09:32.832,4,End,10.76536,59.9344,V5a15A3226E87F,11.733333
8,2019-01-01 15:09:29.376,5,Start,10.77465,59.91238,V5a15A3226CA4C,28.183333
9,2019-01-01 15:37:40.224,5,End,10.74751,59.92604,V5a15A3226CA4C,28.183333


In [241]:
#nr of cars
data['Car ID'].nunique()

255

In [242]:
#Number of trips
count = len(data.index)/2
print(count)

149376.0


In [243]:
def get_start_data(df):
    return df.loc[df['TripStage'] == 'Start']

def get_end_data(df):
    return df.loc[df['TripStage'] == 'End']

In [244]:
def plot_scatter(df, n, center):
    
    #Stations in trip dataset
    m = folium.Map(center, zoom_start=11)
    # mark each station as a point
    for index, row in df.sample(n=n).iterrows():
        folium.CircleMarker([row['Lat'], row['Long']],
                            radius=3,
                            popup=str(row),
                            fill_color="#3db7e4", # divvy color
                           ).add_to(m)
    return m

In [245]:
def plot_heat_map(df):
    
    m = folium.Map([59.92934, 10.73382], zoom_start=12)

    # convert to (n, 2) nd-array format for heatmap
    trips = df[['Lat', 'Long']].sample(n=1000).values
    # plot heatmap
    m.add_child(plugins.HeatMap(trips, radius=15, blur=20))
    for row in centers:
        folium.CircleMarker([row[0], row[1]],
                            radius=10,
                            popup=str(row),color = "black",
                            fill_color="black",fill_opacity=0.7 # divvy color
                           ).add_to(m)
    return m

In [246]:
start_data = get_start_data(data)
end_data = get_end_data(data)

In [247]:
#plot_heat_map(start_data)
plot_scatter(end_data, 1000, )

TypeError: plot_scatter() missing 2 required positional arguments: 'n' and 'center'

In [248]:
plot_heat_map(end_data)

## Grid creator

In [252]:
#https://www.jpytr.com/post/analysinggeographicdatawithfolium/
def get_geojson_grid(upper_right, lower_left, lat_dim=8, lon_dim=15):
    """Returns a grid of geojson rectangles, and computes the exposure in each section of the grid based on the vessel data.

    Parameters
    ----------
    upper_right: array_like
        The upper right hand corner of "grid of grids" (the default is the upper right hand [lat, lon] of the USA).

    lower_left: array_like
        The lower left hand corner of "grid of grids"  (the default is the lower left hand [lat, lon] of the USA).

    n: integer
        The number of rows/columns in the (n,n) grid.

    Returns
    -------

    list
        List of "geojson style" dictionary objects   
    """

    all_boxes = []

    lat_steps = np.linspace(lower_left[0], upper_right[0], lat_dim+1)
    lon_steps = np.linspace(lower_left[1], upper_right[1], lon_dim+1)

    lat_stride = lat_steps[1] - lat_steps[0]
    lon_stride = lon_steps[1] - lon_steps[0]
    
    zone_counter = 1

    for lat in lat_steps[:-1]:
        for lon in lon_steps[:-1]:
           
            
            # Define dimensions of box in grid
            upper_left = [lon, lat + lat_stride]
            upper_right = [lon + lon_stride, lat + lat_stride]
            lower_right = [lon + lon_stride, lat]
            lower_left = [lon, lat]
                

            # Define json coordinates for polygon
            coordinates = [
                upper_left,
                upper_right,
                lower_right,
                lower_left,
                upper_left
            ]

            geo_json = {"type": "FeatureCollection",
                        "properties":{
                            "lower_left": lower_left,
                            "upper_right": upper_right,
                        },
                        "features":[],
                        "Number": zone_counter}


            grid_feature = {
                "type":"Feature",
                "geometry":{
                    "type":"Polygon",
                    "coordinates": [coordinates],
                }
            }

            geo_json["features"].append(grid_feature)

            all_boxes.append(geo_json)
            
            zone_counter+=1

    return all_boxes

## Printing grid on map

In [250]:
def plot_samples_on_grid(dataframe, n):
    
    lower_left = [59.855331, 10.601628]
    upper_right = [59.973287, 10.950989]
    #excluded_zones = [5, 10, 11, 12, 13, 14, 27, 28, 29, 43, 44, 61, 62, 76, 77, 90, 91, 92, 
    #                  93, 99, 100, 102, 105, 106, 107, 108, 114, 115, 116, 117, 120, 121, 122, 
    #                  123, 129, 130, 131, 132, 133, 135, 136, 137, 138, 144, 145, 146, 147, 148, 149]
    
    center = [lower_left[0]+(upper_right[0]-lower_left[0])/2,lower_left[1]+(upper_right[1]-lower_left[1])/2]
    
    map = plot_scatter(dataframe, n, center)

    #grid_size = int(np.sqrt(len(world.pNodes)))
    grid_size = (26,40)

    grid = get_geojson_grid(lower_left, upper_right , lat_dim=grid_size[0], lon_dim=grid_size[1])
    
    count = 0
    for i, geo_json in enumerate(grid):
        #if (i not in excluded_zones):
        gj = folium.GeoJson(geo_json,
                style_function=lambda feature: {#'fillColor': color,
                                                'color':'grey',
                                                'weight': 2,
                                                'dashArray': '5, 5',
                                                'fillOpacity': 0.1}).add_to(map)
        popup_string = "Node"+str(i)
        gj.add_child(folium.Popup(popup_string, max_width=400))

    display(map)
    
    
   
    

In [227]:
plot_samples_on_grid(data, 500)

-0.004536769230767845
-0.008734025000000756
Height of cells: 0.4958941555624817 km
Width of cells: 0.9661082940405437 km
