# Initialisation

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [1]:
!pip install geopandas folium
!pip install folium --upgrade

/bin/sh: 1: pip: not found
/bin/sh: 1: pip: not found


In [37]:
import pandas as pd
import geopandas as gpd
import folium as fol
import datetime
from IPython.display import display
from folium.plugins import HeatMap
from shapely.geometry import Point, Polygon, MultiPoint


### Import the cleaned dataset for use

In [45]:
contr_df = pd.read_csv("ungrouped_stops.csv", parse_dates=["stopped"])

timed_stops = contr_df[(contr_df["stopped"] > "2017-01-03") & (contr_df["stopped"] <= "2017-01-04")]
timed_stops

Unnamed: 0,lo,la,stopped
73733,54.688128,25.261830,2017-01-03 06:57:58
73734,54.687877,25.262218,2017-01-03 07:11:34
73735,54.689148,25.261901,2017-01-03 07:25:36
73736,54.689292,25.262171,2017-01-03 07:34:33
73737,54.688139,25.261851,2017-01-03 07:45:13
...,...,...,...
73891,54.686584,25.208831,2017-01-03 22:24:12
73892,54.685092,25.208464,2017-01-03 22:25:46
73893,54.698365,25.266316,2017-01-03 22:30:04
73894,54.684649,25.206293,2017-01-03 22:27:13


# Map display

### Display all stop events for 2017-01-03

In [66]:
# Initial folium map
m = fol.Map(location=[54.683665, 25.283795], tiles="Stamen Toner", zoom_start=13)

# Get data for the specified timeframe
timed_stops = contr_df[(contr_df["stopped"] > "2017-01-03") & (contr_df["stopped"] <= "2017-01-04")]

# Generating folium heatmap from control locations
fol.plugins.HeatMap(data=timed_stops[["lo", "la"]]).add_to(m)
fol.plugins.Fullscreen(position="bottomleft", title="Full screen").add_to(m)
display(m)

### Show stop events for each Friday of August 2016

In [67]:
# Initial folium map
fridays_aug = fol.Map(location=[54.683665, 25.283795], tiles="Stamen Toner", zoom_start=13)

# Get data for the specified timeframe
timed_stops = contr_df[(contr_df["stopped"] > "2016-08-01") & (contr_df["stopped"] <= "2016-09-01")]
timed_stops = timed_stops[timed_stops.stopped.dt.weekday==5] #  '5' is Friday

# Generating folium heatmap from control locations
fol.plugins.HeatMap(data=timed_stops[["lo", "la"]]).add_to(fridays_aug)
fol.plugins.Fullscreen(position="bottomleft", title="Full screen").add_to(fridays_aug)
display(fridays_aug)

### Show stop events for each Friday of September 2016

In [68]:
# Initial folium map
fridays_sep = fol.Map(location=[54.683665, 25.283795], tiles="Stamen Toner", zoom_start=13)

# Get data for the specified timeframe
timed_stops = contr_df[(contr_df["stopped"] > "2016-09-01") & (contr_df["stopped"] <= "2016-10-01")]
timed_stops = timed_stops[timed_stops.stopped.dt.weekday==5] #  '5' is Friday

# Generating folium heatmap from control locations
fol.plugins.HeatMap(data=timed_stops[["lo", "la"]]).add_to(fridays_sep)
fol.plugins.Fullscreen(position="bottomleft", title="Full screen").add_to(fridays_sep)
display(fridays_sep)

# Data cleanup

### Import bus stops and ticket check events

In [0]:
# Drop OSM_ID column as it's not needed
stops_df = pd.read_csv("vil_stoteles.csv").drop(["OSM_ID"], axis=1)

# Create a geometry dataframe to contain all stops
stops_geom = gpd.GeoDataFrame(
    stops_df,
    geometry=gpd.points_from_xy(
        stops_df["Y"], # The X and Y columns are flipped,
        stops_df["X"], # so passing the arguments in reverse is required
    ),
    crs={"init":"EPSG:4326"} # Soon to be deprecated, use "ESPG:4326"
)

stops_geom

  return _prepare_from_string(" ".join(pjargs))


Unnamed: 0,X,Y,NAME,geometry
0,25.064913,54.620020,Lentvario kryžkelė,POINT (54.62002 25.06491)
1,25.068301,54.619508,Lentvario kryžkelė,POINT (54.61951 25.06830)
2,25.085681,54.607508,Pagiriai,POINT (54.60751 25.08568)
3,25.087015,54.608827,Pagiriai,POINT (54.60883 25.08702)
4,25.089535,54.619805,Dobūklė,POINT (54.61980 25.08953)
...,...,...,...,...
1232,25.403169,54.783067,Miškas,POINT (54.78307 25.40317)
1233,25.404628,54.783340,Miškas,POINT (54.78334 25.40463)
1234,25.429036,54.793033,Balžio st.,POINT (54.79303 25.42904)
1235,25.429648,54.792990,Balžio st.,POINT (54.79299 25.42965)


In [0]:
# Create GeoDataFrame for each stop event
contr_geom = gpd.GeoDataFrame(
    contr_df,
    geometry = gpd.points_from_xy(
        contr_df["lo"], # The X and Y columns are flipped,
        contr_df["la"], # so passing the arguments in reverse is required
    ),
    crs={"init":"EPSG:4326"} # Soon to be deprecated, use "ESPG:4326"
)

contr_geom

  return _prepare_from_string(" ".join(pjargs))


Unnamed: 0_level_0,title,id,lo,la,created,result,geometry
stopped,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2014-01-03 15:03:58,356.0,dc41ebfb88f06f3e1891b9202fe482d7,25.321036,54.709591,2014-01-03 15:03:18,2,POINT (25.32104 54.70959)
2014-01-03 15:05:34,356.0,dc41ebfb88f06f3e1891b9202fe482d7,25.321098,54.709649,2014-01-03 15:04:47,2,POINT (25.32110 54.70965)
2014-01-03 15:07:15,770.0,86ca23a27f717c53276693af14c4a0e9,25.321603,54.709602,2014-01-03 15:05:45,2,POINT (25.32160 54.70960)
2014-01-03 16:15:01,693.0,7de9ad35ac54492d6e995122cf75b7da,0.000000,0.000000,2014-01-03 15:12:08,2,POINT (0.00000 0.00000)
2014-01-03 15:17:09,755.0,86ca23a27f717c53276693af14c4a0e9,25.321134,54.709618,2014-01-03 15:15:09,2,POINT (25.32113 54.70962)
...,...,...,...,...,...,...,...
2017-04-07 09:06:45,545.0,aa3041bfad15c6508d3acb8337f24d52,54.708050,25.186371,2017-04-07 09:03:52,2,POINT (54.70805 25.18637)
2017-04-07 09:14:14,541.0,aa3041bfad15c6508d3acb8337f24d52,54.708171,25.186650,2017-04-07 09:11:38,2,POINT (54.70817 25.18665)
2017-04-07 11:35:01,0.0,216513c0b9e6b4c2bf51c16aa7522c87,54.673885,25.262356,2017-04-07 10:30:37,2,POINT (54.67388 25.26236)
2017-04-07 11:26:23,123.0,aa3041bfad15c6508d3acb8337f24d52,54.729448,25.216597,2017-04-07 11:16:49,2,POINT (54.72945 25.21660)


### Find nearest create doc



In [0]:
from shapely.ops import nearest_points

contr_points = MultiPoint(contr_geom["geometry"])
stop_points = MultiPoint(stops_geom["geometry"])

# [o. for o in nearest_points(contr_points, stop_points)]

for i in range(0,10):
    print(nearest_points(stop_points, contr_points[i]))

#TODO: create a dataframe with timestamped bus stops, and their state (number of stop events at that time)

(<shapely.geometry.point.Point object at 0x7f44e7edbd30>, <shapely.geometry.point.Point object at 0x7f44e7edbda0>)
(<shapely.geometry.point.Point object at 0x7f44e7edbda0>, <shapely.geometry.point.Point object at 0x7f44e7edba90>)
(<shapely.geometry.point.Point object at 0x7f44e7edba90>, <shapely.geometry.point.Point object at 0x7f44e7edbd30>)
(<shapely.geometry.point.Point object at 0x7f44e7edbd30>, <shapely.geometry.point.Point object at 0x7f44e7edbda0>)
(<shapely.geometry.point.Point object at 0x7f44e7edbda0>, <shapely.geometry.point.Point object at 0x7f44e7edba90>)
(<shapely.geometry.point.Point object at 0x7f44e7edba90>, <shapely.geometry.point.Point object at 0x7f44e7edbd30>)
(<shapely.geometry.point.Point object at 0x7f44e7edbd30>, <shapely.geometry.point.Point object at 0x7f44e7edbda0>)
(<shapely.geometry.point.Point object at 0x7f44e7edbda0>, <shapely.geometry.point.Point object at 0x7f44e7edba90>)
(<shapely.geometry.point.Point object at 0x7f44e7edba90>, <shapely.geometry.poin