In [1]:
import pandas as pd
import os

from bokeh.plotting import figure
from bokeh.io import output_notebook, show
from bokeh.layouts import gridplot

output_notebook()

# NOTE(review): hardcoded absolute path to one developer's checkout. Presumably the working
# directory is switched so that the `src.*` imports that follow resolve from the project
# root — confirm, and consider deriving the path from a configurable location instead.
os.chdir("/home/tales/dev/master/mdc_analysis/")
print("working dir", os.getcwd())

from src.dao import csv_dao
from src.plot import plot
from src.utils import geo
from src.entity.geo_circle import GeoCircle
from src.utils.color_utils import palette


from sklearn.cluster import KMeans

import gc

# pd.set_option('display.max_columns', None)
# pd.set_option('display.width', 1000)
# pd.set_option('display.float_format', lambda x: '%.3f' % x)

working dir /home/tales/dev/master/mdc_analysis


In [2]:
# Load one request-circle frame per radius value; the loader is called in
# increasing radius order and each result keeps its own module-level name.
(
    rc200_df, rc300_df, rc400_df, rc500_df,
    rc600_df, rc700_df, rc800_df, rc1k_df,
) = [
    csv_dao.load_request_circles_df(radius_m)
    for radius_m in (200, 300, 400, 500, 600, 700, 800, 1000)
]

# Preview the first rows of the frame loaded with radius 200.
rc200_df.head()

Unnamed: 0,latitude,longitude,radius_m,search_tolerance,sr_ids
0,46.453729,6.866496,200,0.0015,"[6189_0, 6189_1, 6175_0, 6175_1, 6175_100, 617..."
1,46.511575,6.661025,200,0.0015,"[6189_10, 6189_111, 6189_254, 6189_334, 6189_3..."
2,46.542045,6.644075,200,0.0015,"[6189_100, 6189_101, 6189_122, 6189_137, 6189_..."
3,46.53,6.65,200,0.0015,"[6189_102, 6189_243, 6189_419, 6189_421, 5976_..."
4,46.53,6.73,200,0.0015,"[6189_103, 6189_136, 6189_232, 6189_257, 6189_..."


In [3]:
def summary(request_circles_list):
    """Summarise each request-circle frame as one row of a new DataFrame.

    Parameters
    ----------
    request_circles_list : iterable of pandas.DataFrame
        Each frame must provide a ``radius_m`` column and an ``sr_ids``
        column whose cells are sized containers (``len`` is applied to them).

    Returns
    -------
    pandas.DataFrame
        One row per input frame with the columns ``radius_m`` (mean of the
        frame's ``radius_m`` column), ``n_requests`` (number of rows) and
        ``n_stop_regions_median`` (median of ``len(sr_ids)`` over the rows;
        NaN when the frame has no rows).
    """
    rows = []

    for request_circles in request_circles_list:
        # Explicit float dtype keeps the median well defined (NaN) for a frame
        # without rows, instead of failing on an empty object-typed Series.
        sr_counts = pd.Series(
            [len(sr_ids) for sr_ids in request_circles["sr_ids"]],
            dtype="float64",
        )
        rows.append({
            "radius_m": request_circles["radius_m"].mean(),
            "n_requests": len(request_circles),
            "n_stop_regions_median": sr_counts.median(),
        })

    return pd.DataFrame(rows)
    
def summary_plots(summary_df):
    """Build the two line-plus-marker figures for a summary frame.

    ``summary_df`` must provide the columns ``radius_m``, ``n_requests`` and
    ``n_stop_regions_median``. Both figures use ``radius_m`` on the x axis.

    Returns the tuple ``(p_reqs, p_sr_median)``: the request-count figure
    followed by the stop-region-median figure.
    """
    radii = summary_df["radius_m"]

    def _trend_figure(title, column):
        # Light line underneath, solid markers on top of every data point.
        fig = figure(plot_width=400, plot_height=300, title=title)
        fig.line(radii, summary_df[column], color="lightblue", alpha=0.8, line_width=2)
        fig.circle(radii, summary_df[column], color="navy", fill_alpha=1, size=4)
        return fig

    p_reqs = _trend_figure("Number of Request vs Request Radius", "n_requests")
    p_sr_median = _trend_figure("Stop Regions median vs Request Radius", "n_stop_regions_median")
    return p_reqs, p_sr_median

In [4]:
# Frames listed by increasing radius; this list is also iterated by a later cell.
request_circles_list = [rc200_df, rc300_df, rc400_df, rc500_df, rc600_df, rc700_df, rc800_df, rc1k_df]

# One summary row per frame, printed as plain text and then plotted.
results = summary(request_circles_list)
print(results)
p_reqs, p_sr_median = summary_plots(results)

# Lay both figures out in a single row and render them in the notebook.
grid = gridplot([[p_reqs, p_sr_median]])
show(grid)

   n_requests  n_stop_regions_median  radius_m
0        5134                    3.0     200.0
1        3700                    4.0     300.0
2        3178                    4.0     400.0
3        2600                    4.0     500.0
4        2521                    5.0     600.0
5        2164                    5.0     700.0
6        2060                    5.0     800.0
7        1884                    6.0    1000.0


### Densest Request Circles (by number of Stop Regions)

In [5]:
def row_to_geo_circle(row):
    """Build a ``GeoCircle`` from one request-circle row.

    The row's ``latitude`` and ``longitude`` are passed positionally, its
    ``radius_m`` as the radius and its ``sr_ids`` as the circle's data.
    """
    center_lat, center_lon = row["latitude"], row["longitude"]
    return GeoCircle(center_lat, center_lon, radius_m=row["radius_m"], data=row["sr_ids"])

def biggest_geo_circle(request_circles):
    """Return the row whose ``sr_ids`` container is the longest.

    When several rows share the maximum length, the first one in iteration
    order is kept. The frame must contain at least one row.
    """
    # Start from the first row and remember its length so it is not recomputed.
    best_row = request_circles.iloc[0]
    best_size = len(best_row["sr_ids"])

    for _, candidate in request_circles.iterrows():
        candidate_size = len(candidate["sr_ids"])
        # Strict comparison: an equally long later row never replaces the current best.
        if candidate_size > best_size:
            best_row, best_size = candidate, candidate_size

    return best_row

In [6]:
print("Greatest Request Circle for each radius")
# Maps each frame's radius_m value to its row with the most stop-region ids;
# the plotting cell that follows looks rows up by radius in this dict.
biggests = {}
for rc in request_circles_list:
    big_rc = biggest_geo_circle(rc)
    print("{}m  - {} Stop Regions".format(big_rc["radius_m"], len(big_rc["sr_ids"])))
    biggests[big_rc["radius_m"]] = big_rc


Greatest Request Circle for each radius
200m  - 767 Stop Regions
300m  - 1017 Stop Regions
400m  - 1415 Stop Regions
500m  - 1652 Stop Regions
600m  - 1801 Stop Regions
700m  - 2476 Stop Regions
800m  - 2631 Stop Regions
1000m  - 2866 Stop Regions


In [7]:
colors = palette() + ["magenta"]
# Draw the largest request circle of every radius on one shared figure, one color slot per
# radius. The hand-written sequence used colors[0] for both 200m and 300m, which made those
# two circles indistinguishable in the plot and its legend.
# NOTE(review): the modulo only wraps around if palette() yields fewer colors than radii.
radii = [200, 300, 400, 500, 600, 700, 800, 1000]

# The first call creates the figure (title and size); later calls draw onto it via p=p.
p = row_to_geo_circle(biggests[radii[0]]).plot(title="Greatests RC for each radius", legend="{}m".format(radii[0]), color=colors[0], width=600, height=400)
for position, radius in enumerate(radii[1:], start=1):
    p = row_to_geo_circle(biggests[radius]).plot(legend="{}m".format(radius), color=colors[position % len(colors)], p=p)

show(p)

### Densest Request Circles (by number of HOT OSM POIs)

In [8]:
def valid_pois(pois):
    """Return the rows of ``pois`` whose ``amenity`` value is not missing."""
    has_amenity = pois["amenity"].notna()
    return pois[has_amenity]

def pois_in_request_circle(request_circles, pois):
    """Build one ``GeoCircle`` per request circle and attach the POIs inside it.

    Parameters
    ----------
    request_circles : pandas.DataFrame
        Needs the columns ``latitude``, ``longitude``, ``radius_m`` and
        ``search_tolerance``; every row becomes one ``GeoCircle``.
    pois : pandas.DataFrame
        Needs the columns ``lat_4326`` and ``lon_4326``. The frame passed in
        is left untouched; the ``latitude``/``longitude`` aliases are added
        to a copy.

    Returns
    -------
    list of GeoCircle
        In row order of ``request_circles``. Each circle received, through
        ``put_data``, a DataFrame holding the POI rows for which
        ``contains_point`` returned a truthy value (an empty DataFrame when
        there were none).
    """
    # assign() returns a new frame, so the caller's POIs do not silently gain columns
    # (the original in-place column writes also hit pandas' SettingWithCopy path on slices).
    pois = pois.assign(latitude=pois["lat_4326"], longitude=pois["lon_4326"])

    # Row-wise construction through iterrows also yields [] for a frame without rows.
    rc_geo_circles = [
        GeoCircle(row["latitude"], row["longitude"], radius_m=row["radius_m"], searching_tolerance=row["search_tolerance"])
        for _, row in request_circles.iterrows()
    ]

    for rc in rc_geo_circles:
        # Presumably a cheap pre-filter around the circle's center before the exact
        # containment test — slice_geo_data is defined outside this notebook, confirm there.
        use_pois = geo.slice_geo_data(pois, center=rc, search_tolerance=rc.searching_tolerance)

        pois_in = [
            poi.to_dict()
            for _, poi in use_pois.iterrows()
            if rc.contains_point(latitude=poi["latitude"], longitude=poi["longitude"])
        ]

        rc.put_data(pd.DataFrame(pois_in))
    return rc_geo_circles

In [9]:
# Load the HOT OSM POIs and keep only the rows that have an amenity value.
hotosm_pois = valid_pois(csv_dao.load_hot_osm_pois())

In [10]:
# One GeoCircle per row of the radius-200 frame, each holding the HOT OSM POIs found inside it.
rc200_with_pois_inside = pd.DataFrame({"request_circle" : pois_in_request_circle(rc200_df, hotosm_pois)})

In [11]:
# One GeoCircle per row of the radius-300 frame, each holding the HOT OSM POIs found inside it.
rc300_with_pois_inside = pd.DataFrame({"request_circle" : pois_in_request_circle(rc300_df, hotosm_pois)})

In [12]:
# One GeoCircle per row of the radius-400 frame, each holding the HOT OSM POIs found inside it.
rc400_with_pois_inside = pd.DataFrame({"request_circle" : pois_in_request_circle(rc400_df, hotosm_pois)})

In [13]:
# One GeoCircle per row of the radius-500 frame, each holding the HOT OSM POIs found inside it.
rc500_with_pois_inside = pd.DataFrame({"request_circle" : pois_in_request_circle(rc500_df, hotosm_pois)})

In [14]:
# One GeoCircle per row of the radius-600 frame, each holding the HOT OSM POIs found inside it.
rc600_with_pois_inside = pd.DataFrame({"request_circle" : pois_in_request_circle(rc600_df, hotosm_pois)})

In [15]:
# One GeoCircle per row of the radius-700 frame, each holding the HOT OSM POIs found inside it.
rc700_with_pois_inside = pd.DataFrame({"request_circle" : pois_in_request_circle(rc700_df, hotosm_pois)})

In [16]:
# One GeoCircle per row of the radius-800 frame, each holding the HOT OSM POIs found inside it.
rc800_with_pois_inside = pd.DataFrame({"request_circle" : pois_in_request_circle(rc800_df, hotosm_pois)})

In [17]:
# One GeoCircle per row of the radius-1000 frame, each holding the HOT OSM POIs found inside it.
rc1k_with_pois_inside = pd.DataFrame({"request_circle" : pois_in_request_circle(rc1k_df, hotosm_pois)})

In [18]:
def request_circle_size(request_circles):
    """Return, for every row, the length of the data held by its ``request_circle``.

    The result is a Series aligned with ``request_circles``: each value is
    ``len(circle.get_data())`` for the object in the ``request_circle`` column.
    """
    def _data_length(circle):
        return len(circle.get_data())

    circles = request_circles["request_circle"]
    return circles.apply(_data_length)

In [19]:
# Display the first object stored in the radius-200 frame's request_circle column.
rc200_with_pois_inside["request_circle"].iloc[0]

<src.entity.geo_circle.GeoCircle at 0x7f872d079ba8>

In [20]:
geo_circles_list = [rc200_with_pois_inside, rc300_with_pois_inside, rc400_with_pois_inside, 
                    rc500_with_pois_inside, rc600_with_pois_inside, rc700_with_pois_inside, 
                    rc800_with_pois_inside, rc1k_with_pois_inside]

print("Greatest Request Circle for each radius")
# Maps each radius to the GeoCircle holding the most HOT OSM POIs for that radius.
biggests_pois = {}
for rc_with_pois_inside in geo_circles_list:
    # Stored as a column on the frame itself; the "no POIs" statistics cell reads it back.
    rc_with_pois_inside["pois_size"] = request_circle_size(rc_with_pois_inside)
    # Several circles can tie for the maximum. Taking the first one keeps the result
    # reproducible across runs, which an unseeded random sample() does not.
    big_rc = rc_with_pois_inside[rc_with_pois_inside["pois_size"] == rc_with_pois_inside["pois_size"].max()].head(1)
    # Extract the single GeoCircle once instead of calling .item() for every use.
    biggest_circle = big_rc["request_circle"].item()
    biggests_pois[biggest_circle.radius_m] = biggest_circle
    print("{}m \t- {} HOT OSM POIs".format(biggest_circle.radius_m, len(biggest_circle.get_data())))

Greatest Request Circle for each radius
200m 	- 94 HOT OSM POIs
300m 	- 180 HOT OSM POIs
400m 	- 196 HOT OSM POIs
500m 	- 384 HOT OSM POIs
600m 	- 355 HOT OSM POIs
700m 	- 436 HOT OSM POIs
800m 	- 504 HOT OSM POIs
1000m 	- 574 HOT OSM POIs


In [21]:
colors = palette() + ["magenta"]
# Draw the POI-richest request circle of every radius on one shared figure, one color slot
# per radius. The hand-written sequence used colors[0] for both 200m and 300m, which made
# those two circles indistinguishable in the plot and its legend.
# NOTE(review): the modulo only wraps around if palette() yields fewer colors than radii.
radii = [200, 300, 400, 500, 600, 700, 800, 1000]

# The first call creates the figure (title and size); later calls draw onto it via p=p.
p = biggests_pois[radii[0]].plot(title="Greatests RC for each radius", legend="{}m".format(radii[0]), color=colors[0], width=600, height=400)
for position, radius in enumerate(radii[1:], start=1):
    p = biggests_pois[radius].plot(legend="{}m".format(radius), color=colors[position % len(colors)], p=p)

show(p)

In [22]:
# Print the center coordinates of the selected circle for every radius.
for radius, circle in biggests_pois.items():
    print("{}m - Latitude: {}, Longitude: {}".format(radius, circle.center_lat, circle.center_lon))

200m - Latitude: 47.37707500000001, Longitude: 8.539591999999999
300m - Latitude: 46.94898260869565, Longitude: 7.440277391304347
400m - Latitude: 46.94898260869565, Longitude: 7.440277391304347
500m - Latitude: 47.3753, Longitude: 8.53895
600m - Latitude: 47.3753, Longitude: 8.53895
700m - Latitude: 47.36999999999999, Longitude: 8.539999999999997
800m - Latitude: 47.36999999999999, Longitude: 8.539999999999997
1000m - Latitude: 47.36999999999999, Longitude: 8.539999999999997


In [23]:
print("Amount of Request Circles with no HOT OSM POIs")
for rc_with_pois_inside in geo_circles_list:
    # Every circle in a frame shares one radius, so the first one is used for the label.
    radius_m = rc_with_pois_inside["request_circle"].iloc[0].radius_m
    # Rows whose previously computed pois_size is zero, as a percentage of all rows.
    without_pois = rc_with_pois_inside[rc_with_pois_inside["pois_size"] == 0]
    empty_share = 100 * len(without_pois) / len(rc_with_pois_inside)
    print("{}m\t-  {:.2f}%".format(radius_m, empty_share))

Amount of Request Circles with no HOT OSM POIs
200m	-  57.09%
300m	-  48.03%
400m	-  46.13%
500m	-  39.81%
600m	-  40.42%
700m	-  35.58%
800m	-  33.59%
1000m	-  31.58%
