In [1]:
import os

os.chdir("/home/tales/dev/master/mdc_analysis/")
print("working dir", os.getcwd())

import pandas as pd
from pyproj import Proj, transform
import geopy.distance as gp

import matplotlib.pyplot as plt
from src.dao import csv_dao, dbdao
from src.plot import plot
from src.utils import geo
from src.utils.others import partitions

from bokeh.io import output_notebook, show
output_notebook()

pd.set_option('display.float_format', lambda x: '%.4f' % x)

import warnings
warnings.filterwarnings('ignore')

import pandas as pd
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

working dir /home/tales/dev/master/mdc_analysis


## POIs summary 

In [8]:
pois = csv_dao.load_hot_osm_pois()
valid_pois = csv_dao.load_hot_osm_pois(valid_pois=True)
print("{} pois".format(len(pois)))
print("{} valid pois".format(len(valid_pois)))
valid_pois.sample(6)

314876 pois
184105 valid pois


Unnamed: 0,osm_id,latitude,longitude,access,addr:housename,addr:housenumber,addr:interpolation,admin_level,aerialway,aeroway,amenity,area,barrier,bicycle,brand,bridge,boundary,building,capital,construction,covered,culvert,cutting,denomination,disused,ele,embankment,foot,generator:source,harbour,highway,historic,horse,intermittent,junction,landuse,layer,leisure,lock,man_made,military,motorcar,name,natural,office,oneway,operator,place,poi,population,power,power_source,public_transport,railway,ref,religion,route,service,shop,sport,surface,toll,tourism,tower:type,tunnel,water,waterway,wetland,width,wood,z_order,way,lon_4326,lat_4326
147926,5820375400,844414.98,6033436.63,,,,,,,,pharmacy,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Hersberger,,,,,,,,,,,,,,,,,,,,,,,,,,,,,010100002031BF0D005C8FC2F5FDC4294185EB51280704...,7.5855,47.5568
167089,4512126808,895529.01,6006346.2,,,10.0,,,,,fast_food,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Ahram,,,,,,,,,,,,,,,,,,,,,,,,,,,,,010100002031BF0D0052B81E0552542B41CDCCCC8C92E9...,8.0447,47.3923
289724,909421378,1019318.22,6041934.51,,,,,,,,,,,,,,,,,,,,,,,526.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,information,,,,,,,,,010100002031BF0D000AD7A3706C1B2F410AD7A3A0530C...,9.1567,47.6083
292094,6320611941,1021328.89,6050817.62,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Gebers,,,,,,,,,,,,,,,,bed,,,,,,,,,,,,,010100002031BF0D007B14AEC7212B2F417B14AE670015...,9.1747,47.6621
257540,4075750578,996225.86,5970753.51,,,21.0,,,,,restaurant,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Freihof,,,,,,,,,,,,,,,,,,,,,,,,,,,,,010100002031BF0D0085EB51B803672E410AD7A360D0C6...,8.9492,47.1754
28681,5121944839,930748.42,5828196.05,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Casa Vacanze La Meridiana,,,,,,,,,,,,,,,,,,,,apartment,,,,,,,,,010100002031BF0D00713D0AD778672C4133333303993B...,8.3611,46.2978


In [10]:
print()
print("Around {0:.2f}% of Hot OSM points are considered valid".format(len(valid_pois) / len(pois)))


Around 0.58% of Hot OSM points are considered valid


### Please check <font color="red"> HOT OSM Valid POIs Analysis.html</font> for column values frequency

## POIs distribution

In [24]:
p = None

n_sample = 10000

plot_pois = valid_pois.sample(n_sample)

p = plot.plot_poi(plot_pois, title="", figure=p, width=600, height=400, lat_col="lat_4326", lon_col="lon_4326")
    
show(p)
print("Sample size: {}".format(n_sample))

Sample size: 10000


## Computing KNN POIs for each Stop Region

In [25]:
def knn_by_clusters(centroids, pois, k_neighbors, k_partitions=4):
    centroids["cluster"] = partitions(centroids, k_partitions=k_partitions, columns=["latitude", "longitude"])
    
    user_knn_pois = []

    for partition in centroids["cluster"].drop_duplicates():
        print("--")
        print("Partition: {}".format(partition))
        partition_centroids = centroids[centroids["cluster"] == partition]
        print("Stop Regions in this parittion: {}".format(len(partition_centroids)))
        
        pois["latitude"] = pois["lat_4326"]
        pois["longitude"] = pois["lon_4326"]

        close_pois = geo.grab_pois_by_stop_region_bounding_box_expand_fixed(pois, partition_centroids, expand_value=0.004)
        
        if len(close_pois) == 0:
            continue
        
        user_knn_pois = user_knn_pois + geo.knn_pois(partition_centroids, close_pois, k=k_neighbors)
        
    return user_knn_pois


In [42]:
users = csv_dao.list_stop_region_usernames()

In [50]:
k_neighbors = 30
k_partitions = 6

for user in users:
    print("\n\n")
    print("**********")
    print("User:", user)
    
    sr_centroids = csv_dao.load_user_stop_regions_centroids(user)
    
    print("Stop Regions:", len(sr_centroids))
    print("-----")
    
    user_dir = os.getcwd() + "/outputs/hot_osm_sr_knn/" + str(user)
    try:
        os.mkdir(user_dir)
    except FileExistsError:
        print("User already computed... skipping")
        continue
        #shutil.rmtree(user_dir)
        #os.mkdir(user_dir)
    except FileNotFoundError:
        os.mkdir(user_dir)
        
    print("Computing")
    
    if len(sr_centroids) < k_partitions:
        user_knn_pois = knn_by_clusters(sr_centroids, valid_pois, k_neighbors, k_partitions=1)
    
    else:
        user_knn_pois = knn_by_clusters(sr_centroids, valid_pois, k_neighbors, k_partitions=k_partitions)
    
    for knn in user_knn_pois:
        sr_id = knn["sr_id"].drop_duplicates().item()
        knn.to_csv(user_dir + "/" + "/" + "sr_" + sr_id + "_knn" + ".csv", index=False)




**********
User: 6189
Stop Regions: 574
-----
User already computed... skipping



**********
User: 5936
Stop Regions: 761
-----
Computing
--
Partition: 1
Stop Regions in this parittion: 379
3728 pois out of 184105
--
Partition: 0
Stop Regions in this parittion: 289
3066 pois out of 184105
--
Partition: 3
Stop Regions in this parittion: 61
7122 pois out of 184105
--
Partition: 5
Stop Regions in this parittion: 11
2033 pois out of 184105
--
Partition: 2
Stop Regions in this parittion: 19
19 pois out of 184105
--
Partition: 4
Stop Regions in this parittion: 2
9 pois out of 184105



**********
User: 6087
Stop Regions: 610
-----
Computing
--
Partition: 0
Stop Regions in this parittion: 560
5096 pois out of 184105
--
Partition: 5
Stop Regions in this parittion: 12
1796 pois out of 184105
--
Partition: 2
Stop Regions in this parittion: 10
224 pois out of 184105
--
Partition: 1
Stop Regions in this parittion: 14
285 pois out of 184105
--
Partition: 4
Stop Regions in this parittion: 11
146

--
Partition: 1
Stop Regions in this parittion: 55
2152 pois out of 184105
--
Partition: 3
Stop Regions in this parittion: 6
1792 pois out of 184105
--
Partition: 2
Stop Regions in this parittion: 2
733 pois out of 184105
--
Partition: 4
Stop Regions in this parittion: 6
83 pois out of 184105
--
Partition: 5
Stop Regions in this parittion: 2
101 pois out of 184105



**********
User: 5962
Stop Regions: 426
-----
Computing
--
Partition: 0
Stop Regions in this parittion: 375
372 pois out of 184105
--
Partition: 2
Stop Regions in this parittion: 30
3812 pois out of 184105
--
Partition: 4
Stop Regions in this parittion: 5
6446 pois out of 184105
--
Partition: 1
Stop Regions in this parittion: 6
4092 pois out of 184105
--
Partition: 5
Stop Regions in this parittion: 2
199 pois out of 184105
--
Partition: 3
Stop Regions in this parittion: 8
960 pois out of 184105



**********
User: 6093
Stop Regions: 59
-----
Computing
--
Partition: 5
Stop Regions in this parittion: 27
13 pois out of 184105

--
Partition: 0
Stop Regions in this parittion: 20
1976 pois out of 184105
--
Partition: 1
Stop Regions in this parittion: 52
2479 pois out of 184105
--
Partition: 3
Stop Regions in this parittion: 3
258 pois out of 184105



**********
User: 6172
Stop Regions: 83
-----
Computing
--
Partition: 0
Stop Regions in this parittion: 33
1960 pois out of 184105
--
Partition: 3
Stop Regions in this parittion: 8
18 pois out of 184105
--
Partition: 5
Stop Regions in this parittion: 1
20 pois out of 184105
--
Partition: 2
Stop Regions in this parittion: 11
83 pois out of 184105
--
Partition: 1
Stop Regions in this parittion: 29
449 pois out of 184105
--
Partition: 4
Stop Regions in this parittion: 1
0 pois out of 184105



**********
User: 5955
Stop Regions: 163
-----
Computing
--
Partition: 0
Stop Regions in this parittion: 136
522 pois out of 184105
--
Partition: 1
Stop Regions in this parittion: 10
5 pois out of 184105
--
Partition: 2
Stop Regions in this parittion: 8
10 pois out of 184105
--
P

--
Partition: 3
Stop Regions in this parittion: 5
231 pois out of 184105
--
Partition: 5
Stop Regions in this parittion: 2
17 pois out of 184105



**********
User: 6192
Stop Regions: 555
-----
Computing
--
Partition: 1
Stop Regions in this parittion: 469
1013 pois out of 184105
--
Partition: 2
Stop Regions in this parittion: 63
2302 pois out of 184105
--
Partition: 0
Stop Regions in this parittion: 15
1076 pois out of 184105
--
Partition: 4
Stop Regions in this parittion: 5
5834 pois out of 184105
--
Partition: 5
Stop Regions in this parittion: 2
204 pois out of 184105
--
Partition: 3
Stop Regions in this parittion: 1
7 pois out of 184105



**********
User: 6064
Stop Regions: 46
-----
Computing
--
Partition: 0
Stop Regions in this parittion: 22
0 pois out of 184105
--
Partition: 1
Stop Regions in this parittion: 9
693 pois out of 184105
--
Partition: 5
Stop Regions in this parittion: 7
116 pois out of 184105
--
Partition: 3
Stop Regions in this parittion: 3
33 pois out of 184105
--
P

Stop Regions: 580
-----
User already computed... skipping



**********
User: 6014
Stop Regions: 637
-----
User already computed... skipping



**********
User: 6078
Stop Regions: 24
-----
User already computed... skipping



**********
User: 6166
Stop Regions: 48
-----
User already computed... skipping



**********
User: 6015
Stop Regions: 219
-----
User already computed... skipping



**********
User: 5978
Stop Regions: 279
-----
User already computed... skipping



**********
User: 5986
Stop Regions: 530
-----
User already computed... skipping



**********
User: 5969
Stop Regions: 1559
-----
User already computed... skipping



**********
User: 6174
Stop Regions: 509
-----
User already computed... skipping



**********
User: 5949
Stop Regions: 930
-----
User already computed... skipping



**********
User: 5959
Stop Regions: 1655
-----
User already computed... skipping



**********
User: 6023
Stop Regions: 98
-----
User already computed... skipping



**********
User: 6051
Stop 