In [1]:
import numpy as np 
import pandas as pd 
import gmaps 
from vincenty import vincenty as vc

In [2]:
def filter_rows(row, minLat, minLong, maxLat, maxLong) :
    return ( (minLat<row['lat']) & (maxLat>row['lat']) & (minLong<row['long']) & (maxLong>row['long']))    

In [3]:
#Take a row from the file and return whether it is within defined params
#hours is a tuple (beginning_hour_of_day , end_hour_of_day) between (0:24)
#days is a tuple (beginning_day_of_week , end_day_of_week)between (1:7)
#months is a tuple (beginning_month_of_year , end_month_of_year) -> (11 , 2) is a possible tuple
def filter_periods(row, hours, days, months):
    date = pd.to_datetime(row["timestamp"], yearfirst=True)
    print(date.dt.dayofweek)
    return (True)

In [4]:
# Params
number_of_cells = 100
rows=200000

In [5]:
#Lyon Map Coordinates

# MAX Point
meximiax_lat = 45.904791
meximiax_long = 5.186973

# Min Point
beauvallon_lat = 45.584243
beauvallon_long = 4.676780


dist_lat = vc((meximiax_lat,meximiax_long),(beauvallon_lat,meximiax_long))
dist_long = vc((meximiax_lat,meximiax_long),(meximiax_lat,beauvallon_long))


long_cell_size = (meximiax_long - beauvallon_long)/number_of_cells
lat_cell_size = (meximiax_lat - beauvallon_lat)/number_of_cells

Lon = np.arange(beauvallon_long, meximiax_long, long_cell_size)
Lat = np.arange(beauvallon_lat, meximiax_lat, lat_cell_size) 

In [6]:
# We cut the csv in small chunks
iter_csv = pd.read_csv("./privamov/privamov-gps",
                   sep='\t',
                   names=['id','timestamp','long','lat'],
                   header = None,
                   nrows=rows,
                   infer_datetime_format= True,
                   iterator=True,
                   chunksize=1000)

In [7]:
# Here we load the chunks and filter the dataset before loading in pandas
minLat = min(meximiax_lat, beauvallon_lat)
minLong = min(meximiax_long,beauvallon_long)
maxLat = max(meximiax_lat,beauvallon_lat)
maxLong  = max(meximiax_long, beauvallon_long)
data = pd.concat([chunk[filter_rows(chunk, minLat, minLong, maxLat, maxLong)] for chunk in iter_csv])
weekDay = data[lambda row : filter_periods(row, (0,24), (1,5), (1,12))]

160803    1
160804    2
160805    2
160806    2
160807    2
         ..
199995    4
199996    4
199997    4
199998    4
199999    4
Name: timestamp, Length: 39197, dtype: int64


KeyError: True

In [None]:
data["timestamp"] = pd.to_datetime(data["timestamp"], yearfirst=True)

In [None]:
max_date = pd.to_datetime(data["timestamp"].max())
max_date

In [None]:
# We create an array of 0s of number_of_cells x number_of_cells to store the ppl mouves.
ppl_counts = np.zeros((number_of_cells,number_of_cells)) 

Lat, Lon

In [None]:
data.iloc[3]

In [None]:
for index in range(len(data)):
    for lat_index in range(100):
        if (Lat[lat_index] - lat_cell_size <= data['lat'].iloc[index] and data['lat'].iloc[index] < Lat[lat_index] + lat_cell_size):
            for long_index in range(100):
                if (Lon[long_index] - long_cell_size <= data['long'].iloc[index] and data['long'].iloc[index] < Lon[long_index] + long_cell_size):
                    ppl_counts[lat_index,long_index] += 1

In [None]:
ppl_counts

In [None]:
gmaps.configure(api_key="AIzaSyCizRBht6r1e1lDcFSb2EchAdWAWl-gqP0")

In [None]:
longitude_values = [Lon,]*number_of_cells
latitude_values = np.repeat(Lat,number_of_cells) 
ppl_counts.resize((10000,)) 

In [None]:
heatmap_data = {'Counts': ppl_counts, 'latitude': latitude_values, 'longitude' : np.concatenate(longitude_values)}
df = pd.DataFrame(data=heatmap_data) 

In [None]:
locations = df[['latitude', 'longitude']] 
weights = df['Counts'] 
fig = gmaps.figure() 
heatmap_layer = gmaps.heatmap_layer(locations, weights=weights) 
fig.add_layer(gmaps.heatmap_layer(locations, weights=weights)) 
fig 

In [None]:
data['id'].max

In [None]:
#for each point of the path
#compute lat_index and long_index with (latpt-minlat)/cell length (idem with long)
#use ppl_count to know the the "populatity of the area"
#compare it with a choosen threshold to know if we should exclude the area
#request new path with excluded areas