In [1]:
import os

import numpy as np 
import pandas as pd 
import gmaps 
from vincenty import vincenty as vc

In [2]:
def filter_rows(row, minLat, minLong, maxLat, maxLong) :
    """Tell whether a position lies strictly inside a lat/long bounding box.

    `row` is anything indexable by 'lat' and 'long' (a DataFrame chunk, a
    Series row or a plain mapping). With a DataFrame the result is a boolean
    mask with one entry per row; points exactly on a border are excluded.
    """
    lat = row['lat']
    lon = row['long']
    lat_inside = (lat > minLat) & (lat < maxLat)
    long_inside = (lon > minLong) & (lon < maxLong)
    return lat_inside & long_inside

In [3]:
# Tunable parameters for the whole notebook.
number_of_cells = 100   # grid resolution: cells per axis
rows = 200_000          # maximum number of rows read from the GPS file

In [4]:
# Lyon map coordinates: the two opposite corners of the study bounding box.

# Max point (largest latitude and longitude)
meximiax_lat = 45.904791
meximiax_long = 5.186973

# Min point (smallest latitude and longitude)
beauvallon_lat = 45.584243
beauvallon_long = 4.676780

# Side lengths of the bounding box as computed by vincenty
# (presumably kilometres, the package default -- confirm).
dist_lat = vc((meximiax_lat, meximiax_long), (beauvallon_lat, meximiax_long))
dist_long = vc((meximiax_lat, meximiax_long), (meximiax_lat, beauvallon_long))

# Size of one grid cell, in degrees, along each axis.
long_cell_size = (meximiax_long - beauvallon_long) / number_of_cells
lat_cell_size = (meximiax_lat - beauvallon_lat) / number_of_cells

# Lower edge of every grid cell along each axis. np.linspace with
# endpoint=False always returns exactly `number_of_cells` values, whereas
# np.arange with a floating-point step may return one element more or less
# because of rounding.
Lon = np.linspace(beauvallon_long, meximiax_long, number_of_cells, endpoint=False)
Lat = np.linspace(beauvallon_lat, meximiax_lat, number_of_cells, endpoint=False)

In [5]:
# Read the tab-separated GPS log lazily, in chunks of 1000 rows, so it can be
# filtered before everything is held in memory. At most `rows` rows are read.
# The timestamp column is left as text here: no parse_dates is requested, so
# the deprecated `infer_datetime_format` flag had no effect and is dropped.
# NOTE: the returned reader is single-use; re-run this cell before iterating
# over it a second time.
iter_csv = pd.read_csv("./privamov/privamov-gps",
                       sep='\t',
                       names=['id', 'timestamp', 'long', 'lat'],
                       header=None,
                       nrows=rows,
                       iterator=True,
                       chunksize=1000)

In [7]:
# Walk through the chunks and keep only the rows that fall inside the
# bounding box, then stitch the surviving rows into a single DataFrame.
minLat, maxLat = sorted((meximiax_lat, beauvallon_lat))
minLong, maxLong = sorted((meximiax_long, beauvallon_long))
data = pd.concat([
    chunk.loc[filter_rows(chunk, minLat, minLong, maxLat, maxLong)]
    for chunk in iter_csv
])

In [8]:
# Turn the textual timestamps into pandas datetimes (year comes first).
parsed_timestamps = pd.to_datetime(data["timestamp"], yearfirst=True)
data["timestamp"] = parsed_timestamps

In [9]:
# Latest timestamp present in the filtered data.
latest_timestamp = data["timestamp"].max()
max_date = pd.to_datetime(latest_timestamp)
max_date

Timestamp('2014-10-10 18:58:11.253000')

In [10]:
# Square grid of counters (number_of_cells x number_of_cells), all zero,
# that will hold how many recorded positions fall in each cell.
ppl_counts = np.zeros(shape=(number_of_cells,) * 2)

Lat, Lon

(array([45.584243  , 45.58744848, 45.59065396, 45.59385944, 45.59706492,
        45.6002704 , 45.60347588, 45.60668136, 45.60988684, 45.61309232,
        45.6162978 , 45.61950328, 45.62270876, 45.62591424, 45.62911972,
        45.6323252 , 45.63553068, 45.63873616, 45.64194164, 45.64514712,
        45.6483526 , 45.65155808, 45.65476356, 45.65796904, 45.66117452,
        45.66438   , 45.66758548, 45.67079096, 45.67399644, 45.67720192,
        45.6804074 , 45.68361288, 45.68681836, 45.69002384, 45.69322932,
        45.6964348 , 45.69964028, 45.70284576, 45.70605124, 45.70925672,
        45.7124622 , 45.71566768, 45.71887316, 45.72207864, 45.72528412,
        45.7284896 , 45.73169508, 45.73490056, 45.73810604, 45.74131152,
        45.744517  , 45.74772248, 45.75092796, 45.75413344, 45.75733892,
        45.7605444 , 45.76374988, 45.76695536, 45.77016084, 45.77336632,
        45.7765718 , 45.77977728, 45.78298276, 45.78618824, 45.78939372,
        45.7925992 , 45.79580468, 45.79901016, 45.8

In [19]:
# Peek at a single record (the row at position 3) to check the columns.
data.iloc[3]

id                                    1
timestamp    2014-10-08 07:14:05.105000
long                             4.8696
lat                              45.773
Name: 160806, dtype: object

In [22]:
# Count how many recorded positions fall into each grid cell.
#
# Lat / Lon hold the LOWER edge of every cell, so a point belongs to cell i
# exactly when  edge[i] <= value < edge[i] + cell_size.  (Testing against
# edge[i] - cell_size as the lower bound makes every window two cells wide and
# counts each point in up to four neighbouring cells.)
#
# The counts are ADDED to the existing `ppl_counts` grid, which must still be
# the 2-D (number_of_cells x number_of_cells) array; reset it to zeros before
# re-running this cell, otherwise the points are counted twice.
lat_values = np.asarray(data['lat'], dtype=float)
long_values = np.asarray(data['long'], dtype=float)

# Only the first `number_of_cells` edges map onto the counter grid.
lat_edges = Lat[:number_of_cells]
long_edges = Lon[:number_of_cells]

# Index of the last edge that is <= the value (-1 when below the first edge).
lat_bins = np.searchsorted(lat_edges, lat_values, side='right') - 1
long_bins = np.searchsorted(long_edges, long_values, side='right') - 1

# Discard points outside the grid: below the first edge, or at/after the upper
# border of the last cell. NaN coordinates fail the `<` tests and are dropped.
inside_grid = (
    (lat_bins >= 0)
    & (long_bins >= 0)
    & (lat_values < lat_edges[lat_bins] + lat_cell_size)
    & (long_values < long_edges[long_bins] + long_cell_size)
)

# np.add.at accumulates correctly even when several points share a cell
# (plain fancy-index `+=` would count such a cell only once).
np.add.at(ppl_counts, (lat_bins[inside_grid], long_bins[inside_grid]), 1)

In [23]:
# Display the per-cell counters (numpy abbreviates large arrays).
ppl_counts

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [24]:
# Never commit API keys to a notebook: read the Google Maps key from the
# GMAPS_API_KEY environment variable instead. Any key that was previously
# stored in this notebook should be treated as leaked and revoked.
gmaps_api_key = os.environ.get("GMAPS_API_KEY")
if not gmaps_api_key:
    raise RuntimeError(
        "Set the GMAPS_API_KEY environment variable to your Google Maps API key."
    )
gmaps.configure(api_key=gmaps_api_key)

In [25]:
# Flatten the grid into parallel 1-D sequences in row-major order: entry
# i * number_of_cells + j describes the cell at latitude edge i and longitude
# edge j.
# `longitude_values` stays a list of arrays (one copy of the longitude edges
# per latitude row); it is concatenated when the DataFrame is built.
longitude_values = [Lon[:number_of_cells],] * number_of_cells
latitude_values = np.repeat(Lat[:number_of_cells], number_of_cells)
# reshape(-1) adapts to any grid size and is safe to re-run, unlike an
# in-place resize to a hard-coded length of 10000.
ppl_counts = ppl_counts.reshape(-1)

In [26]:
# One row per grid cell: its count plus the latitude/longitude it refers to.
heatmap_data = {
    'Counts': ppl_counts,
    'latitude': latitude_values,
    'longitude': np.concatenate(longitude_values),
}
df = pd.DataFrame(heatmap_data)

In [27]:
# Draw the per-cell counts as a weighted heatmap on a Google map.
locations = df[['latitude', 'longitude']]
weights = df['Counts']
fig = gmaps.figure()
# Build the layer once and add that same object to the figure, so that
# `heatmap_layer` refers to the layer that is actually displayed.
heatmap_layer = gmaps.heatmap_layer(locations, weights=weights)
fig.add_layer(heatmap_layer)
fig

Figure(layout=FigureLayout(height='420px'))

In [None]:
# Largest id present in the filtered data (call the method; without the
# parentheses the cell only displays the bound method object).
data['id'].max()