## I. Load dependencies and data

In [1]:
!pip install -U googlemaps

Requirement already up-to-date: googlemaps in c:\users\mika-\.conda\envs\machinelearning\lib\site-packages (3.1.4)


In [4]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import googlemaps
from datetime import datetime, timedelta
import math

In [2]:
chunksize = 10 ** 6
dataset = pd.read_csv('./privamov/privamov-gps', nrows=5000000, sep='\t',names=['Id','Date','Long','Lat'])

In [44]:
position1 = dataset.iloc[10000]
position2 = dataset.iloc[0]
dist = distance(position1.Lat, position1.Long, position2.Lat, position2.Long)
dist*1000

28.60216354242698

In [3]:
for id_user in dataset.Id.unique():
    dataset_user = pd.DataFrame(columns=['Id','Data','Long','Lat'])
    dataset_user.set_index('Id')
    dataset_user = dataset[dataset['Id'] == id_user]
    path = "./split_dataset/user_" + str(id_user)
    dataset_user.to_csv(path)

In [55]:
dataset_user1.head(5)

Unnamed: 0,Id,Date,Long,Lat
0,1,2014-10-04 08:40:42.085,3.687373,43.409483
1,1,2014-10-04 08:40:42.098,3.687373,43.409483
2,1,2014-10-04 08:40:43.076,3.687403,43.40948
3,1,2014-10-04 08:40:43.125,3.687403,43.40948
4,1,2014-10-04 08:40:44.161,3.687382,43.409462


In [5]:
dataset_user1 = pd.read_csv("./split_dataset/user_1")

In [6]:
dataset_user1

Unnamed: 0.1,Unnamed: 0,Id,Date,Long,Lat
0,0,1,2014-10-04 08:40:42.085,3.687373,43.409483
1,1,1,2014-10-04 08:40:42.098,3.687373,43.409483
2,2,1,2014-10-04 08:40:43.076,3.687403,43.409480
3,3,1,2014-10-04 08:40:43.125,3.687403,43.409480
4,4,1,2014-10-04 08:40:44.161,3.687382,43.409462
...,...,...,...,...,...
1641270,4999995,1,2014-12-13 14:06:10.612,-97.748153,30.262645
1641271,4999996,1,2014-12-13 14:06:10.637,-97.748153,30.262645
1641272,4999997,1,2014-12-13 14:06:11.612,-97.748148,30.262657
1641273,4999998,1,2014-12-13 14:06:11.637,-97.748148,30.262657


## II. Create POI functions

### 2.1 : Get distance between two points

In [7]:
def distance(lat1,long1,lat2,long2):
    earthRadius = 6371000.0
    
    dLat = math.radians(lat2-lat1)
    dLong = math.radians(long2-long1)
    
    a = math.sin(dLat/2)*math.sin(dLat/2) + math.cos(math.radians(lat1)) * math.cos(math.radians(lat2)) * math.sin(dLong/2) * math.sin(dLong/2)
    
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1-a))
    dist = earthRadius * c
    
    return dist

### 2.2 : Get center of points distribution

In [8]:
def getCenter(latArray, longArray):
    averageLat = 0.0
    averageLong = 0.0
    
    for d in latArray:
        averageLat += d
    for d in longArray:
        averageLong += d
        
    averageLat /= len(latArray)
    averageLong /= len(longArray)
    
    return averageLat, averageLong

### 2.3 : Get POI

In [9]:
diameter = 500 ##Diameter of POI (in meter)
duration = 60*120 ##Duration spent in zone to be considered as POI (in second)
d2r = math.pi / 180
milli2minute = 1000 * 60

In [10]:
date1 = dataset_user1.iloc[0].Date
date1
date2 = dataset_user1.iloc[1000].Date
date2

'2014-10-04 08:49:34.161'

In [11]:
date_obj = datetime.strptime(date1.split('.')[0], '%Y-%m-%d %H:%M:%S')
date_obj2 = datetime.strptime(date2.split('.')[0], '%Y-%m-%d %H:%M:%S')
delta = date_obj2 - date_obj
delta.total_seconds()

532.0

In [12]:
def identifyPOI(df):
    POI_df = pd.DataFrame(columns=['Entry_date','DeltaT','Center','Size'])
    isEmpty = True
    latArray = []
    longArray = []
    timeArray = []
    for index, row in df.iterrows():
        date = datetime.strptime(row.Date.split('.')[0], '%Y-%m-%d %H:%M:%S')
        lat = row.Lat
        long = row.Long
        
        ##First entry
        if len(latArray) == 0 :
            latArray.append(lat)
            longArray.append(long)
            timeArray.append(date)
            isEmpty = False
            continue
        
        ##If still in the same POI
        if(distance(latArray[0], longArray[0], lat, long) < diameter):
            latArray.append(lat)
            longArray.append(long)
            timeArray.append(date)
        ##If new entry outside of actual POI
        else:
            dTime = timeArray[-1] - timeArray[0]
            if (dTime.total_seconds() < duration):
                
                ##Check if new instance is ok
                while (distance(latArray[0],longArray[0],lat,long) >= diameter):
                    latArray.pop(0)
                    longArray.pop(0)
                    timeArray.pop(0)
                    
                    if(len(latArray) == 0):
                        isEmpty = True
                        break
            ##Else valid POI
            else :
                center = getCenter(latArray,longArray)
                deltaT = timeArray[-1] - timeArray[0]
                deltaT = deltaT.total_seconds()
                POI_df = POI_df.append({'Entry_date':timeArray[0],'DeltaT':deltaT,'Center':center,'Size':len(latArray)},ignore_index=True)
    
                
                latArray.clear()
                longArray.clear()
                timeArray.clear()
                
            latArray.append(lat)
            longArray.append(long)
            timeArray.append(date)
    
    if isEmpty == False :
        center = getCenter(latArray,longArray)
        deltaT = timeArray[-1] - timeArray[0]
        deltaT = deltaT.total_seconds()
        POI_df = POI_df.append({'Entry_date':timeArray[0],'DeltaT':deltaT,'Center':center,'Size':len(latArray)},ignore_index=True)
    
    return POI_df

In [13]:
def identifyPOItoCatch(df):
    timeArray = df['Entry_date']
    posArray = df['Center']
    deltaTArray = df['DeltaT']
    for i in range(0, len(posArray)):
        for j in range(0,len(posArray)):
            if (distance(posArray[i][0],posArray[i][1],posArray[j][0],posArray[j][1]) < diameter):
                posArray.drop(labels=[j],inplace=True)
                timeArray.drop(labels=[j],inplace=True)
                deltaTArray.drop(labels=[j],inplace=True)
        posArray.reset_index(drop=True)
        timeArray.reset_index(drop=True)
        deltaTArray.reset_index(drop=True)
        
    return pd.DataFrame({'Center':posArray,'Entry':timeArray,'deltaT':deltaTArray})   
        

## III. Find POI

In [14]:
POI_df = identifyPOI(dataset_user1)

In [15]:
id_user = 1
path_poi = './poi/poi_user_' + str(id_user)+'.csv'
POI_df.to_csv(path_poi)

In [16]:
POI_df

Unnamed: 0,Entry_date,DeltaT,Center,Size
0,2014-10-04 08:40:42,115383.0,"(43.4093233098101, 3.6875304057658993)",160803
1,2014-10-07 12:30:19,67454.0,"(45.77180590163932, 4.869328661202186)",61
2,2014-10-08 07:33:46,30925.0,"(45.78580376006621, 4.8792403489572616)",4843
3,2014-10-08 16:23:44,54175.0,"(45.77310891933308, 4.8717314202525595)",2719
4,2014-10-09 07:37:55,9590.0,"(45.78611514360843, 4.879920111245953)",824
...,...,...,...,...
74,2014-12-11 00:57:30,11809.0,"(30.266574518716677, -97.74401464349351)",1870
75,2014-12-11 04:22:07,22377.0,"(30.25728259295119, -97.75026776149934)",44144
76,2014-12-11 10:35:05,13368.0,"(30.25748103715033, -97.75026152489885)",8488
77,2014-12-11 16:14:40,12507.0,"(30.265910461304447, -97.73634310085195)",8653


In [17]:
poi_df_final = identifyPOItoCatch(POI_df)

KeyError: 0

In [58]:
dataset_user7 = pd.read_csv('./split_dataset/user_7')
dataset_user7

Unnamed: 0.1,Unnamed: 0,Id,Date,Long,Lat
0,637652,7,2014-11-25 15:16:08.963,4.879607,45.786298
1,637653,7,2014-11-25 15:16:08.983,4.879607,45.786298
2,637654,7,2014-11-25 15:16:09.878,4.879627,45.786268
3,637655,7,2014-11-25 15:16:09.892,4.879627,45.786268
4,637656,7,2014-11-25 15:16:10.861,4.879627,45.786268
...,...,...,...,...,...
412413,1599118,7,2014-12-02 10:25:19.7,4.879815,45.786460
412414,1599119,7,2014-12-02 10:25:20.709,4.879765,45.786502
412415,1599120,7,2014-12-02 10:25:20.7,4.879765,45.786502
412416,1599121,7,2014-12-02 10:25:21.709,4.879745,45.786512


In [59]:
poi_user7 = identifyPOI(dataset_user7)
poi_user7

Unnamed: 0,Entry_date,DeltaT,Center,Size
0,2014-11-25 18:08:53,26237.0,"(45.18625979534138, 5.747863183331987)",12411
1,2014-11-26 01:26:11,22900.0,"(45.18632403260608, 5.748030783049562)",29085
2,2014-11-26 08:04:23,13615.0,"(45.20878258988913, 5.790639511304857)",8492
3,2014-11-26 11:51:19,24002.0,"(45.210383322767996, 5.793061877971477)",1262
4,2014-11-26 18:48:13,17377.0,"(45.18626605068259, 5.748123241503041)",32306
5,2014-11-26 23:37:51,28623.0,"(45.186332510412996, 5.748003328370729)",46013
6,2014-11-27 07:52:22,8025.0,"(45.21033164331581, 5.793007072387639)",1142
7,2014-11-27 10:08:32,28373.0,"(45.20959795112014, 5.7931935619327515)",24672
8,2014-11-27 18:21:29,22122.0,"(45.18701150073452, 5.724207006673637)",18431
9,2014-11-28 00:50:17,7566.0,"(45.18625452387288, 5.748003732680278)",15108


In [61]:
!pip install gmplot
import gmplot

Collecting gmplot
  Downloading https://files.pythonhosted.org/packages/e2/b1/e1429c31a40b3ef5840c16f78b506d03be9f27e517d3870a6fd0b356bd46/gmplot-1.2.0.tar.gz (115kB)
Building wheels for collected packages: gmplot
  Building wheel for gmplot (setup.py): started
  Building wheel for gmplot (setup.py): finished with status 'done'
  Created wheel for gmplot: filename=gmplot-1.2.0-cp37-none-any.whl size=143770 sha256=aa2953cd429870e1862b9663b0b22f2d7f2ba85b832d73b76222d119d31a4330
  Stored in directory: C:\Users\mika-\AppData\Local\pip\Cache\wheels\81\6a\76\4dd6a7cc310ba765894159ee84871e8cd55221d82ef14b81a1
Successfully built gmplot
Installing collected packages: gmplot
Successfully installed gmplot-1.2.0


In [76]:
lat_list = []
long_list = []
for i in range(0,len(poi_user7)):
    lat_list.append(poi_user7.Center[i][0])
    long_list.append(poi_user7.Center[i][1])

print(lat_list, '\n', long_list)

[45.18625979534138, 45.18632403260608, 45.20878258988913, 45.210383322767996, 45.18626605068259, 45.186332510412996, 45.21033164331581, 45.20959795112014, 45.18701150073452, 45.18625452387288, 45.186329306290126, 45.2101165826631, 45.17280680418069, 45.18627172650986, 45.193703504661144, 45.18627415937473, 45.18627933148721, 45.186907414265946, 45.18628677925106, 45.18631737183748] 
 [5.747863183331987, 5.748030783049562, 5.790639511304857, 5.793061877971477, 5.748123241503041, 5.748003328370729, 5.793007072387639, 5.7931935619327515, 5.724207006673637, 5.748003732680278, 5.747928158580314, 5.794408000893676, 5.733010848459909, 5.748074627434933, 5.726340090739676, 5.747942006581453, 5.748107395317327, 5.733862722908047, 5.748077535413068, 5.748117651464727]


In [80]:
gmap = gmplot.GoogleMapPlotter(45.185, 5.73,13)

gmap.heatmap(lat_list,long_list)



gmap.draw('C:\\Users\\mika-\\Documents\\Travail\\INSA\\Cours\\5-IF\\Système_Algo_paralleles\\Privacy\\map_user7.html')

In [96]:
lat_list2 = []
long_list2 = []
for i in range(1,len(POI_df)):
    lat_list2.append(POI_df.Center[i][0])
    long_list2.append(POI_df.Center[i][1])

In [97]:
gmap2 = gmplot.GoogleMapPlotter(45.78, 4.87,13)

gmap2.heatmap(lat_list2,long_list2)



gmap2.draw('C:\\Users\\mika-\\Documents\\Travail\\INSA\\Cours\\5-IF\\Système_Algo_paralleles\\Privacy\\map_user1.html')