## I. Load dependencies and data

In [1]:
!pip install -U googlemaps

Collecting googlemaps
  Downloading https://files.pythonhosted.org/packages/91/c6/f59f1551fc6696987a5c28c8345fc2322285e99658999f27e637b810c685/googlemaps-4.0.0-py3-none-any.whl
Installing collected packages: googlemaps
  Found existing installation: googlemaps 3.1.4
    Uninstalling googlemaps-3.1.4:
      Successfully uninstalled googlemaps-3.1.4
Successfully installed googlemaps-4.0.0


In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import googlemaps
from datetime import datetime, timedelta
import math

In [15]:
# NOTE(review): chunksize is assigned here but is not passed to read_csv below
# and is not referenced in the other visible cells — confirm whether it is needed.
chunksize = 10 ** 6
# Read at most 30,000,000 rows of the tab-separated GPS file. Column names are
# supplied explicitly through names=, so the first line of the file is read as data.
dataset = pd.read_csv('./privamov/privamov-gps', nrows=30000000, sep='\t',names=['Id','Date','Long','Lat'])

In [7]:
# Sanity check of distance() on two rows of the loaded dataset.
# NOTE(review): distance() is defined in a later cell (section 2.1), so that
# cell must already have been executed for this one to run.
position1 = dataset.iloc[10000]
position2 = dataset.iloc[0]
dist = distance(position1.Lat, position1.Long, position2.Lat, position2.Long)
# NOTE(review): distance() in section 2.1 uses an Earth radius of 6371000.0
# (metres), so multiplying by 1000 would not yield metres — confirm the unit.
dist*1000

28602.20285509703

In [16]:
# Write one CSV per user so that each trace can be processed independently.
# groupby hands over every user's rows in a single pass instead of re-filtering
# the whole dataset once per user; sort=False keeps first-appearance order.
# The row index is written too, exactly as a plain to_csv(path) call does.
for id_user, dataset_user in dataset.groupby('Id', sort=False):
    path = "./split_dataset/user_" + str(id_user)
    dataset_user.to_csv(path)

## II. Create POI functions

### 2.1 : Get distance between two points

In [3]:
def distance(lat1,long1,lat2,long2):
    """Return the great-circle distance between two points, in metres.

    Uses the haversine formula on a sphere of radius 6371000.0 m.

    Parameters
    ----------
    lat1, long1 : float
        Latitude and longitude of the first point, in degrees.
    lat2, long2 : float
        Latitude and longitude of the second point, in degrees.

    Returns
    -------
    float
        Distance along the sphere's surface, in metres.
    """
    earthRadius = 6371000.0

    dLat = math.radians(lat2-lat1)
    dLong = math.radians(long2-long1)

    sinHalfLat = math.sin(dLat/2)
    sinHalfLong = math.sin(dLong/2)

    a = sinHalfLat*sinHalfLat + math.cos(math.radians(lat1)) * math.cos(math.radians(lat2)) * sinHalfLong * sinHalfLong

    # Rounding can push `a` marginally outside [0, 1] (e.g. for near-antipodal
    # points), which would make math.sqrt(1-a) raise ValueError.
    a = min(1.0, max(0.0, a))

    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1-a))

    return earthRadius * c

### 2.2 : Get center of points distribution

In [4]:
def getCenter(latArray, longArray):
    """Return the mean latitude and the mean longitude of a set of points.

    Each sequence is averaged independently (plain arithmetic mean).

    Parameters
    ----------
    latArray : sequence of float
        Latitudes of the points.
    longArray : sequence of float
        Longitudes of the points.

    Returns
    -------
    tuple
        ``(mean latitude, mean longitude)``.

    Raises
    ------
    ZeroDivisionError
        If either sequence is empty.
    """
    # Accumulate from 0.0 in input order, one running total per coordinate.
    totals = [0.0, 0.0]
    for slot, values in enumerate((latArray, longArray)):
        for value in values:
            totals[slot] += value

    return totals[0] / len(latArray), totals[1] / len(longArray)

### 2.3 : Get POI

In [5]:
# Thresholds read as module-level globals by the POI functions below.
# identifyPOI compares `diameter` with the distance between a new fix and the
# first fix of the current cluster.
diameter = 100 ## Distance threshold of a POI (in metres)
duration = 60*60 ## Minimum time spent in the zone to count as a POI (in seconds)
# Degrees-to-radians factor; not referenced in the visible cells
# (distance() uses math.radians instead).
d2r = math.pi / 180
# Number of milliseconds in one minute; not referenced in the visible cells.
milli2minute = 1000 * 60

In [24]:
# Take the Date value of rows 0 and 1000 to experiment with timestamp parsing.
# NOTE(review): dataset_user1 is not defined in any visible cell — it appears to
# come from earlier kernel state; confirm how it is loaded.
date1 = dataset_user1.iloc[0].Date
date1
date2 = dataset_user1.iloc[1000].Date
# Only this last expression is displayed as the cell output.
date2

'2014-10-04 08:49:34.161'

In [25]:
# Parse both sample timestamps. Everything after the first '.' (the fractional
# seconds) is cut off before parsing, so the result has one-second resolution.
date_obj = datetime.strptime(date1.split('.')[0], '%Y-%m-%d %H:%M:%S')
date_obj2 = datetime.strptime(date2.split('.')[0], '%Y-%m-%d %H:%M:%S')
# Elapsed time between the two samples, shown in seconds.
delta = date_obj2 - date_obj
delta.total_seconds()

532.0

In [6]:
def _poiRecord(latArray, longArray, timeArray):
    """Summarise one cluster of fixes as a single POI row (dict)."""
    return {
        'Entry_date': timeArray[0],
        'DeltaT': (timeArray[-1] - timeArray[0]).total_seconds(),
        'Center': getCenter(latArray, longArray),
        'Size': len(latArray),
    }


def identifyPOI(df):
    """Extract points of interest (POI) from a GPS trace.

    The trace is scanned row by row while a window of consecutive fixes is
    maintained. A fix stays in the window while it lies closer than the global
    ``diameter`` (metres) to the window's first fix. When a fix falls outside:

    * if the window spans at least the global ``duration`` (seconds), it is
      emitted as a POI and a new window starts at the current fix;
    * otherwise the oldest fixes are discarded until the window's first fix is
      within ``diameter`` of the current fix (or the window is empty), and the
      current fix is appended.

    The window left over at the end of the trace is emitted only if it also
    spans at least ``duration``, so every returned row satisfies the same
    dwell-time criterion.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``Date`` (string formatted as
        ``'%Y-%m-%d %H:%M:%S'``, optionally followed by ``.fraction``),
        ``Lat`` and ``Long`` (degrees). Other columns are ignored.
        Assumes rows are in chronological order — TODO confirm.

    Returns
    -------
    pandas.DataFrame
        One row per POI with columns ``Entry_date`` (time of the first fix),
        ``DeltaT`` (seconds between first and last fix), ``Center``
        (``(mean lat, mean long)`` tuple) and ``Size`` (number of fixes).
        Empty, with the same columns, when no POI is found.
    """
    columns = ['Entry_date','DeltaT','Center','Size']
    # Rows are collected in a list and turned into a frame once at the end:
    # DataFrame.append no longer exists in pandas 2.x and copied the frame on
    # every call.
    records = []
    latArray = []
    longArray = []
    timeArray = []

    # Iterating the three columns avoids building a Series per row (iterrows).
    for rawDate, lat, lon in zip(df['Date'], df['Lat'], df['Long']):
        # Fractional seconds are dropped before parsing.
        date = datetime.strptime(rawDate.split('.')[0], '%Y-%m-%d %H:%M:%S')

        # First fix of the trace, or fix still inside the current cluster.
        if not latArray or distance(latArray[0], longArray[0], lat, lon) < diameter:
            latArray.append(lat)
            longArray.append(lon)
            timeArray.append(date)
            continue

        # The fix left the current cluster.
        if (timeArray[-1] - timeArray[0]).total_seconds() >= duration:
            # Stayed long enough: emit the cluster and start afresh.
            records.append(_poiRecord(latArray, longArray, timeArray))
            latArray.clear()
            longArray.clear()
            timeArray.clear()
        else:
            # Too short: drop the oldest fixes up to the first one that is
            # within `diameter` of the new fix. One slice deletion replaces the
            # repeated pop(0) calls, each of which shifts the whole list.
            keepFrom = len(latArray)
            for k in range(len(latArray)):
                if distance(latArray[k], longArray[k], lat, lon) < diameter:
                    keepFrom = k
                    break
            del latArray[:keepFrom]
            del longArray[:keepFrom]
            del timeArray[:keepFrom]

        latArray.append(lat)
        longArray.append(lon)
        timeArray.append(date)

    # Trailing cluster: decided from the window itself rather than from a
    # separate "is empty" flag that could fall out of sync with it.
    if latArray and (timeArray[-1] - timeArray[0]).total_seconds() >= duration:
        records.append(_poiRecord(latArray, longArray, timeArray))

    return pd.DataFrame(records, columns=columns)

In [13]:
def identifyPOItoCatch(df):
    """Keep one POI per place by discarding spatial duplicates.

    Rows are visited in order. A row is kept only if its centre is at least
    the global ``diameter`` (metres) away from the centre of every row already
    kept; otherwise it is treated as another visit to a known place and
    skipped. The input frame is not modified.

    NOTE(review): the first visit to each place is the one retained — confirm
    whether the durations of the merged visits should be accumulated instead.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``Center`` (indexable ``lat, long`` pair),
        ``Entry_date`` and ``DeltaT``.

    Returns
    -------
    pandas.DataFrame
        Columns ``Center``, ``Entry`` and ``deltaT`` for the retained rows,
        indexed 0..n-1.
    """
    keptCenters = []
    keptEntries = []
    keptDeltaT = []

    for center, entry, deltaT in zip(df['Center'], df['Entry_date'], df['DeltaT']):
        # Compare only against rows already kept: a row is never tested against
        # itself, and nothing is removed from a container while it is iterated.
        isDuplicate = any(
            distance(kept[0], kept[1], center[0], center[1]) < diameter
            for kept in keptCenters
        )
        if isDuplicate:
            continue
        keptCenters.append(center)
        keptEntries.append(entry)
        keptDeltaT.append(deltaT)

    return pd.DataFrame({'Center':keptCenters,'Entry':keptEntries,'deltaT':keptDeltaT})
        

## III. Find POI

In [33]:
# Ids of the users whose traces are processed by CalculatePOI (20 entries).
id_users = [1, 6, 2, 8, 27, 17, 28, 24, 38, 42, 11, 36, 50, 26, 14, 55, 52, 85, 7, 15]

In [40]:
def CalculatePOI():
    """Run identifyPOI for every user in ``id_users`` and save each result.

    For each id the trace is read from ``./split_dataset/user_<id>`` and the
    resulting POI table is written to ``./poi/poi_user_<id>.csv`` (row index
    included). A progress line is printed after each user.

    Returns
    -------
    None
    """
    total = len(id_users)
    for rank, id_user in enumerate(id_users, start=1):
        path_dataset = "./split_dataset/user_"+str(id_user)
        dataset_user = pd.read_csv(path_dataset)
        poi_dataset = identifyPOI(dataset_user)
        path_poi = './poi/poi_user_' + str(id_user)+'.csv'
        poi_dataset.to_csv(path_poi)
        # Report the user's position in the list as progress; the user id alone
        # over a fixed denominator produced lines such as "85/20".
        print("-------POI of user {} ({}/{}) Done-------".format(id_user, rank, total))

In [9]:
# Run the POI extraction on a single user's split file (user 7) so that the
# result can be inspected before processing every user.
path_dataset = "./split_dataset/user_7"
dataset_user = pd.read_csv(path_dataset)
poi_dataset = identifyPOI(dataset_user)

In [10]:
poi_dataset

Unnamed: 0,Entry_date,DeltaT,Center,Size
0,2014-11-25 16:26:26,5324.0,"(45.6640469047619, 5.026609047619045)",7
1,2014-11-25 18:14:36,26333.0,"(45.186244452113776, 5.748025768600143)",12603
2,2014-11-26 01:40:12,4246.0,"(45.18629092064239, 5.747975584991567)",8342
3,2014-11-26 03:57:29,6773.0,"(45.18632709052771, 5.7482117760236955)",8108
4,2014-11-26 05:52:57,6580.0,"(45.18632202404993, 5.747956394257367)",5433
5,2014-11-26 08:10:24,9670.0,"(45.20996948680363, 5.794540513196471)",682
6,2014-11-26 11:50:27,23597.0,"(45.210027684365784, 5.795045486725666)",452
7,2014-11-26 18:53:54,6067.0,"(45.18626368708942, 5.74813185771196)",11882
8,2014-11-26 20:40:22,8379.0,"(45.18626028572638, 5.748242978352759)",16076
9,2014-11-27 00:52:45,3727.0,"(45.18624195770083, 5.748021443678265)",7250


In [41]:
# Process every user listed in id_users; one CSV per user is written under ./poi/.
CalculatePOI()

-------POI of user 1/20 Done-------
-------POI of user 6/20 Done-------
-------POI of user 2/20 Done-------
-------POI of user 8/20 Done-------
-------POI of user 27/20 Done-------
-------POI of user 17/20 Done-------
-------POI of user 28/20 Done-------
-------POI of user 24/20 Done-------
-------POI of user 38/20 Done-------
-------POI of user 42/20 Done-------
-------POI of user 11/20 Done-------
-------POI of user 36/20 Done-------
-------POI of user 50/20 Done-------
-------POI of user 26/20 Done-------
-------POI of user 14/20 Done-------
-------POI of user 55/20 Done-------
-------POI of user 52/20 Done-------
-------POI of user 85/20 Done-------
-------POI of user 7/20 Done-------
-------POI of user 15/20 Done-------


In [6]:
# Reload the POI table saved for user 6 and display it.
# NOTE(review): the variable is named poi_dataset_user1 although the file read
# is poi_user_6.csv — confirm which user is intended.
poi_dataset_user1 = pd.read_csv('./poi/poi_user_6.csv')
poi_dataset_user1

Unnamed: 0.1,Unnamed: 0,Entry_date,DeltaT,Center,Size
0,0,2014-11-24 18:20:33,18950.0,"(45.353884811485486, 5.495568573187868)",23884
1,1,2014-12-22 17:07:34,7196.0,"(45.63191895813563, 5.146448920007512)",5300
2,2,2014-12-22 19:12:39,3895.0,"(45.631918042048994, 5.146427475032652)",4900
3,3,2014-12-27 08:43:43,90547.0,"(48.950081220779694, 2.9191925729058843)",31811
4,4,2014-12-28 10:31:07,20998.0,"(49.07102023151619, 2.5643733273408835)",40082
...,...,...,...,...,...
153,153,2015-03-08 07:00:06,5555.0,"(45.63196730790172, 5.1463998139867275)",10786
154,154,2015-03-08 11:13:53,5398.0,"(45.070246943029645, 5.550553998684036)",1383
155,155,2015-03-08 15:06:30,5706.0,"(45.63192030418344, 5.146488388278012)",7898
156,156,2015-03-08 16:45:24,6921.0,"(45.63196572369844, 5.146473834416941)",7992


In [7]:
# test_df['Center'] = test_df[test_df['Center'][1:-1].replace(" ", "").split(',')]

def change_to_pair(row):
    """Return the ``'Center'`` field of a row as a ``[lat, long]`` list of floats.

    After a CSV round trip the centre is stored as text such as
    ``"(45.35, 5.49)"``; this converts it back to numbers. A value that is
    already a pair (list or tuple) is accepted as well, so applying the
    function again to an already converted column does not fail.

    Parameters
    ----------
    row : mapping
        Anything indexable by ``'Center'`` (e.g. a DataFrame row).

    Returns
    -------
    list of float
        The two coordinates, in the order in which they were stored.

    Raises
    ------
    ValueError
        If a coordinate cannot be converted to float.
    IndexError
        If fewer than two coordinates are present.
    """
    center = row['Center']
    if isinstance(center, str):
        # Drop the surrounding brackets; float() tolerates the inner spaces.
        center = center.strip()[1:-1].split(',')
    return [float(center[0]), float(center[1])]

# Replace the 'Center' column with the value returned by change_to_pair for
# each row (axis=1 passes one row at a time).
poi_dataset_user1['Center'] = poi_dataset_user1.apply(change_to_pair, axis=1)

In [8]:
poi_dataset_user1

Unnamed: 0.1,Unnamed: 0,Entry_date,DeltaT,Center,Size
0,0,2014-11-24 18:20:33,18950.0,"[45.353884811485486, 5.495568573187868]",23884
1,1,2014-12-22 17:07:34,7196.0,"[45.63191895813563, 5.146448920007512]",5300
2,2,2014-12-22 19:12:39,3895.0,"[45.631918042048994, 5.146427475032652]",4900
3,3,2014-12-27 08:43:43,90547.0,"[48.950081220779694, 2.9191925729058843]",31811
4,4,2014-12-28 10:31:07,20998.0,"[49.07102023151619, 2.5643733273408835]",40082
...,...,...,...,...,...
153,153,2015-03-08 07:00:06,5555.0,"[45.63196730790172, 5.1463998139867275]",10786
154,154,2015-03-08 11:13:53,5398.0,"[45.070246943029645, 5.550553998684036]",1383
155,155,2015-03-08 15:06:30,5706.0,"[45.63192030418344, 5.146488388278012]",7898
156,156,2015-03-08 16:45:24,6921.0,"[45.63196572369844, 5.146473834416941]",7992


In [9]:
!pip install gmplot
import gmplot



In [10]:
# Split the POI centres into two parallel lists (first and second coordinate
# of every 'Center' entry), looked up by row label 0..n-1.
lat_list = [poi_dataset_user1.Center[k][0] for k in range(len(poi_dataset_user1))]
long_list = [poi_dataset_user1.Center[k][1] for k in range(len(poi_dataset_user1))]

print(lat_list, '\n', long_list)

[45.353884811485486, 45.63191895813563, 45.631918042048994, 48.950081220779694, 49.07102023151619, 49.07105175722799, 48.95007605556429, 45.63199159589225, 45.631967494735534, 45.632003190152076, 45.63203024634076, 45.63203789207418, 45.78616949509742, 45.63196562255127, 45.78624791166091, 45.78384196558285, 45.63202276279889, 45.63196491212188, 45.63200840840786, 45.632076073886026, 45.747839226655294, 45.63197435160736, 45.21127154392138, 45.21118200251242, 45.63207601967389, 45.35399167052582, 45.35374898388681, 45.35389296001281, 45.63203417237023, 45.63220450485535, 45.63208273358439, 45.786090483330966, 45.786105653858044, 45.78375847617289, 45.632001658265374, 45.63201640397897, 45.63206953952022, 45.78376816697543, 45.78455048103824, 45.78362985093183, 45.78349710061664, 45.632074972498934, 45.63206040465129, 45.786127065759175, 45.78357768877962, 45.78609519082538, 45.631934235519985, 45.63198316350714, 45.63208823908038, 45.63208282378515, 45.63196223945518, 45.6318422129056,

In [11]:
# Heat map of the POI centres collected in lat_list / long_list.
# The three arguments are the values passed as map centre (lat, long) and zoom.
gmap = gmplot.GoogleMapPlotter(45.185, 5.73,13)

gmap.heatmap(lat_list,long_list)

# Write the HTML file relative to the working directory: an absolute path to
# one user's home folder only works on that machine.
gmap.draw('map_user6.html')

In [96]:
# Build parallel coordinate lists from the 'Center' column of POI_df.
# NOTE(review): POI_df is not assigned at notebook level in any visible cell
# (the name only appears as a local variable inside identifyPOI) — it appears
# to come from earlier kernel state; confirm where it is defined.
lat_list2 = []
long_list2 = []
# NOTE(review): the range starts at 1, so the row labelled 0 is left out —
# confirm that this is intentional.
for i in range(1,len(POI_df)):
    lat_list2.append(POI_df.Center[i][0])
    long_list2.append(POI_df.Center[i][1])

In [97]:
# Heat map of the POI centres collected in lat_list2 / long_list2.
# The three arguments are the values passed as map centre (lat, long) and zoom.
gmap2 = gmplot.GoogleMapPlotter(45.78, 4.87,13)

gmap2.heatmap(lat_list2,long_list2)

# Write the HTML file relative to the working directory: an absolute path to
# one user's home folder only works on that machine.
gmap2.draw('map_user1.html')