## I. Load dependencies and data

In [1]:
!pip install -U googlemaps

Requirement already up-to-date: googlemaps in c:\users\mika-\.conda\envs\machinelearning\lib\site-packages (3.1.4)


In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import googlemaps
from datetime import datetime, timedelta
import math

In [15]:
# NOTE: chunksize is assigned here but is not passed to read_csv below, so the
# file is read in a single call, capped at 30 million rows.
chunksize = 1_000_000
dataset = pd.read_csv(
    './privamov/privamov-gps',
    sep='\t',
    names=['Id', 'Date', 'Long', 'Lat'],
    nrows=30000000,
)

In [7]:
# Sanity check: separation between the first row and the row at position 10000.
# NOTE(review): `distance` is defined further down (section 2.1), so on a fresh
# kernel this cell only works once that later cell has been run.
position1 = dataset.iloc[10000]
position2 = dataset.iloc[0]
dist = distance(position1.Lat, position1.Long, position2.Lat, position2.Long)
# NOTE(review): `distance` in section 2.1 scales by an Earth radius of
# 6371000.0 (presumably metres), so this extra factor of 1000 looks like a
# leftover from an earlier version — confirm the intended unit.
dist*1000

28602.20285509703

In [16]:
# Write one CSV per user so that each trace can be processed independently.
# A single groupby pass replaces one full-table boolean scan per user.
# sort=False yields the users in order of first appearance, and every group
# keeps its rows (and their original index) in file order.
for id_user, dataset_user in dataset.groupby('Id', sort=False):
    path = "./split_dataset/user_" + str(id_user)
    dataset_user.to_csv(path)

## II. Create POI functions

### 2.1 : Get distance between two points

In [36]:
def distance(lat1,long1,lat2,long2):
    """Return the great-circle distance between two points, in metres.

    Uses the haversine formula on a sphere of radius 6,371,000 m.

    Parameters
    ----------
    lat1, long1 : float
        Latitude and longitude of the first point, in decimal degrees.
    lat2, long2 : float
        Latitude and longitude of the second point, in decimal degrees.

    Returns
    -------
    float
        Distance along the surface of the sphere, in metres.
    """
    earthRadius = 6371000.0

    dLat = math.radians(lat2-lat1)
    dLong = math.radians(long2-long1)

    sinHalfLat = math.sin(dLat/2)
    sinHalfLong = math.sin(dLong/2)
    a = sinHalfLat*sinHalfLat + math.cos(math.radians(lat1)) * math.cos(math.radians(lat2)) * sinHalfLong * sinHalfLong

    # Floating-point rounding can push `a` marginally above 1 for (nearly)
    # antipodal points, which would make sqrt(1-a) raise ValueError.
    # The explicit comparison leaves a NaN input untouched.
    if a > 1.0:
        a = 1.0

    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1-a))
    return earthRadius * c

### 2.2 : Get center of points distribution

In [37]:
def getCenter(latArray, longArray):
    """Return the arithmetic mean of the latitudes and of the longitudes.

    Parameters
    ----------
    latArray : sequence of float
        Latitudes of the points.
    longArray : sequence of float
        Longitudes of the points.

    Returns
    -------
    tuple
        ``(mean latitude, mean longitude)``.

    Raises
    ------
    ZeroDivisionError
        If either sequence is empty.
    """
    def _running_total(values):
        # Plain left-to-right accumulation starting from 0.0.
        total = 0.0
        for value in values:
            total += value
        return total

    latTotal = _running_total(latArray)
    longTotal = _running_total(longArray)

    return latTotal / len(latArray), longTotal / len(longArray)

### 2.3 : Get POI

In [23]:
# Spatial threshold: diameter of a POI, in metres.
diameter = 100
# Temporal threshold: time that must be spent in a zone for it to count as a
# POI, in seconds.
duration = 60 * 60
# Conversion factors: degrees to radians, and (going by the name)
# milliseconds per minute.
d2r = math.pi / 180
milli2minute = 60 * 1000

In [24]:
# Pick two raw timestamps from user 1's trace: the first row and the row at
# position 1000. Only the last expression of the cell is displayed.
date1 = dataset_user1['Date'].iloc[0]
date2 = dataset_user1['Date'].iloc[1000]
date2

'2014-10-04 08:49:34.161'

In [25]:
# Cut each timestamp at the first '.', parse what remains, and show the
# elapsed time between the two samples in seconds.
date_obj, date_obj2 = (
    datetime.strptime(stamp.split('.')[0], '%Y-%m-%d %H:%M:%S')
    for stamp in (date1, date2)
)
delta = date_obj2 - date_obj
delta.total_seconds()

532.0

In [26]:
def identifyPOI(df):
    """Extract points of interest (POIs) from one user's GPS trace.

    The rows of ``df`` are scanned in order while a window of consecutive
    points is maintained. A point joins the window when it lies less than
    ``diameter`` metres from the window's first point. When a point falls
    outside:

    * if the window spans at least ``duration`` seconds, it is recorded as a
      POI and a new window starts at the current point;
    * otherwise the oldest points are discarded until the window's first point
      is within ``diameter`` of the current point (or the window is empty),
      and the current point is appended.

    The window still open after the last row is recorded under the same
    ``duration`` rule as every other window.

    Relies on the module-level thresholds ``diameter`` and ``duration`` and on
    the helpers ``distance`` and ``getCenter``.

    Parameters
    ----------
    df : pandas.DataFrame
        Trace with columns ``Date`` (string formatted as
        ``'%Y-%m-%d %H:%M:%S'``, optionally followed by ``.`` and a fractional
        part), ``Lat`` and ``Long``.

    Returns
    -------
    pandas.DataFrame
        One row per POI with columns ``Entry_date`` (datetime of the window's
        first point), ``DeltaT`` (seconds between its first and last point),
        ``Center`` (``(lat, long)`` tuple from ``getCenter``) and ``Size``
        (number of points in the window).
    """
    columns = ['Entry_date','DeltaT','Center','Size']
    # POIs are collected in a list and converted once at the end:
    # DataFrame.append copied the whole frame on every call and is no longer
    # available in pandas 2.x.
    records = []
    latArray = []
    longArray = []
    timeArray = []

    def recordWindowIfLongEnough():
        """Record the current window as a POI if it lasted at least `duration`."""
        if not timeArray:
            return False
        deltaT = (timeArray[-1] - timeArray[0]).total_seconds()
        if deltaT < duration:
            return False
        records.append({'Entry_date':timeArray[0],'DeltaT':deltaT,'Center':getCenter(latArray,longArray),'Size':len(latArray)})
        return True

    for row in df.itertuples(index=False):
        # The fractional part of the timestamp is dropped before parsing.
        date = datetime.strptime(row.Date.split('.')[0], '%Y-%m-%d %H:%M:%S')
        lat = row.Lat
        long = row.Long

        # The current point left the zone anchored at the window's first point.
        if latArray and not (distance(latArray[0], longArray[0], lat, long) < diameter):
            if recordWindowIfLongEnough():
                latArray.clear()
                longArray.clear()
                timeArray.clear()
            else:
                # Stay too short: drop leading points until the window starts
                # within `diameter` of the current point. One slice deletion
                # replaces repeated pop(0) calls.
                keepFrom = 0
                while keepFrom < len(latArray) and distance(latArray[keepFrom], longArray[keepFrom], lat, long) >= diameter:
                    keepFrom += 1
                del latArray[:keepFrom]
                del longArray[:keepFrom]
                del timeArray[:keepFrom]

        latArray.append(lat)
        longArray.append(long)
        timeArray.append(date)

    # Whatever window is still open at the end of the trace is judged by the
    # same duration threshold as all the others.
    recordWindowIfLongEnough()

    return pd.DataFrame(records, columns=columns)

In [13]:
def identifyPOItoCatch(df):
    """Collapse POIs that lie within `diameter` metres of an earlier one.

    Rows are visited in order. A POI is kept only if its centre is at least
    ``diameter`` metres (per ``distance``) from the centre of every POI
    already kept, so the first occurrence of each place is retained. The
    input frame is not modified.

    Relies on the module-level ``diameter`` threshold and on ``distance``.

    Parameters
    ----------
    df : pandas.DataFrame
        POI table with columns ``Entry_date``, ``Center`` and ``DeltaT``.
        Each ``Center`` is indexed as ``[0]`` = latitude, ``[1]`` = longitude,
        so it must be a pair of numbers, not its text representation.

    Returns
    -------
    pandas.DataFrame
        The retained POIs with columns ``Center``, ``Entry`` and ``deltaT``
        and a fresh 0..n-1 index.
    """
    keptPositions = []
    keptCenters = []
    for position, center in enumerate(df['Center']):
        # Comparing only against already-kept centres avoids matching a POI
        # with itself, which would discard every row.
        isNewPlace = all(
            distance(other[0], other[1], center[0], center[1]) >= diameter
            for other in keptCenters
        )
        if isNewPlace:
            keptPositions.append(position)
            keptCenters.append(center)

    retained = df.iloc[keptPositions]
    return (
        retained[['Center', 'Entry_date', 'DeltaT']]
        .rename(columns={'Entry_date': 'Entry', 'DeltaT': 'deltaT'})
        .reset_index(drop=True)
    )
        

## III. Find POI

In [33]:
# Ids of the users whose per-user files are processed by CalculatePOI below.
id_users = [1, 6, 2, 8, 27, 17, 28, 24, 38, 42, 11, 36, 50, 26, 14, 55, 52, 85, 7, 15]

In [40]:
def CalculatePOI():
    """Compute and save the POIs of every user listed in `id_users`.

    For each user id, reads ``./split_dataset/user_<id>``, runs
    ``identifyPOI`` on it, writes the result to ``./poi/poi_user_<id>.csv``
    and prints one progress line.
    """
    total = len(id_users)
    for rank, id_user in enumerate(id_users, start=1):
        path_dataset = "./split_dataset/user_"+str(id_user)
        dataset_user = pd.read_csv(path_dataset)
        poi_dataset = identifyPOI(dataset_user)
        path_poi = './poi/poi_user_' + str(id_user)+'.csv'
        poi_dataset.to_csv(path_poi)
        # The counter is the position in the run, not the user id, and the
        # total follows the length of id_users instead of a hard-coded 20.
        print("-------POI of user {} ({}/{}) Done-------".format(id_user, rank, total))

In [41]:
# Run the POI extraction for every id in id_users; one progress line is
# printed per user.
CalculatePOI()

-------POI of user 1/20 Done-------
-------POI of user 6/20 Done-------
-------POI of user 2/20 Done-------
-------POI of user 8/20 Done-------
-------POI of user 27/20 Done-------
-------POI of user 17/20 Done-------
-------POI of user 28/20 Done-------
-------POI of user 24/20 Done-------
-------POI of user 38/20 Done-------
-------POI of user 42/20 Done-------
-------POI of user 11/20 Done-------
-------POI of user 36/20 Done-------
-------POI of user 50/20 Done-------
-------POI of user 26/20 Done-------
-------POI of user 14/20 Done-------
-------POI of user 55/20 Done-------
-------POI of user 52/20 Done-------
-------POI of user 85/20 Done-------
-------POI of user 7/20 Done-------
-------POI of user 15/20 Done-------


In [44]:
# Reload the POIs saved for user 1 and display them.
# NOTE(review): the recorded output shows "Unnamed: 0" columns and Center
# values rendered as quoted text; presumably the saved index is read back as
# data and Center comes back as a string rather than a tuple — confirm before
# indexing Center numerically.
poi_dataset_user1 = pd.read_csv('./poi/poi_user_1.csv')
poi_dataset_user1

Unnamed: 0.1,Unnamed: 0,Entry_date,DeltaT,Center,Size
0,0,2014-10-04 08:40:42,67378.0,"(43.409325609736236, 3.687527731044699)",90787
1,1,2014-10-05 03:24:10,4565.0,"(43.409323041349865, 3.687483238724403)",8174
2,2,2014-10-05 04:41:11,27140.0,"(43.40931265973461, 3.687536559698956)",32502
3,3,2014-10-05 12:17:16,5275.0,"(43.40930803888642, 3.687586438313091)",9532
4,4,2014-10-05 13:45:39,7153.0,"(43.409383230200724, 3.687536262148475)",12896
...,...,...,...,...,...
329,329,2015-03-05 13:16:54,9833.0,"(45.78731919770777, 4.8796830706781265)",349
330,330,2015-03-06 10:29:40,4134.0,"(45.786410581083835, 4.879840131150681)",3482
331,331,2015-03-06 12:44:55,6369.0,"(45.78607443639292, 4.87952442834138)",207
332,332,2015-03-09 18:48:35,49958.0,"(45.77066921752736, 4.870224225352112)",213


In [61]:
!pip install gmplot
import gmplot

Collecting gmplot
  Downloading https://files.pythonhosted.org/packages/e2/b1/e1429c31a40b3ef5840c16f78b506d03be9f27e517d3870a6fd0b356bd46/gmplot-1.2.0.tar.gz (115kB)
Building wheels for collected packages: gmplot
  Building wheel for gmplot (setup.py): started
  Building wheel for gmplot (setup.py): finished with status 'done'
  Created wheel for gmplot: filename=gmplot-1.2.0-cp37-none-any.whl size=143770 sha256=aa2953cd429870e1862b9663b0b22f2d7f2ba85b832d73b76222d119d31a4330
  Stored in directory: C:\Users\mika-\AppData\Local\pip\Cache\wheels\81\6a\76\4dd6a7cc310ba765894159ee84871e8cd55221d82ef14b81a1
Successfully built gmplot
Installing collected packages: gmplot
Successfully installed gmplot-1.2.0


In [76]:
# Split the POI centres of user 7 into parallel latitude / longitude lists
# (element [0] of each Center goes to lat_list, element [1] to long_list).
lat_list = [poi_user7.Center[i][0] for i in range(len(poi_user7))]
long_list = [poi_user7.Center[i][1] for i in range(len(poi_user7))]

print(lat_list, '\n', long_list)

[45.18625979534138, 45.18632403260608, 45.20878258988913, 45.210383322767996, 45.18626605068259, 45.186332510412996, 45.21033164331581, 45.20959795112014, 45.18701150073452, 45.18625452387288, 45.186329306290126, 45.2101165826631, 45.17280680418069, 45.18627172650986, 45.193703504661144, 45.18627415937473, 45.18627933148721, 45.186907414265946, 45.18628677925106, 45.18631737183748] 
 [5.747863183331987, 5.748030783049562, 5.790639511304857, 5.793061877971477, 5.748123241503041, 5.748003328370729, 5.793007072387639, 5.7931935619327515, 5.724207006673637, 5.748003732680278, 5.747928158580314, 5.794408000893676, 5.733010848459909, 5.748074627434933, 5.726340090739676, 5.747942006581453, 5.748107395317327, 5.733862722908047, 5.748077535413068, 5.748117651464727]


In [80]:
# Heatmap of user 7's POI centres, written out as a standalone HTML page.
# The three positional arguments are presumably centre latitude, centre
# longitude and zoom level — confirm against the gmplot documentation.
gmap = gmplot.GoogleMapPlotter(45.185, 5.73,13)

gmap.heatmap(lat_list,long_list)



# NOTE(review): absolute, machine-specific output path; the cell will not run
# unchanged on another machine.
gmap.draw('C:\\Users\\mika-\\Documents\\Travail\\INSA\\Cours\\5-IF\\Système_Algo_paralleles\\Privacy\\map_user7.html')

In [96]:
# Split the POI centres held in POI_df into parallel latitude / longitude lists.
# NOTE(review): the range starts at 1, so the row labelled 0 is left out,
# unlike the user-7 cell which starts at 0 — confirm this is intended.
lat_list2 = [POI_df.Center[i][0] for i in range(1, len(POI_df))]
long_list2 = [POI_df.Center[i][1] for i in range(1, len(POI_df))]

In [97]:
# Heatmap of the centres collected in lat_list2 / long_list2, written out as
# a standalone HTML page (named map_user1.html).
# The three positional arguments are presumably centre latitude, centre
# longitude and zoom level — confirm against the gmplot documentation.
gmap2 = gmplot.GoogleMapPlotter(45.78, 4.87,13)

gmap2.heatmap(lat_list2,long_list2)



# NOTE(review): absolute, machine-specific output path; the cell will not run
# unchanged on another machine.
gmap2.draw('C:\\Users\\mika-\\Documents\\Travail\\INSA\\Cours\\5-IF\\Système_Algo_paralleles\\Privacy\\map_user1.html')