# Finding work and leaving place

In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
from datetime import datetime, timedelta
import math

### Get distance between two points

In [2]:
diameter = 500 ##Diameter of POI (in meter)

In [3]:
def distance(lat1,long1,lat2,long2):
    earthRadius = 6371000.0
    
    dLat = math.radians(lat2-lat1)
    dLong = math.radians(long2-long1)
    
    a = math.sin(dLat/2)*math.sin(dLat/2) + math.cos(math.radians(lat1)) * math.cos(math.radians(lat2)) * math.sin(dLong/2) * math.sin(dLong/2)
    
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1-a))
    dist = earthRadius * c
    
    return dist

### Get center of points distribution

In [4]:
def getCenter(latArray, longArray):
    averageLat = 0.0
    averageLong = 0.0
    
    for d in latArray:
        averageLat += d
    for d in longArray:
        averageLong += d
        
    averageLat /= len(latArray)
    averageLong /= len(longArray)
    
    return averageLat, averageLong

## Normalizing POI center

In [31]:
def normalize_POI(poi_dataframe):
    posArray = poi_dataframe['Center']
    temp_general_array = []
    for i in range(0, len(posArray)):
        
        
        temp_index_array = []
        temp_lat_array = []
        temp_lng_array = []
        
        if i not in temp_general_array:
            temp_lat_array.append(poi_dataframe.iloc[i]['Center'][0])
            temp_lng_array.append(poi_dataframe.iloc[i]['Center'][1])
            temp_index_array.append(i)
            temp_general_array.append(i)

            for j in range(0,len(posArray)):
                if (distance(posArray[i][0],posArray[i][1],posArray[j][0],posArray[j][1]) < diameter):

                    if i!=j:
                        if j not in temp_general_array:
#                             print(j)
                            temp_index_array.append(j)
                            temp_lat_array.append(poi_dataframe.iloc[j]['Center'][0])
                            temp_lng_array.append(poi_dataframe.iloc[j]['Center'][1])
                            temp_general_array.append(j)
                
            center = getCenter(temp_lat_array,temp_lng_array)
            print(center)

            for index in temp_index_array:
                poi_dataframe.iloc[index]['Center'] = center
                #print(poi_dataframe.iloc[index]['Center'])
                
    return poi_dataframe

                

In [32]:
test_file_path = "./poi/poi_user_1.csv"

test_df = pd.read_csv(test_file_path)

In [33]:
# test_df['Center'] = test_df[test_df['Center'][1:-1].replace(" ", "").split(',')]

def change_to_pair(row):
    string = row['Center']
    ret = string[1:-1].replace(" ", "").split(',')
    ret[0] = float(ret[0])
    ret[1] = float(ret[1])
    return ret

test_df['Center'] = test_df.apply(change_to_pair, axis=1)

In [34]:
test_df

Unnamed: 0.1,Unnamed: 0,Entry_date,DeltaT,Center,Size
0,0,2014-10-04 08:40:42,115383.0,"[43.4093233098101, 3.6875304057658993]",160803
1,1,2014-10-07 12:30:19,67454.0,"[45.77180590163932, 4.869328661202186]",61
2,2,2014-10-08 07:33:46,30925.0,"[45.78580376006621, 4.8792403489572616]",4843
3,3,2014-10-08 16:23:44,54175.0,"[45.77310891933308, 4.8717314202525595]",2719
4,4,2014-10-09 07:37:55,9590.0,"[45.78611514360843, 4.879920111245953]",824
5,5,2014-10-09 10:22:46,87778.0,"[45.785451362770544, 4.875586564692049]",16442
6,6,2014-10-10 18:15:02,93685.0,"[45.77012773458853, 4.869753658542806]",12966
7,7,2014-10-16 07:45:33,86490.0,"[45.78335795931757, 4.878596522309718]",508
8,8,2014-10-19 18:13:45,40093.0,"[43.40931714656869, 3.6875231168247287]",44009
9,9,2014-10-20 13:03:50,16536.0,"[45.67952350877192, 5.188790087719293]",19


In [35]:
test_df = normalize_POI(test_df)

(43.409320228189394, 3.687526761295314)
(45.771095535331014, 4.870090142217676)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


(45.78572954766084, 4.878721991568549)
(45.67952350877192, 5.188790087719293)
(46.168543333333304, 4.86189833333333)
(45.76124666666689, 4.814930000000003)
(45.272995, 5.00879166666667)
(45.70637601489758, 4.843654199255122)
(43.57229395832473, 1.486985364199482)
(43.60171127437651, 1.4558767981859364)
(43.201436666666666, 1.229735402298851)
(43.220019222222206, 3.093917444444444)
(47.876651666666696, 3.86840166666667)
(30.257348817027285, -97.75025684285042)
(30.249694182487797, -97.74986744867212)
(30.264709999275286, -97.76371772869736)
(30.267746217536274, -97.74270766189638)
(30.271781792689705, -97.7523998416232)
(30.266073001490522, -97.7378412486401)


In [20]:
test_df

Unnamed: 0.1,Unnamed: 0,Entry_date,DeltaT,Center,Size
0,0,2014-10-04 08:40:42,115383.0,"[43.4093233098101, 3.6875304057658993]",160803
1,1,2014-10-07 12:30:19,67454.0,"[45.77180590163932, 4.869328661202186]",61
2,2,2014-10-08 07:33:46,30925.0,"[45.78580376006621, 4.8792403489572616]",4843
3,3,2014-10-08 16:23:44,54175.0,"[45.77310891933308, 4.8717314202525595]",2719
4,4,2014-10-09 07:37:55,9590.0,"[45.78611514360843, 4.879920111245953]",824
5,5,2014-10-09 10:22:46,87778.0,"[45.785451362770544, 4.875586564692049]",16442
6,6,2014-10-10 18:15:02,93685.0,"[45.77012773458853, 4.869753658542806]",12966
7,7,2014-10-16 07:45:33,86490.0,"[45.78335795931757, 4.878596522309718]",508
8,8,2014-10-19 18:13:45,40093.0,"[43.40931714656869, 3.6875231168247287]",44009
9,9,2014-10-20 13:03:50,16536.0,"[45.67952350877192, 5.188790087719293]",19


In [38]:
distance_test=distance(45.67952350877192, 5.188790087719293,45.70637601489758, 4.843654199255122)
distance_test

26972.46995017536