# Finding work and leaving place

In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
from datetime import datetime, timedelta
import math

### Get distance between two points

In [2]:
diameter = 500 ##Diameter of POI (in meter)

In [3]:
def distance(lat1,long1,lat2,long2):
    earthRadius = 6371000.0
    
    dLat = math.radians(lat2-lat1)
    dLong = math.radians(long2-long1)
    
    a = math.sin(dLat/2)*math.sin(dLat/2) + math.cos(math.radians(lat1)) * math.cos(math.radians(lat2)) * math.sin(dLong/2) * math.sin(dLong/2)
    
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1-a))
    dist = earthRadius * c
    
    return dist

### Get center of points distribution

In [4]:
def getCenter(latArray, longArray):
    averageLat = 0.0
    averageLong = 0.0
    
    for d in latArray:
        averageLat += d
    for d in longArray:
        averageLong += d
        
    averageLat /= len(latArray)
    averageLong /= len(longArray)
    
    center=[averageLat, averageLong]
    return center

## Normalizing POI center

In [5]:
def normalize_POI(poi_dataframe):
    posArray = poi_dataframe['Center']
    temp_general_array = []
    for i in range(0, len(posArray)):
        
        
        temp_index_array = []
        temp_lat_array = []
        temp_lng_array = []
        
        if i not in temp_general_array:
            temp_lat_array.append(poi_dataframe.iloc[i]['Center'][0])
            temp_lng_array.append(poi_dataframe.iloc[i]['Center'][1])
            temp_index_array.append(i)
            temp_general_array.append(i)

            for j in range(0,len(posArray)):
                if (distance(posArray[i][0],posArray[i][1],posArray[j][0],posArray[j][1]) < diameter):

                    if i!=j:
                        if j not in temp_general_array:
                            print(j)
                            temp_index_array.append(j)
                            temp_lat_array.append(poi_dataframe.iloc[j]['Center'][0])
                            temp_lng_array.append(poi_dataframe.iloc[j]['Center'][1])
                            temp_general_array.append(j)
                
            center = getCenter(temp_lat_array,temp_lng_array)
            #print(type(center))

            for index in temp_index_array:
                print(poi_dataframe.iloc[index]['Center'])
                poi_dataframe.iloc[index]['Center'][0] = center[0]
                poi_dataframe.iloc[index]['Center'][1] = center[1]
                print("lat ",center[0])
                print("lng ",center[1])
                print("center ",center)
                print(poi_dataframe.iloc[index]['Center'])
                
    return poi_dataframe

                

In [6]:
test_file_path = "./poi/poi_user_1.csv"

test_df = pd.read_csv(test_file_path)

In [7]:
# test_df['Center'] = test_df[test_df['Center'][1:-1].replace(" ", "").split(',')]

def change_to_pair(row):
    string = row['Center']
    ret = string[1:-1].replace(" ", "").split(',')
    ret[0] = float(ret[0])
    ret[1] = float(ret[1])
    return ret

test_df['Center'] = test_df.apply(change_to_pair, axis=1)

In [8]:
test_df = normalize_POI(test_df)

8
[43.4093233098101, 3.6875304057658993]
lat  43.409320228189394
lng  3.687526761295314
center  [43.409320228189394, 3.687526761295314]
[43.409320228189394, 3.687526761295314]
[43.40931714656869, 3.6875231168247287]
lat  43.409320228189394
lng  3.687526761295314
center  [43.409320228189394, 3.687526761295314]
[43.409320228189394, 3.687526761295314]
3
6
11
15
18
19
22
32
34
36
40
41
43
44
45
46
47
50
52
54
56
57
[45.77180590163932, 4.869328661202186]
lat  45.771095535331014
lng  4.870090142217676
center  [45.771095535331014, 4.870090142217676]
[45.771095535331014, 4.870090142217676]
[45.77310891933308, 4.8717314202525595]
lat  45.771095535331014
lng  4.870090142217676
center  [45.771095535331014, 4.870090142217676]
[45.771095535331014, 4.870090142217676]
[45.77012773458853, 4.869753658542806]
lat  45.771095535331014
lng  4.870090142217676
center  [45.771095535331014, 4.870090142217676]
[45.771095535331014, 4.870090142217676]
[45.77048919053928, 4.869696785846327]
lat  45.771095535331014

In [11]:
test_df

Unnamed: 0.1,Unnamed: 0,Entry_date,DeltaT,Center,Size
0,0,2014-10-04 08:40:42,115383.0,"[43.409320228189394, 3.687526761295314]",160803
1,1,2014-10-07 12:30:19,67454.0,"[45.771095535331014, 4.870090142217676]",61
2,2,2014-10-08 07:33:46,30925.0,"[45.78572954766084, 4.878721991568549]",4843
3,3,2014-10-08 16:23:44,54175.0,"[45.771095535331014, 4.870090142217676]",2719
4,4,2014-10-09 07:37:55,9590.0,"[45.78572954766084, 4.878721991568549]",824
...,...,...,...,...,...
74,74,2014-12-11 00:57:30,11809.0,"[30.267746217536274, -97.74270766189638]",1870
75,75,2014-12-11 04:22:07,22377.0,"[30.257348817027285, -97.75025684285042]",44144
76,76,2014-12-11 10:35:05,13368.0,"[30.257348817027285, -97.75025684285042]",8488
77,77,2014-12-11 16:14:40,12507.0,"[30.266073001490522, -97.7378412486401]",8653


In [12]:
print(type(test_df['Entry_date'][0]))

<class 'str'>


In [15]:
test_df['Entry_date']= pd.to_datetime(test_df['Entry_date']) 
print(test_df['Entry_date'])

0    2014-10-04 08:40:42
1    2014-10-07 12:30:19
2    2014-10-08 07:33:46
3    2014-10-08 16:23:44
4    2014-10-09 07:37:55
             ...        
74   2014-12-11 00:57:30
75   2014-12-11 04:22:07
76   2014-12-11 10:35:05
77   2014-12-11 16:14:40
78   2014-12-13 00:38:14
Name: Entry_date, Length: 79, dtype: datetime64[ns]


In [16]:
test_df["Week_day"]=""

for index, day in test_df['Entry_date'].iteritems():
    test_df["Week_day"][index]=day.weekday()
    
test_df["Week_day"]

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.


0     5
1     1
2     2
3     2
4     3
     ..
74    3
75    3
76    3
77    3
78    5
Name: Week_day, Length: 79, dtype: object

In [58]:
test_df.drop("Unnamed: 0",axis=1,inplace=True)

In [59]:
test_df

Unnamed: 0,Entry_date,DeltaT,Center,Size,Week_day
0,2014-10-04 08:40:42,115383.0,"[43.409320228189394, 3.687526761295314]",160803,5
1,2014-10-07 12:30:19,67454.0,"[45.771095535331014, 4.870090142217676]",61,1
2,2014-10-08 07:33:46,30925.0,"[45.78572954766084, 4.878721991568549]",4843,2
3,2014-10-08 16:23:44,54175.0,"[45.771095535331014, 4.870090142217676]",2719,2
4,2014-10-09 07:37:55,9590.0,"[45.78572954766084, 4.878721991568549]",824,3
...,...,...,...,...,...
74,2014-12-11 00:57:30,11809.0,"[30.267746217536274, -97.74270766189638]",1870,3
75,2014-12-11 04:22:07,22377.0,"[30.257348817027285, -97.75025684285042]",44144,3
76,2014-12-11 10:35:05,13368.0,"[30.257348817027285, -97.75025684285042]",8488,3
77,2014-12-11 16:14:40,12507.0,"[30.266073001490522, -97.7378412486401]",8653,3


## Now we try to find work and living place using temporal informations

In [62]:
def find_house_and_work_place(df):
    house_poi_array = pd.DataFrame(columns=['Entry_date', 'DeltaT', 'Center','Size','Week_day'])
    work_poi_array = pd.DataFrame(columns=['Entry_date', 'DeltaT', 'Center','Size','Week_day'])
    
    for index, row in df.iterrows():
        
        day = int(row['Week_day'])
        entry_hour = int(row['Entry_date'].hour)
        exit_hour = row['Entry_date'] + np.timedelta64(int(row['DeltaT']), 's')
        exit_hour = int(exit_hour.hour)
        
        if (day in range(0,5)) and (entry_hour in range(8,10)) and (exit_hour in range(16,19)) :
            print("WORK POI")
            work_poi_array.append(row)
            
        if (day in range(0,5)) and (entry_hour in range(19,0)) and (exit_hour in range(5,9)):
            print("HOUSE POI")
            house_poi_array.append(row)
            
        if (day in range(6,7)):
            print("HOUSE POI")
            house_poi_array.append(row)
            
#     print(work_poi_array)    
#     work_place = work_poi_array['Center'].mode()
#     house = house_poi_array['Center'].mode()

    
    
    ret_df = pd.Dataframe(columns=['Center'])
    ret.append(house)
    re.append(work_place)
    
    return ret_df

In [61]:
house_and_work = find_house_and_work_place(test_df)

HOUSE POI
WORK POI
HOUSE POI
HOUSE POI
HOUSE POI
HOUSE POI
WORK POI
HOUSE POI
HOUSE POI
HOUSE POI


AttributeError: module 'pandas' has no attribute 'Dataframe'