In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import warnings
import pyproj

ndf = pd.read_csv('data.csv')

In [2]:
ndf.set_index('ID')

Unnamed: 0_level_0,user_id,latitude,longitude,flag,time
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
37,0,30.216216,-81.506529,True,2022-09-09 01:01:29
38,0,30.216200,-81.506517,True,2022-09-09 01:01:53
39,0,30.216216,-81.506529,True,2022-09-09 01:02:35
40,0,30.216216,-81.506529,True,2022-09-09 01:03:13
41,0,30.216216,-81.506529,True,2022-09-09 01:03:54
...,...,...,...,...,...
5844351,12419,30.382490,-81.611984,True,2022-10-27 01:08:48
5844352,12419,30.382490,-81.611984,True,2022-10-27 01:09:53
5844412,12420,30.336060,-81.757240,True,2022-10-31 02:06:17
5844413,12420,30.336050,-81.757240,True,2022-10-31 02:06:46


In [3]:
# Create a transformer object to convert from EPSG 4326 (WGS84) to EPSG 3857 (Web Mercator)
transformer = pyproj.Transformer.from_crs("EPSG:4326", "EPSG:3857", always_xy=True)

# Iterate over rows and apply the transformer to convert the coordinates
for idx, row in tqdm(ndf.iterrows(), total=ndf.shape[0]):
    lat, lon = row['latitude'], row['longitude']
    x, y = transformer.transform(lon, lat)
    ndf.at[idx, 'Lat-3857'] = y
    ndf.at[idx, 'Lon-3857'] = x

# Print the updated DataFrame
print(ndf)

100%|██████████| 3917137/3917137 [06:20<00:00, 10292.35it/s]

              ID  user_id   latitude  longitude  flag                 time  \
0             37        0  30.216216 -81.506529  True  2022-09-09 01:01:29   
1             38        0  30.216200 -81.506517  True  2022-09-09 01:01:53   
2             39        0  30.216216 -81.506529  True  2022-09-09 01:02:35   
3             40        0  30.216216 -81.506529  True  2022-09-09 01:03:13   
4             41        0  30.216216 -81.506529  True  2022-09-09 01:03:54   
...          ...      ...        ...        ...   ...                  ...   
3917132  5844351    12419  30.382490 -81.611984  True  2022-10-27 01:08:48   
3917133  5844352    12419  30.382490 -81.611984  True  2022-10-27 01:09:53   
3917134  5844412    12420  30.336060 -81.757240  True  2022-10-31 02:06:17   
3917135  5844413    12420  30.336050 -81.757240  True  2022-10-31 02:06:46   
3917136  5844414    12420  30.336050 -81.757240  True  2022-10-31 02:07:50   

             Lat-3857      Lon-3857  
0        3.531373e+06 -9.




In [4]:



ID_list=ndf['user_id'].unique()
len(ID_list)



warnings.simplefilter(action='ignore', category=FutureWarning)
def home_location(input_df,s_hour=22,t_hour=6):
    '''
    ndf: gps data
    s_hour: define start time of night, defult = 22
    t_hour: define end time of night, defult = 6
    '''
    input_df['time']=pd.to_datetime(input_df['time'], infer_datetime_format=True)
    input_df['date']=input_df['time'].dt.date
    input_df['hour']=pd.Series(input_df['time']).dt.hour
    
    input_df['LAT_Grid']=np.round(input_df['Lat-3857']/20)*20  # Grid size = 20m
    input_df['LON_Grid']=np.round(input_df['Lon-3857']/20)*20
    id=input_df.iloc[0,0]
    input_df=input_df[(input_df['hour']>=s_hour)|(input_df['hour']<t_hour)]

    if len(input_df)==0:  # if no signal during the night, return NaN
        return np.nan,np.nan
    home = input_df.groupby(['LAT_Grid','LON_Grid'])['date'].nunique().idxmax()
    return home[0],home[1]

home=pd.DataFrame(columns=['ID','LAT','LON']) # initial a DataFrame to save home location

for i in tqdm(ID_list[0:]): 
    dffh=ndf[ndf['user_id']==i].sort_values('time',axis=0,ascending=True) # extract records
    h_lat,h_lon=home_location(dffh) # calculate home location
    home=home.append(pd.DataFrame([[i,h_lat,h_lon]],columns=['ID','LAT','LON'])) # append the new row 

100%|██████████| 12421/12421 [01:41<00:00, 122.97it/s]


In [5]:
# Create a transformer object to convert from EPSG 3857 (Web Mercator) to EPSG 4326 (WGS84)
inputGrid = pyproj.Proj(projparams='epsg:3857')
wgs84 = pyproj.Proj(projparams='epsg:4326')

home = home.reset_index(drop=True)

# Iterate over rows and apply the transformer to convert the coordinates
for idx, row in tqdm(home.iterrows(), total=home.shape[0]):
    lat, lon = pyproj.transform(inputGrid, wgs84, row['LON'], row['LAT'])
    home.at[idx, 'LAT-4326'] = lat
    home.at[idx, 'LON-4326'] = lon

# Print the updated DataFrame
home

100%|██████████| 12421/12421 [00:23<00:00, 539.77it/s]


Unnamed: 0,ID,LAT,LON,LAT-4326,LON-4326
0,0,3531380.0,-9073260.0,30.216272,-81.506481
1,1,3516560.0,-9096800.0,30.101163,-81.717945
2,2,3565080.0,-9087560.0,30.477524,-81.634940
3,3,,,,
4,4,3546060.0,-9072020.0,30.330161,-81.495342
...,...,...,...,...,...
12416,12416,,,,
12417,12417,3546600.0,-9091320.0,30.334348,-81.668717
12418,12418,3541820.0,-9079540.0,30.297280,-81.562896
12419,12419,3552380.0,-9084280.0,30.379152,-81.605476


In [6]:
home.dropna(subset=['LAT'], inplace=True)
home

Unnamed: 0,ID,LAT,LON,LAT-4326,LON-4326
0,0,3531380.0,-9073260.0,30.216272,-81.506481
1,1,3516560.0,-9096800.0,30.101163,-81.717945
2,2,3565080.0,-9087560.0,30.477524,-81.634940
4,4,3546060.0,-9072020.0,30.330161,-81.495342
5,5,3519320.0,-9107220.0,30.122610,-81.811549
...,...,...,...,...,...
12415,12415,3544960.0,-9089520.0,30.321632,-81.652547
12417,12417,3546600.0,-9091320.0,30.334348,-81.668717
12418,12418,3541820.0,-9079540.0,30.297280,-81.562896
12419,12419,3552380.0,-9084280.0,30.379152,-81.605476


In [7]:
home.to_csv('home.csv')