# EOD data inspection

In [1]:
import seaborn as sns
import pandas as pd
import geopandas as gpd

%matplotlib inline
sns.set(context='notebook', font='Lucida Sans Unicode', style='white', palette='plasma')

In [109]:
# Read the semicolon-separated trips file and keep only the fields used below:
# person/trip identifiers, origin and destination coordinates, and the
# start/end clock times of each trip.
trip_columns = [
    'Persona', 'Viaje',
    'OrigenCoordX', 'OrigenCoordY',
    'DestinoCoordX', 'DestinoCoordY',
    'HoraIni', 'HoraFin',
]
eod_trips = pd.read_csv('../data/other_format/EOD_STGO/viajes.csv', sep=';')[trip_columns]
eod_trips

Unnamed: 0,Persona,Viaje,OrigenCoordX,OrigenCoordY,DestinoCoordX,DestinoCoordY,HoraIni,HoraFin
0,17343102,1734310202,3352087188,6288387,3388123125,6292391,22:30,23:40
1,17344101,1734410101,3385364375,6291928,3542673438,6302297,13:00,14:45
2,17344101,1734410102,3542673438,6302297,3385364375,6291928,22:00,23:30
3,17344103,1734410301,3385364375,6291928,3508416563,6297212,9:00,9:55
4,17344103,1734410302,3508416563,6297212,3385364375,6291928,19:00,21:30
...,...,...,...,...,...,...,...,...
113586,74381301,7438130102,3495569063,62903335,3494102813,6289669,12:00,12:15
113587,74381302,7438130201,3494102813,6289669,3495569063,62903335,10:30,10:35
113588,74381302,7438130202,3495569063,62903335,3494102813,6289669,12:00,12:15
113589,74382301,7438230101,,,,,7:30,7:40


In [110]:
# Discard every trip (row) that has at least one missing value in the
# selected columns, e.g. trips without coordinates.
trips_cleaned = eod_trips.dropna(axis=0, how='any')
trips_cleaned

Unnamed: 0,Persona,Viaje,OrigenCoordX,OrigenCoordY,DestinoCoordX,DestinoCoordY,HoraIni,HoraFin
0,17343102,1734310202,3352087188,6288387,3388123125,6292391,22:30,23:40
1,17344101,1734410101,3385364375,6291928,3542673438,6302297,13:00,14:45
2,17344101,1734410102,3542673438,6302297,3385364375,6291928,22:00,23:30
3,17344103,1734410301,3385364375,6291928,3508416563,6297212,9:00,9:55
4,17344103,1734410302,3508416563,6297212,3385364375,6291928,19:00,21:30
...,...,...,...,...,...,...,...,...
113584,70679101,7067910102,3384504063,62997255,3385365625,62996395,12:40,12:48
113585,74381301,7438130101,3494102813,6289669,3495569063,62903335,10:30,10:45
113586,74381301,7438130102,3495569063,62903335,3494102813,6289669,12:00,12:15
113587,74381302,7438130201,3494102813,6289669,3495569063,62903335,10:30,10:35


In [111]:
from datetime import datetime, timedelta

def set_origin_time(hour_origin):
    """Turn an 'H:MM' clock string into a datetime on the dummy date 2012-01-01.

    Args:
        hour_origin: Trip start time as text in '%H:%M' format (e.g. '9:00').

    Returns:
        A ``datetime`` carrying the parsed hour and minute on 2012-01-01.

    Raises:
        ValueError: If ``hour_origin`` does not match '%H:%M'.
    """
    clock = datetime.strptime(hour_origin, '%H:%M')
    # The survey only stores clock times, so a placeholder date is attached.
    return clock.replace(year=2012, month=1, day=1)

def set_destination_time(row):
    """Build the arrival datetime of a trip, rolling over midnight when needed.

    Both 'HoraIni' and 'HoraFin' are parsed as '%H:%M' clock times on the
    dummy date 2012-01-01. When the end time is earlier than the start time,
    the arrival is moved to 2012-01-02.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) with the string
            fields 'HoraIni' and 'HoraFin'.

    Returns:
        The arrival ``datetime``, on 2012-01-01 or 2012-01-02.

    Raises:
        ValueError: If either field does not match '%H:%M'.
    """
    def _on_dummy_date(clock_text):
        # Attach the placeholder date (year, month, day) to a clock string.
        return datetime.strptime(clock_text, '%H:%M').replace(year=2012, month=1, day=1)

    start_text, end_text = row['HoraIni'], row['HoraFin']
    departure = _on_dummy_date(start_text)
    arrival = _on_dummy_date(end_text)

    # An arrival earlier than the departure is pushed to the following day.
    if arrival < departure:
        return arrival + timedelta(days=1)
    return arrival

In [112]:
# Add the departure ('o_time') and arrival ('d_time') timestamps.
#
# `assign` returns a brand-new DataFrame, so the columns are never written
# onto the object returned by `dropna()`. That removes the need to disable
# pandas' chained-assignment warning for the whole session, which would also
# hide genuine mistakes in any later cell.
trips_cleaned = trips_cleaned.assign(
    # Start time: 'HoraIni' parsed onto the dummy date.
    o_time=trips_cleaned['HoraIni'].apply(set_origin_time),
    # End time: 'HoraFin' parsed onto the dummy date, moved to the next day
    # when it is earlier than 'HoraIni'.
    d_time=trips_cleaned.apply(set_destination_time, axis=1),
)
trips_cleaned

Unnamed: 0,Persona,Viaje,OrigenCoordX,OrigenCoordY,DestinoCoordX,DestinoCoordY,HoraIni,HoraFin,o_time,d_time
0,17343102,1734310202,3352087188,6288387,3388123125,6292391,22:30,23:40,2012-01-01 22:30:00,2012-01-01 23:40:00
1,17344101,1734410101,3385364375,6291928,3542673438,6302297,13:00,14:45,2012-01-01 13:00:00,2012-01-01 14:45:00
2,17344101,1734410102,3542673438,6302297,3385364375,6291928,22:00,23:30,2012-01-01 22:00:00,2012-01-01 23:30:00
3,17344103,1734410301,3385364375,6291928,3508416563,6297212,9:00,9:55,2012-01-01 09:00:00,2012-01-01 09:55:00
4,17344103,1734410302,3508416563,6297212,3385364375,6291928,19:00,21:30,2012-01-01 19:00:00,2012-01-01 21:30:00
...,...,...,...,...,...,...,...,...,...,...
113584,70679101,7067910102,3384504063,62997255,3385365625,62996395,12:40,12:48,2012-01-01 12:40:00,2012-01-01 12:48:00
113585,74381301,7438130101,3494102813,6289669,3495569063,62903335,10:30,10:45,2012-01-01 10:30:00,2012-01-01 10:45:00
113586,74381301,7438130102,3495569063,62903335,3494102813,6289669,12:00,12:15,2012-01-01 12:00:00,2012-01-01 12:15:00
113587,74381302,7438130201,3494102813,6289669,3495569063,62903335,10:30,10:35,2012-01-01 10:30:00,2012-01-01 10:35:00


In [113]:
# Replace ',' with '.' in the four coordinate columns so the text can be read
# as decimal numbers by the reprojection step. The values remain strings.
# One loop over the column names replaces four copy-pasted statements, and the
# vectorised `.str.replace` (literal match, regex disabled) replaces the
# per-element lambda.
coordinate_columns = ['OrigenCoordX', 'OrigenCoordY', 'DestinoCoordX', 'DestinoCoordY']
for column in coordinate_columns:
    trips_cleaned[column] = trips_cleaned[column].str.replace(',', '.', regex=False)

In [114]:
import h3
# H3 resolution passed to `h3.latlng_to_cell` for both trip ends.
resolution = 12

# Coordinates are declared as EPSG:5361 and reprojected to EPSG:4326 (lon/lat).
current_crs = 'EPSG:5361'
target_crs = 'EPSG:4326'

origin_points = gpd.points_from_xy(
    trips_cleaned['OrigenCoordX'],
    trips_cleaned['OrigenCoordY'],
    crs=current_crs,
).to_crs(target_crs)
dest_points = gpd.points_from_xy(
    trips_cleaned['DestinoCoordX'],
    trips_cleaned['DestinoCoordY'],
    crs=current_crs,
).to_crs(target_crs)

# After reprojection a point's `y` is the latitude and its `x` the longitude.
# The dict keeps the column insertion order: lat/lon pairs first, H3 cells last.
derived_columns = {
    'o_lat': [point.y for point in origin_points],
    'o_lon': [point.x for point in origin_points],
    'd_lat': [point.y for point in dest_points],
    'd_lon': [point.x for point in dest_points],
    'o_h3_cell': [h3.latlng_to_cell(point.y, point.x, resolution) for point in origin_points],
    'd_h3_cell': [h3.latlng_to_cell(point.y, point.x, resolution) for point in dest_points],
}
for column_name, column_values in derived_columns.items():
    trips_cleaned[column_name] = column_values
trips_cleaned

Unnamed: 0,Persona,Viaje,OrigenCoordX,OrigenCoordY,DestinoCoordX,DestinoCoordY,HoraIni,HoraFin,o_time,d_time,o_lat,o_lon,d_lat,d_lon,o_h3_cell,d_h3_cell
0,17343102,1734310202,335208.7188,6288387,338812.3125,6292391,22:30,23:40,2012-01-01 22:30:00,2012-01-01 23:40:00,-33.531422,-70.774666,-33.495874,-70.735153,8cb2c540d0a89ff,8cb2c5429b60dff
1,17344101,1734410101,338536.4375,6291928,354267.3438,6302297,13:00,14:45,2012-01-01 13:00:00,2012-01-01 14:45:00,-33.500007,-70.738205,-33.408776,-70.567232,8cb2c5429ae5dff,8cb2c519a994dff
2,17344101,1734410102,354267.3438,6302297,338536.4375,6291928,22:00,23:30,2012-01-01 22:00:00,2012-01-01 23:30:00,-33.408776,-70.567232,-33.500007,-70.738205,8cb2c519a994dff,8cb2c5429ae5dff
3,17344103,1734410301,338536.4375,6291928,350841.6563,6297212,9:00,9:55,2012-01-01 09:00:00,2012-01-01 09:55:00,-33.500007,-70.738205,-33.454153,-70.604904,8cb2c5429ae5dff,8cb2c554db113ff
4,17344103,1734410302,350841.6563,6297212,338536.4375,6291928,19:00,21:30,2012-01-01 19:00:00,2012-01-01 21:30:00,-33.454153,-70.604904,-33.500007,-70.738205,8cb2c554db113ff,8cb2c5429ae5dff
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
113584,70679101,7067910102,338450.4063,6299725.5,338536.5625,6299639.5,12:40,12:48,2012-01-01 12:40:00,2012-01-01 12:48:00,-33.429695,-70.737727,-33.430483,-70.736816,8cb2c55509217ff,8cb2c55509297ff
113585,74381301,7438130101,349410.2813,6289669,349556.9063,6290333.5,10:30,10:45,2012-01-01 10:30:00,2012-01-01 10:45:00,-33.521961,-70.621567,-33.515990,-70.619877,8cb2c50927283ff,8cb2c50921181ff
113586,74381301,7438130102,349556.9063,6290333.5,349410.2813,6289669,12:00,12:15,2012-01-01 12:00:00,2012-01-01 12:15:00,-33.515990,-70.619877,-33.521961,-70.621567,8cb2c50921181ff,8cb2c50927283ff
113587,74381302,7438130201,349410.2813,6289669,349556.9063,6290333.5,10:30,10:35,2012-01-01 10:30:00,2012-01-01 10:35:00,-33.521961,-70.621567,-33.515990,-70.619877,8cb2c50927283ff,8cb2c50921181ff


In [115]:
# Remove the raw source fields that have been replaced by derived columns
# (plus the trip identifier) and expose the person identifier as 'user_id'.
obsolete_columns = [
    'OrigenCoordX', 'OrigenCoordY',
    'DestinoCoordX', 'DestinoCoordY',
    'HoraIni', 'HoraFin',
    'Viaje',
]
trips_cleaned = (
    trips_cleaned
    .drop(columns=obsolete_columns)
    .rename(columns={'Persona': 'user_id'})
)

# Fix the column order of the frame that will be written to disk.
output_columns = [
    'user_id',
    'o_lon', 'o_lat',
    'd_lon', 'd_lat',
    'o_h3_cell', 'd_h3_cell',
    'o_time', 'd_time',
]
trips_to_file = trips_cleaned[output_columns]
trips_to_file

Unnamed: 0,user_id,o_lon,o_lat,d_lon,d_lat,o_h3_cell,d_h3_cell,o_time,d_time
0,17343102,-70.774666,-33.531422,-70.735153,-33.495874,8cb2c540d0a89ff,8cb2c5429b60dff,2012-01-01 22:30:00,2012-01-01 23:40:00
1,17344101,-70.738205,-33.500007,-70.567232,-33.408776,8cb2c5429ae5dff,8cb2c519a994dff,2012-01-01 13:00:00,2012-01-01 14:45:00
2,17344101,-70.567232,-33.408776,-70.738205,-33.500007,8cb2c519a994dff,8cb2c5429ae5dff,2012-01-01 22:00:00,2012-01-01 23:30:00
3,17344103,-70.738205,-33.500007,-70.604904,-33.454153,8cb2c5429ae5dff,8cb2c554db113ff,2012-01-01 09:00:00,2012-01-01 09:55:00
4,17344103,-70.604904,-33.454153,-70.738205,-33.500007,8cb2c554db113ff,8cb2c5429ae5dff,2012-01-01 19:00:00,2012-01-01 21:30:00
...,...,...,...,...,...,...,...,...,...
113584,70679101,-70.737727,-33.429695,-70.736816,-33.430483,8cb2c55509217ff,8cb2c55509297ff,2012-01-01 12:40:00,2012-01-01 12:48:00
113585,74381301,-70.621567,-33.521961,-70.619877,-33.515990,8cb2c50927283ff,8cb2c50921181ff,2012-01-01 10:30:00,2012-01-01 10:45:00
113586,74381301,-70.619877,-33.515990,-70.621567,-33.521961,8cb2c50921181ff,8cb2c50927283ff,2012-01-01 12:00:00,2012-01-01 12:15:00
113587,74381302,-70.621567,-33.521961,-70.619877,-33.515990,8cb2c50927283ff,8cb2c50921181ff,2012-01-01 10:30:00,2012-01-01 10:35:00


In [117]:
# Label the row index as 'trip_id'.
# `rename_axis` returns a new frame with a renamed index. Assigning to
# `index.name` instead mutates the Index object in place, and that object may
# be shared with the frame this one was selected from.
trips_to_file = trips_to_file.rename_axis('trip_id')
trips_to_file

Unnamed: 0_level_0,user_id,o_lon,o_lat,d_lon,d_lat,o_h3_cell,d_h3_cell,o_time,d_time
trip_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0,17343102,-70.774666,-33.531422,-70.735153,-33.495874,8cb2c540d0a89ff,8cb2c5429b60dff,2012-01-01 22:30:00,2012-01-01 23:40:00
1,17344101,-70.738205,-33.500007,-70.567232,-33.408776,8cb2c5429ae5dff,8cb2c519a994dff,2012-01-01 13:00:00,2012-01-01 14:45:00
2,17344101,-70.567232,-33.408776,-70.738205,-33.500007,8cb2c519a994dff,8cb2c5429ae5dff,2012-01-01 22:00:00,2012-01-01 23:30:00
3,17344103,-70.738205,-33.500007,-70.604904,-33.454153,8cb2c5429ae5dff,8cb2c554db113ff,2012-01-01 09:00:00,2012-01-01 09:55:00
4,17344103,-70.604904,-33.454153,-70.738205,-33.500007,8cb2c554db113ff,8cb2c5429ae5dff,2012-01-01 19:00:00,2012-01-01 21:30:00
...,...,...,...,...,...,...,...,...,...
113584,70679101,-70.737727,-33.429695,-70.736816,-33.430483,8cb2c55509217ff,8cb2c55509297ff,2012-01-01 12:40:00,2012-01-01 12:48:00
113585,74381301,-70.621567,-33.521961,-70.619877,-33.515990,8cb2c50927283ff,8cb2c50921181ff,2012-01-01 10:30:00,2012-01-01 10:45:00
113586,74381301,-70.619877,-33.515990,-70.621567,-33.521961,8cb2c50921181ff,8cb2c50927283ff,2012-01-01 12:00:00,2012-01-01 12:15:00
113587,74381302,-70.621567,-33.521961,-70.619877,-33.515990,8cb2c50927283ff,8cb2c50921181ff,2012-01-01 10:30:00,2012-01-01 10:35:00


In [118]:
# Persist the unified trip table as a snappy-compressed Parquet file (pyarrow engine).
output_path = '../data/unified_format/santiago_eod_unitrip.parquet'
trips_to_file.to_parquet(
    output_path,
    engine='pyarrow',
    compression='snappy',
)