In [3]:
from matplotlib import pyplot as plt
import numpy as np
import math

import pandas as pd

# Preview only the first few records of the training file. Judging by the
# columns named further down, each line is "start_time,lon,lat,lon,lat,..."
# and lines differ in length, so the file is not a rectangular CSV.
TRAINING_CSV = "./taxi_dataset/training_data_simple.csv"
N_PREVIEW_ROWS = 5

# Read whole lines by hand instead of pd.read_csv(..., sep='\n'): current
# pandas raises ValueError when the line terminator is given as the separator.
with open(TRAINING_CSV, encoding="utf-8") as csv_file:
    # Blank lines are dropped, matching read_csv's default skip_blank_lines.
    non_blank_lines = (line.rstrip("\r\n") for line in csv_file if line.strip())
    # zip() against range() stops after N_PREVIEW_ROWS lines without reading the rest.
    preview_lines = [line for _, line in zip(range(N_PREVIEW_ROWS), non_blank_lines)]

# One raw line per row in column 0, then one column per comma-separated field;
# shorter rows are padded with missing values by expand=True.
df_rows = pd.DataFrame(preview_lines)
df = df_rows[0].str.split(',', expand=True)
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,663,664,665,666,667,668,669,670,671,672
0,1372638303,-8.587116,41.162427,-8.586198,41.162112,-8.585982,41.161338,-8.585037,41.160024,-8.584146,...,,,,,,,,,,
1,1372646650,-8.6103,41.153688,-8.610336,41.153679,-8.610192,41.154039,-8.609985,41.154759,-8.609931,...,,,,,,,,,,
2,1372645583,-8.591301,41.156163,-8.592651,41.156163,-8.594415,41.156298,-8.594451,41.156316,-8.59446,...,,,,,,,,,,
3,1372651427,-8.610291,41.153625,-8.61093,41.153643,-8.611965,41.153859,-8.611938,41.154651,-8.611884,...,,,,,,,,,,
4,1372652856,-8.61372,41.148414,-8.614287,41.148414,-8.614431,41.148387,-8.614845,41.147451,-8.614872,...,,,,,,,,,,


In [4]:
# Keep the start time plus the first three (lon, lat) pairs and give the
# columns readable names: start_time, lon_1, lat_1, ..., lon_3, lat_3.
preview_columns = ["start_time"] + [
    "{}_{}".format(axis, pair_no)
    for pair_no in (1, 2, 3)
    for axis in ("lon", "lat")
]

df = df.iloc[:7, :len(preview_columns)]
df.columns = preview_columns
df

Unnamed: 0,start_time,lon_1,lat_1,lon_2,lat_2,lon_3,lat_3
0,1372638303,-8.587116,41.162427,-8.586198,41.162112,-8.585982,41.161338
1,1372646650,-8.6103,41.153688,-8.610336,41.153679,-8.610192,41.154039
2,1372645583,-8.591301,41.156163,-8.592651,41.156163,-8.594415,41.156298
3,1372651427,-8.610291,41.153625,-8.61093,41.153643,-8.611965,41.153859
4,1372652856,-8.61372,41.148414,-8.614287,41.148414,-8.614431,41.148387


In [75]:
from operator import attrgetter

class Trajectory:
    """A start timestamp plus an ordered list of ``Point`` objects built from one row.

    Attributes:
        id: Identifier supplied by the caller.
        first_timestamp: Timestamp of the first point, as an int.
        points: ``Point`` objects in row order.
        min_lat, max_lat, min_lon, max_lon: Bounding box over ``points``.
    """

    # Amount added to the timestamp for each successive point in a row.
    # NOTE(review): presumably seconds, given the epoch-like start times -- confirm
    # against the dataset description.
    SAMPLING_INTERVAL = 15

    def __init__(self, id, df_row):
        """Build the trajectory from one flat row.

        Args:
            id: Identifier stored on the instance.
            df_row: Values laid out as ``[start_time, lon, lat, lon, lat, ...]``.
                Any iterable works (pandas Series, list, tuple). Coordinate
                pairs with a missing member (None, NaN, blank string) are
                skipped, and an unpaired trailing value is ignored.

        Raises:
            ValueError: If the row is empty or holds no usable coordinate pair.
        """
        # Read by position: integer keys on a label-indexed Series
        # (e.g. columns named "lon_1") are not a reliable positional lookup.
        values = list(df_row)
        if not values:
            raise ValueError("trajectory row is empty")

        self.id = id
        self.first_timestamp = int(values[0])

        # Coordinate pairs live at (1, 2), (3, 4), ...; stopping at len - 1
        # guarantees values[i + 1] always exists.
        self.points = []
        for i in range(1, len(values) - 1, 2):
            lon, lat = values[i], values[i + 1]
            if self._is_missing(lon) or self._is_missing(lat):
                continue
            # i // 2 is the zero-based position of the pair, so the first
            # point keeps first_timestamp and each later one is offset further.
            timestamp = self.first_timestamp + (i // 2) * self.SAMPLING_INTERVAL
            self.points.append(Point(time=timestamp, lon=lon, lat=lat))

        if not self.points:
            raise ValueError("trajectory row contains no usable (lon, lat) pair")

        # Bounding box of the trajectory.
        lats = [point.lat for point in self.points]
        lons = [point.lon for point in self.points]
        self.min_lat, self.max_lat = min(lats), max(lats)
        self.min_lon, self.max_lon = min(lons), max(lons)

    @staticmethod
    def _is_missing(value):
        """Return True for None, blank strings, NaN and other NA-like markers."""
        if value is None:
            return True
        if isinstance(value, str):
            return not value.strip()
        try:
            # NaN is the only ordinary value that differs from itself.
            return bool(value != value)
        except (TypeError, ValueError):
            # NA markers whose comparison has no truth value (e.g. pandas.NA).
            return True

    def __str__(self):
        """Two-line summary: id, first timestamp, point count, then the lat/lon ranges."""
        return (
            "\n{:10s}: [ID: {:5d}, Time_First: {:10d}, Points: {}] "
            "\n{:10s}  [LAT_range: ({:5f}, {:5f}), LON_range: ({:5f}, {:5f})]"
        ).format(
            "Trajectory", self.id, self.first_timestamp, len(self.points),
            "", self.min_lat, self.max_lat, self.min_lon, self.max_lon,
        )

    def __repr__(self):
        """Same text as ``__str__``; deliberately free of side effects."""
        return str(self)

    def get_points_info(self):
        """Print every point on its own line, prefixed with its 1-based position."""
        for position, point in enumerate(self.points, start=1):
            print(position, point)

    def len_points_in_traj(self):
        """Return the number of points in the trajectory."""
        return len(self.points)

    def get_list_points(self):
        """Return the internal list of points (the list itself, not a copy)."""
        return self.points
        
        

class Point:
    """One sample of a trajectory: a timestamp and a (lon, lat) position.

    Attributes:
        timestamp: Time of the sample, stored exactly as passed in.
        lon: Longitude, coerced to float.
        lat: Latitude, coerced to float.
        prev_pt_time, next_pt_time: Both start at -1; nothing in this class
            updates them. NOTE(review): -1 presumably means "not set yet" --
            confirm with the code that fills these in.
    """

    def __init__(self, time, lon, lat):
        """Store the sample.

        Args:
            time: Timestamp of the sample; ``__str__`` formats it as an integer.
            lon: Longitude as a number or numeric string.
            lat: Latitude as a number or numeric string.

        Raises:
            ValueError, TypeError: Propagated from ``float()`` when a coordinate
                cannot be converted.
        """
        self.timestamp = time
        self.lon = float(lon)
        self.lat = float(lat)

        # Chained assignment: tuple-unpacking the scalar -1 raises TypeError.
        self.prev_pt_time = self.next_pt_time = -1

    def __str__(self):
        """One-line summary with timestamp, longitude and latitude."""
        return "{:10s}: [Timestamp: {:10d}, Longitude: {:9f}, Latitude: {:9f}]".format(
            "Point", self.timestamp, self.lon, self.lat
        )

    def __repr__(self):
        """Same text as ``__str__``; deliberately free of side effects."""
        return str(self)

In [76]:
# Wrap every row of the preview frame in a Trajectory, numbering them
# 0, 1, 2, ... in row order (the frame's own index label is not used).
taxi_trajectories = [
    Trajectory(traj_id, row)
    for traj_id, (_, row) in enumerate(df.iterrows())
]

# Show each trajectory's summary followed by its numbered points,
# separated by a blank line.
for trajectory in taxi_trajectories:
    print(trajectory)
    trajectory.get_points_info()
    print()


Trajectory: [ID:     0, Time_First: 1372638303, Points: 3] 
            [LAT_range: (41.161338, 41.162427), LON_range: (-8.587116, -8.585982])
1 Point     : [Timestamp: 1372638303, Longitude: -8.587116, Latitude: 41.162427]
2 Point     : [Timestamp: 1372638318, Longitude: -8.586198, Latitude: 41.162112]
3 Point     : [Timestamp: 1372638333, Longitude: -8.585982, Latitude: 41.161338]


Trajectory: [ID:     1, Time_First: 1372646650, Points: 3] 
            [LAT_range: (41.153679, 41.154039), LON_range: (-8.610336, -8.610192])
1 Point     : [Timestamp: 1372646650, Longitude: -8.610300, Latitude: 41.153688]
2 Point     : [Timestamp: 1372646665, Longitude: -8.610336, Latitude: 41.153679]
3 Point     : [Timestamp: 1372646680, Longitude: -8.610192, Latitude: 41.154039]


Trajectory: [ID:     2, Time_First: 1372645583, Points: 3] 
            [LAT_range: (41.156163, 41.156298), LON_range: (-8.594415, -8.591301])
1 Point     : [Timestamp: 1372645583, Longitude: -8.591301, Latitude: 41.156163]

In [101]:
# One inner list per trajectory; each point is flattened to
# [timestamp, lon, lat] so the result holds plain numbers only.
grid_num = [
    [[point.timestamp, point.lon, point.lat] for point in trajectory.get_list_points()]
    for trajectory in taxi_trajectories
]

# Rows are trajectories, columns are point positions within a trajectory.
print(pd.DataFrame(grid_num))

                                    0                                   1  \
0  [1372638303, -8.587116, 41.162427]  [1372638318, -8.586198, 41.162112]   
1    [1372646650, -8.6103, 41.153688]  [1372646665, -8.610336, 41.153679]   
2  [1372645583, -8.591301, 41.156163]  [1372645598, -8.592651, 41.156163]   
3  [1372651427, -8.610291, 41.153625]   [1372651442, -8.61093, 41.153643]   
4   [1372652856, -8.61372, 41.148414]  [1372652871, -8.614287, 41.148414]   

                                    2  
0  [1372638333, -8.585982, 41.161338]  
1  [1372646680, -8.610192, 41.154039]  
2  [1372645613, -8.594415, 41.156298]  
3  [1372651457, -8.611965, 41.153859]  
4  [1372652886, -8.614431, 41.148387]  


In [None]:
# Same layout as the numeric grid, but keeping the Point objects themselves:
# one entry per trajectory, each being that trajectory's own points list.
grid_obj = [trajectory.get_list_points() for trajectory in taxi_trajectories]
    