In [1]:
from matplotlib import pyplot as plt
import numpy as np
import math

In [2]:
import pandas as pd

df_rows = pd.read_csv("./taxi_dataset/training_data_simple.csv", sep='\n', nrows=5, header=None)
df = df_rows[0].str.split(',', expand=True)
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,663,664,665,666,667,668,669,670,671,672
0,1372638303,-8.587116,41.162427,-8.586198,41.162112,-8.585982,41.161338,-8.585037,41.160024,-8.584146,...,,,,,,,,,,
1,1372646650,-8.6103,41.153688,-8.610336,41.153679,-8.610192,41.154039,-8.609985,41.154759,-8.609931,...,,,,,,,,,,
2,1372645583,-8.591301,41.156163,-8.592651,41.156163,-8.594415,41.156298,-8.594451,41.156316,-8.59446,...,,,,,,,,,,
3,1372651427,-8.610291,41.153625,-8.61093,41.153643,-8.611965,41.153859,-8.611938,41.154651,-8.611884,...,,,,,,,,,,
4,1372652856,-8.61372,41.148414,-8.614287,41.148414,-8.614431,41.148387,-8.614845,41.147451,-8.614872,...,,,,,,,,,,


In [3]:
df = df.iloc[:7,:7]
df.columns = ["start_time", "lon_1", "lat_1", "lon_2", "lat_2", "lon_3", "lat_3"]
df

Unnamed: 0,start_time,lon_1,lat_1,lon_2,lat_2,lon_3,lat_3
0,1372638303,-8.587116,41.162427,-8.586198,41.162112,-8.585982,41.161338
1,1372646650,-8.6103,41.153688,-8.610336,41.153679,-8.610192,41.154039
2,1372645583,-8.591301,41.156163,-8.592651,41.156163,-8.594415,41.156298
3,1372651427,-8.610291,41.153625,-8.61093,41.153643,-8.611965,41.153859
4,1372652856,-8.61372,41.148414,-8.614287,41.148414,-8.614431,41.148387


In [4]:
from operator import attrgetter

class Trajectory:
    def __init__(self, id, df_row):
        self.id = id
        self.first_timestamp = int(df_row[0])
        
        # populate points array
        self.points = []
        self.points.append(Point(time=int(self.first_timestamp), lon=row[1], lat=row[2]))
        for i in range(3, row.size, 2):
            this_timestamp = int(self.first_timestamp) + (i//2)*15
            self.points.append(Point(time=this_timestamp, lon=row[i], lat=row[i+1]))

        # find trajectory min and max lat, lon
        self.min_lat = (min(self.points,key=attrgetter('lat')).lat)
        self.max_lat = (max(self.points,key=attrgetter('lat')).lat)
        self.min_lon = (min(self.points,key=attrgetter('lon')).lon)
        self.max_lon = (max(self.points,key=attrgetter('lon')).lon)
        # print("min_lat:", self.min_lat)
        # print("max_lat:", self.max_lat)
        # print("min_lon:", self.min_lon)
        # print("max_lon:", self.max_lon)

    def __str__(self):
        return("{:10s}: [ID: {:5d}, First Timestamp: {:10d}, Number of Points: {}]".\
            format("Trajectory", self.id, self.first_timestamp, len(self.points)))
            
    def __repr__(self):
        print("\nin __repr__, calling __str__")
        return str(self)

    def get_points_info(self):
        for point in self.points:
            print(point)
        
        

class Point:
    def __init__(self, time, lon, lat):
        self.timestamp = time
        self.lon = float(lon)
        self.lat = float(lat)
    
    def __str__(self):
        return ("{:10s}: [Timestamp: {:10d}, Longitude: {:9f}, Latitude: {:9f}]"\
            .format("Point", self.timestamp, self.lon, self.lat))

    def __repr__(self):
        print("\nin __repr__, calling __str__")
        return str(self)

In [5]:
taxi_trajectories = []

counter = 0
for index, row in df.iterrows():
    taxi_trajectories.append(Trajectory(counter, row))
    counter += 1

for traj in taxi_trajectories:
    print(traj)
    traj.get_points_info()
    print()

Trajectory: [ID:     0, First Timestamp: 1372638303, Number of Points: 3]
Point     : [Timestamp: 1372638303, Longitude: -8.587116, Latitude: 41.162427]
Point     : [Timestamp: 1372638318, Longitude: -8.586198, Latitude: 41.162112]
Point     : [Timestamp: 1372638333, Longitude: -8.585982, Latitude: 41.161338]

Trajectory: [ID:     1, First Timestamp: 1372646650, Number of Points: 3]
Point     : [Timestamp: 1372646650, Longitude: -8.610300, Latitude: 41.153688]
Point     : [Timestamp: 1372646665, Longitude: -8.610336, Latitude: 41.153679]
Point     : [Timestamp: 1372646680, Longitude: -8.610192, Latitude: 41.154039]

Trajectory: [ID:     2, First Timestamp: 1372645583, Number of Points: 3]
Point     : [Timestamp: 1372645583, Longitude: -8.591301, Latitude: 41.156163]
Point     : [Timestamp: 1372645598, Longitude: -8.592651, Latitude: 41.156163]
Point     : [Timestamp: 1372645613, Longitude: -8.594415, Latitude: 41.156298]

Trajectory: [ID:     3, First Timestamp: 1372651427, Number of P

In [6]:
# MIN_LAT = 41.14478; MIN_LON = -8.69346
# MAX_LAT = 41.18652; MAX_LON = -8.57804

# test using first row of taxi trajectories
MIN_LAT=float(taxi_trajectories[0].min_lat); MAX_LAT=float(taxi_trajectories[0].max_lat)
MIN_LON=float(taxi_trajectories[0].min_lon); MAX_LON=float(taxi_trajectories[0].max_lon)

def cal_dis(lat_1,lon_1,lat_2,lon_2):
    lon_1 = lon_1 * math.pi / 180
    lat_1 = lat_1 * math.pi / 180
    lon_2 = lon_2 * math.pi / 180
    lat_2 = lat_2 * math.pi / 180
    a = abs(lat_1 - lat_2)
    b = abs(lon_1 - lon_2)
    d = 2 * 6378.137 * np.arcsin(
        np.sqrt(np.sin(a / 2) * np.sin(a / 2) + np.cos(lat_1) * np.cos(lat_2) * np.sin(b / 2) * np.sin(b / 2)))
    return d

length = cal_dis(lat_1=MIN_LAT, lon_1=(MAX_LON-MIN_LON)/2, lat_2=MAX_LAT, lon_2=(MAX_LON-MIN_LON)/2)
width  = cal_dis(lat_1=(MAX_LAT-MIN_LAT)/2, lon_1=MIN_LON, lat_2=(MAX_LAT-MIN_LAT)/2, lon_2=MAX_LON)

print("length:", length)
print("width:", width)
print("area:", length*width)

length: 0.12122692547381242
width: 0.12623630255378918
area: 0.015303238841777837


In [7]:
UNIT_SIZE = 0.001
length_size = math.ceil(length / UNIT_SIZE)
width_size  = math.ceil(width  / UNIT_SIZE)

print("length_size:", length_size)
print("width_size:", width_size)

grid = [['.']*width_size for i in range(length_size)]
# for i in range(length_size):
#     for j in range(width_size):
#         print(grid[i][j])
    # print('\n')

# np. set_printoptions(threshold=np. inf)
print(np.matrix(grid))

length_size: 122
width_size: 127
[['.' '.' '.' ... '.' '.' '.']
 ['.' '.' '.' ... '.' '.' '.']
 ['.' '.' '.' ... '.' '.' '.']
 ...
 ['.' '.' '.' ... '.' '.' '.']
 ['.' '.' '.' ... '.' '.' '.']
 ['.' '.' '.' ... '.' '.' '.']]


In [9]:
index_point = 0
for point in taxi_trajectories[0].points:
    this_lat = point.lat
    this_lon = point.lon

    # print("dist:", cal_dis(MIN_LAT, MIN_LON, this_lat, this_lon))

    lat_dis = cal_dis(lat_1=MIN_LAT, lon_1=(this_lon-MIN_LON)/2, lat_2=this_lat, lon_2=(this_lon-MIN_LON)/2)
    # print("lat_dis:", lat_dis)

    lon_dis = cal_dis(lat_1=(this_lat-MIN_LAT)/2, lon_1=MIN_LON, lat_2=(this_lat-MIN_LAT)/2, lon_2=this_lon)
    # print("lon_dis:", lon_dis)

    x, y = round(lat_dis), round(lon_dis)
    if grid[x][y] != '.':
        print("Overwriting grid[{}][{}]".format(x,y))
    grid[x][y] = index_point

    index_point += 1
    

np.set_printoptions(threshold=np.inf)
print(np.matrix(grid))

AttributeError: 'list' object has no attribute 'points'

In [None]:
def cal_midpoint(lat1, lon1, lat2, lon2):
    # https://www.geomidpoint.com/example.html
    # with equal weightings

    print("one lat, lon: {}, {}".format(lat1, lon1))
    print("two lat, lon: {}, {}".format(lat2, lon2))

    # convert to radians
    lat1 = lat1 * math.pi / 180
    lon1 = lon1 * math.pi / 180
    lat2 = lat2 * math.pi / 180
    lon2 = lon2 * math.pi / 180

    x1 = np.cos(lat1) * np.cos(lon1)
    y1 = np.cos(lat1) * np.sin(lon1)
    z1 = np.sin(lat1)

    x2 = np.cos(lat2) * np.cos(lon2)
    y2 = np.cos(lat2) * np.sin(lon2)
    z2 = np.sin(lat2)

    xmid = (x1 + x2) / 2
    ymid = (y1 + y2) / 2
    zmid = (z1 + z2) / 2

    lon_mid = np.arctan2(ymid, xmid)
    hyp_mid = math.sqrt(xmid*xmid + ymid*ymid)
    lat_mid = np.arctan2(zmid, hyp_mid)
    
    # convert back to degrees
    lat_mid = lat_mid * 180 / math.pi
    lon_mid = lon_mid * 180 / math.pi
    
    print("mid lat, lon: {}, {}".format(lat_mid, lon_mid))
    return lat_mid, lon_mid

lat, lon = cal_midpoint(41.162427, -8.587116, 41.161338, -8.585982)

lat, lon = cal_midpoint(-74.0059731, 40.7143528, -87.6297982, 41.8781136)