In [6]:
from matplotlib import pyplot as plt
import numpy as np
import math

In [7]:
import pandas as pd

df_rows = pd.read_csv("./taxi_dataset/training_data_simple.csv", sep='\n', nrows=5, header=None)
df = df_rows[0].str.split(',', expand=True)
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,663,664,665,666,667,668,669,670,671,672
0,1372638303,-8.587116,41.162427,-8.586198,41.162112,-8.585982,41.161338,-8.585037,41.160024,-8.584146,...,,,,,,,,,,
1,1372646650,-8.6103,41.153688,-8.610336,41.153679,-8.610192,41.154039,-8.609985,41.154759,-8.609931,...,,,,,,,,,,
2,1372645583,-8.591301,41.156163,-8.592651,41.156163,-8.594415,41.156298,-8.594451,41.156316,-8.59446,...,,,,,,,,,,
3,1372651427,-8.610291,41.153625,-8.61093,41.153643,-8.611965,41.153859,-8.611938,41.154651,-8.611884,...,,,,,,,,,,
4,1372652856,-8.61372,41.148414,-8.614287,41.148414,-8.614431,41.148387,-8.614845,41.147451,-8.614872,...,,,,,,,,,,


In [8]:
df = df.iloc[:7,:7]
df.columns = ["start_time", "lon_1", "lat_1", "lon_2", "lat_2", "lon_3", "lat_3"]
df

Unnamed: 0,start_time,lon_1,lat_1,lon_2,lat_2,lon_3,lat_3
0,1372638303,-8.587116,41.162427,-8.586198,41.162112,-8.585982,41.161338
1,1372646650,-8.6103,41.153688,-8.610336,41.153679,-8.610192,41.154039
2,1372645583,-8.591301,41.156163,-8.592651,41.156163,-8.594415,41.156298
3,1372651427,-8.610291,41.153625,-8.61093,41.153643,-8.611965,41.153859
4,1372652856,-8.61372,41.148414,-8.614287,41.148414,-8.614431,41.148387


In [9]:
MIN_LAT = 41.14478; MIN_LON = -8.69346
MAX_LAT = 41.18652; MAX_LON = -8.57804

def cal_dis(lat_1,lon_1,lat_2,lon_2):
    lon_1 = lon_1 * math.pi / 180
    lat_1 = lat_1 * math.pi / 180
    lon_2 = lon_2 * math.pi / 180
    lat_2 = lat_2 * math.pi / 180
    a = abs(lat_1 - lat_2)
    b = abs(lon_1 - lon_2)
    d = 2 * 6378.137 * np.arcsin(
        np.sqrt(np.sin(a / 2) * np.sin(a / 2) + np.cos(lat_1) * np.cos(lat_2) * np.sin(b / 2) * np.sin(b / 2)))
    return d

In [10]:
length = cal_dis(lat_1=MIN_LAT, lon_1=(MAX_LON-MIN_LON)/2, lat_2=MAX_LAT, lon_2=(MAX_LON-MIN_LON)/2)
width  = cal_dis(lat_1=(MAX_LAT-MIN_LAT)/2, lon_1=MIN_LON, lat_2=(MAX_LAT-MIN_LAT)/2, lon_2=MAX_LON)

print("length:", length)
print("width:", width)
print("area:", length*width)

length: 4.646475545711363
width: 12.848494775001559
area: 59.70021677124496


In [11]:
UNIT_SIZE = 0.1
length_size = math.ceil(length / UNIT_SIZE)
width_size  = math.ceil(width  / UNIT_SIZE)

print("length_size:", length_size)
print("width_size:", width_size)

grid = [['.']*width_size for i in range(length_size)]
# for i in range(length_size):
#     for j in range(width_size):
#         print(grid[i][j])
    # print('\n')

# np. set_printoptions(threshold=np. inf)
print(np.matrix(grid))

length_size: 47
width_size: 129
[['.' '.' '.' ... '.' '.' '.']
 ['.' '.' '.' ... '.' '.' '.']
 ['.' '.' '.' ... '.' '.' '.']
 ...
 ['.' '.' '.' ... '.' '.' '.']
 ['.' '.' '.' ... '.' '.' '.']
 ['.' '.' '.' ... '.' '.' '.']]


In [12]:
this_lat = 41.162427; this_lon = -8.587116

print("dist:", cal_dis(MIN_LAT, MIN_LON, this_lat, this_lon))

lat_dis = cal_dis(lat_1=MIN_LAT, lon_1=(this_lon-MIN_LON)/2, lat_2=this_lat, lon_2=(this_lon-MIN_LON)/2)
print("lat_dis:", lat_dis)

lon_dis = cal_dis(lat_1=(this_lat-MIN_LAT)/2, lon_1=MIN_LON, lat_2=(this_lat-MIN_LAT)/2, lon_2=this_lon)
print("lon_dis:", lon_dis)

grid[round(lat_dis)][round(lon_dis)] = 2

# np. set_printoptions(threshold=np.inf)
print(np.matrix(grid))

dist: 9.127425730320764
lat_dis: 1.964455054029042
lon_dis: 11.838159788544635
[['.' '.' '.' ... '.' '.' '.']
 ['.' '.' '.' ... '.' '.' '.']
 ['.' '.' '.' ... '.' '.' '.']
 ...
 ['.' '.' '.' ... '.' '.' '.']
 ['.' '.' '.' ... '.' '.' '.']
 ['.' '.' '.' ... '.' '.' '.']]


In [51]:
class Point:
    def __init__(self, time, lon, lat):
        self.timestamp = time
        self.lon = float(lon)
        self.lat = float(lat)
    
    def __str__(self):
        return ("Point Description: [Timestamp: {:10d}, Longitude: {:9s}, Latitude: {:9s}]"\
            .format(self.timestamp, self.lon, self.lat))

    def __repr__(self):
        return ("Point Description: [Timestamp: {:10d}, Longitude: {:9s}, Latitude: {:9s}]"\
            .format(self.timestamp, self.lon, self.lat))

In [46]:
points = []

# iterrows slow access, https://stackoverflow.com/questions/16476924/how-to-iterate-over-rows-in-a-dataframe-in-pandas
for index, row in df.iterrows():

    first_timestamp = row[0]
    points.append(Point(time=int(first_timestamp), lon=row[1], lat=row[2]))

    for i in range(3, row.size, 2):
        this_timestamp = int(first_timestamp) + (i//2)*15
        points.append(Point(time=this_timestamp, lon=row[i], lat=row[i+1]))

print("len(points):", len(points))
for point in points:
    # print(point)
    pass

from operator import attrgetter
min_lon = (min(points,key=attrgetter('lon')).lon)
max_lon = (max(points,key=attrgetter('lon')).lon)
print("min_lon:", min_lon)
print("max_lon:", max_lon)

min_lat = (min(points,key=attrgetter('lat')).lat)
max_lat = (max(points,key=attrgetter('lat')).lat)
print("min_lat:", min_lat)
print("max_lat:", max_lat)




len(points): 15
min_lon: -8.614431
max_lon: -8.585982
min_lat: 41.148387
max_lat: 41.162427


In [47]:
MIN_LAT=float(min_lat); MAX_LAT=float(max_lat)
MIN_LON=float(min_lon); MAX_LON=float(max_lon)

length = cal_dis(lat_1=MIN_LAT, lon_1=(MAX_LON-MIN_LON)/2, lat_2=MAX_LAT, lon_2=(MAX_LON-MIN_LON)/2)
width  = cal_dis(lat_1=(MAX_LAT-MIN_LAT)/2, lon_1=MIN_LON, lat_2=(MAX_LAT-MIN_LAT)/2, lon_2=MAX_LON)

print("length:", length)
print("width:", width)
print("area:", length*width)

length: 1.5629256507376506
width: 3.166928169807309
area: 4.949673270635485


In [58]:
UNIT_SIZE = 0.05
length_size = math.ceil(length / UNIT_SIZE)
width_size  = math.ceil(width  / UNIT_SIZE)

print("length_size:", length_size)
print("width_size:", width_size)

grid = [['.']*width_size for i in range(length_size)]
# for i in range(length_size):
#     for j in range(width_size):
#         print(grid[i][j])
    # print('\n')

# np. set_printoptions(threshold=np. inf)
print(np.matrix(grid))

length_size: 32
width_size: 64
[['.' '.' '.' ... '.' '.' '.']
 ['.' '.' '.' ... '.' '.' '.']
 ['.' '.' '.' ... '.' '.' '.']
 ...
 ['.' '.' '.' ... '.' '.' '.']
 ['.' '.' '.' ... '.' '.' '.']
 ['.' '.' '.' ... '.' '.' '.']]


In [59]:
for point in points:
    this_lat = point.lat
    this_lon = point.lon

    # print("dist:", cal_dis(MIN_LAT, MIN_LON, this_lat, this_lon))

    lat_dis = cal_dis(lat_1=MIN_LAT, lon_1=(this_lon-MIN_LON)/2, lat_2=this_lat, lon_2=(this_lon-MIN_LON)/2)
    # print("lat_dis:", lat_dis)

    lon_dis = cal_dis(lat_1=(this_lat-MIN_LAT)/2, lon_1=MIN_LON, lat_2=(this_lat-MIN_LAT)/2, lon_2=this_lon)
    # print("lon_dis:", lon_dis)

    x, y = round(lat_dis), round(lon_dis)
    if grid[x][y] != '.':
        print("Overwriting grid[{}][{}]".format(x,y))
    grid[x][y] = point.timestamp
    

# np.set_printoptions(threshold=np.inf)
print(np.matrix(grid))

Overwriting grid[2][3]
Overwriting grid[1][0]
Overwriting grid[1][0]
Overwriting grid[1][3]
Overwriting grid[1][2]
Overwriting grid[1][0]
Overwriting grid[1][0]
Overwriting grid[1][0]
Overwriting grid[0][0]
Overwriting grid[0][0]
[['1372652886' '.' '.' ... '.' '.' '.']
 ['1372651457' '.' '1372645613' ... '.' '.' '.']
 ['.' '.' '.' ... '.' '.' '.']
 ...
 ['.' '.' '.' ... '.' '.' '.']
 ['.' '.' '.' ... '.' '.' '.']
 ['.' '.' '.' ... '.' '.' '.']]
