# Preprocess AIS data

Load data for a specific region and time period and extract trajectories of ships.

In [1]:
# imports
from gql import gql, Client
from gql.transport.aiohttp import AIOHTTPTransport
import pandas as pd
import gym
import datetime

import os, sys
sys.path.append(os.path.abspath('/workspace/FerryGymEnv'))
print(sys.path)


from FerryGymEnv.FerryGymEnv import FerryGymEnv
from MapRevMoenk import *

['/', '/workspace', '/usr/local/lib/python39.zip', '/usr/local/lib/python3.9', '/usr/local/lib/python3.9/lib-dynload', '', '/root/.local/share/virtualenvs/workspace-dqq3IVyd/lib/python3.9/site-packages', '/workspace/FerryGymEnv']
path ferrygym ['/', '/workspace', '/usr/local/lib/python39.zip', '/usr/local/lib/python3.9', '/usr/local/lib/python3.9/lib-dynload', '', '/root/.local/share/virtualenvs/workspace-dqq3IVyd/lib/python3.9/site-packages', '/workspace/FerryGymEnv']


## Load data

Our AIS data is stored in a Postgres database on our institute's server.
To access the data, a GraphQL API is provided through [Hasura](https://hasura.io/).


In [2]:
# connect to api
# Select your transport with a defined url endpoint
transport = AIOHTTPTransport(
    url="http://172.23.0.5:8080/v1/graphql",
    headers={'content-type': 'application/json',
        'x-hasura-admin-secret': 'aaasdsdndsfvksdsd'})

# Create a GraphQL client using the defined transport
client = Client(transport=transport, fetch_schema_from_transport=True)

# Provide a GraphQL query
query = gql(
    """
    query MyQuery(
  $lteTimeString: String!,
  $gteTimeString: String!,
  $bound_bottom: float8!,
  $bound_top: float8!,
  $bound_left: float8!,
  $bound_right: float8!,
  $minSpeed: float8!) {
      ais(limit: 10000000, where: {
        datetime: {_lt: $lteTimeString, _gte: $gteTimeString},
        lon: {_is_null: false, _gte: $bound_left, _lte: $bound_right},
        lat: {_is_null: false, _gte: $bound_bottom, _lte: $bound_top},
        speed: {_is_null: false, _gt: $minSpeed},
        }) {
            mmsi
            datetime
            lat
            lon
            speed
            heading
            }
    }
"""
)

params = {"lteTimeString": "2022-04-07 12:00:00+00", "gteTimeString": "2022-04-01 12:00:00+00", "bound_right": BOUND_RIGHT, "bound_left": BOUND_LEFT, "bound_top": BOUND_TOP, "bound_bottom": BOUND_BOTTOM, "minSpeed": 0.3 }
print(params)

# Execute the query on the transport
result = await client.execute_async(query, variable_values=params)
df = pd.DataFrame(result['ais'])
# get min datetime in df
print('start data: ', df['datetime'].min())
# get max datetime in df
print('end date: ',df['datetime'].max())
# print min speed
print('min speed: ',df['speed'].min())
# print max speed
print('max speed: ',df['speed'].max())

{'lteTimeString': '2022-04-07 12:00:00+00', 'gteTimeString': '2022-04-01 12:00:00+00', 'bound_right': 10.190433, 'bound_left': 10.141767, 'bound_top': 54.352733, 'bound_bottom': 54.325583, 'minSpeed': 0.3}
start data:  2022-04-01 12:00:00+00
end date:  2022-04-07 11:59:56+00
min speed:  0.4
max speed:  102.3


In [37]:
# Convert in env coordinates

# Settings handed to FerryGymEnv; the environment is only needed here for its
# lat/lon -> environment coordinate conversion.
kwargs = {
    'generate_training_data': True,
    'data_directory': '/workspace/data/rev-moenk/training/',
    'df_filename': '2022-04-10-13->14.pkl',
    'startingTime': datetime.datetime(2022, 4, 10, 13, 0, 1),
}

env = FerryGymEnv(**kwargs)
# Walk over every AIS record and store its converted position in the
# 'x' and 'y' columns of the same row.
for row_label, latitude, longitude in zip(df.index, df['lat'], df['lon']):
    env_x, env_y = env.convertLatLotInEnvCoordinates(latitude, longitude)
    df.at[row_label, 'x'] = env_x
    df.at[row_label, 'y'] = env_y

  logger.warn(f"Box bound precision lowered by casting to {self.dtype}")


In [38]:
# interpolate_trajectories to generate position data for every second

# Parse the 'datetime' column into pandas datetimes so it can drive resampling.
df['datetime'] = pd.to_datetime(df['datetime'])
# Use the timestamps as the index as well; the 'datetime' column itself is kept.
df.index = df['datetime']
# One group per vessel identifier (mmsi).
df_grouped = df.groupby(['mmsi'])
# Per vessel: bin the rows into 1-second intervals on 'datetime' and average
# the rows of each bin. Bins without any row yield NaN values.
df_resampled = df_grouped.resample('1S', on='datetime').mean()
# Fill the NaN values between observations (interpolate() defaults to linear).
# NOTE(review): interpolate() is applied to the stacked result of all vessels,
# not per vessel, and it bridges gaps of any length -- confirm this is intended.
df_interpol = df_resampled.interpolate()

In [39]:
# Move 'datetime' out of the index back into a column, then replace the
# remaining index by a running row number stored under the name 'index'.
df = (
    df_interpol
    .reset_index(level=['datetime'])
    .assign(index=lambda frame: range(0, len(frame)))
    .set_index('index')
)

### Calculate speed, acceleration, direction and direction_change

In [41]:
# Derive the travelled distance per row and the resulting speed.
# Rows are one second apart after resampling, hence the 1 s time step.
timedelta = datetime.timedelta(seconds=1)

# displacement along each axis relative to the previous row (0 for the first row)
step_x = df['x'].diff().fillna(0.)
step_y = df['y'].diff().fillna(0.)
df['dist_x'] = step_x
df['dist_y'] = step_y
# euclidean length of the displacement
df['dist'] = (step_x**2 + step_y**2)**0.5
# distance per time step (the API's own 'speed' column is in knots)
speed_calculated = df['dist'] / timedelta.total_seconds()
df['speed_calculated'] = speed_calculated

# quick sanity figures: smallest / largest speed and number of standstill rows
speed_summary = {
    'min speed_calculated: ': df['speed_calculated'].min(),
    'max speed_calculated: ': df['speed_calculated'].max(),
    'count speed_calculated == 0: ': df['speed_calculated'].eq(0).sum(),
}
for summary_label, summary_value in speed_summary.items():
    print(summary_label, summary_value)



min speed_calculated:  0.0
max speed_calculated:  11.90321183259538
count speed_calculated == 0:  1188


In [None]:
def angle_between(p1, p2):
    """Return the angle from ``p2`` to ``p1`` in degrees, wrapped to 0-360.

    Both arguments are ``[x, y]`` pairs. Each pair is turned into its polar
    angle with ``arctan2(y, x)``; the result is the polar angle of ``p1``
    minus that of ``p2``, taken modulo a full turn. The components may be
    scalars or array-likes (the result is then element-wise).
    """
    x1, y1 = p1
    x2, y2 = p2
    full_turn = 2 * np.pi
    difference = np.arctan2(y1, x1) - np.arctan2(y2, x2)
    return np.rad2deg(difference % full_turn)

# NOTE(review): this cell repeats the distance / speed computation of the
# speed cell and the direction computation of the direction cell.
seconds_per_step = timedelta.total_seconds()

# per-row displacement along each axis (0 for the very first row)
df['dist_x'] = df['x'].diff().fillna(0.)
df['dist_y'] = df['y'].diff().fillna(0.)
# length of the displacement and the speed it corresponds to
df['dist'] = (df['dist_x']**2 + df['dist_y']**2)**0.5
df['speed_calculated'] = df['dist'] / seconds_per_step
# angle between the reference vector [0, 1] and the displacement with its
# y component negated
movement_vector = [df['dist_x'], -1 * df['dist_y']]
df['direction'] = angle_between([0, 1], movement_vector)

In [30]:
 # calculate acceleration and save in new column
# change of speed relative to the previous row (0 where there is no previous row)
acceleration = df['speed_calculated'].diff().fillna(0.)
# divide by the time step, then move every value one row up so that each row
# holds the acceleration leading to the next row (to match state)
df['acceleration'] = (acceleration / timedelta.total_seconds()).shift(-1)

In [31]:
import numpy as np
def angle_between(p1, p2):
    """Angle in degrees (wrapped to 0-360) obtained by subtracting the polar
    angle of ``p2`` from the polar angle of ``p1``.

    ``p1`` and ``p2`` are ``[x, y]`` pairs whose components may be scalars or
    array-likes; the polar angle of a pair is ``arctan2(y, x)``.
    """
    polar_1 = np.arctan2(p1[1], p1[0])
    polar_2 = np.arctan2(p2[1], p2[0])
    wrapped = (polar_1 - polar_2) % (2 * np.pi)
    return np.rad2deg(wrapped)

# direction of travel: angle between the reference vector [0, 1] and the
# per-row displacement with its y component negated
displacement = [df['dist_x'], -1 * df['dist_y']]
df['direction'] = angle_between([0, 1], displacement)

In [32]:
# direction_change: difference of 'direction' to the previous row (0 where
# there is none), moved one row up so that each row holds the change leading
# to the next row (to match state)
df['direction_change'] = df['direction'].diff().fillna(0).shift(-1)

In [None]:
# clean up first and last row in group
# A row is the last / first row of a ship when the neighbouring row below /
# above carries a different mmsi (or does not exist).
mask_last_row_of_ship = df.mmsi != df.mmsi.shift(-1)
mask_first_row_of_ship = df.mmsi != df.mmsi.shift(1)
# mask_time_gap = df.datetime != df.datetime.shift(1) - timedelta

# The actions on boundary rows were derived from a row of another ship, so
# they are reset to 0.
# .loc writes into the frame itself; chained indexing (df[col][mask] = ...)
# may write into a temporary copy and is a no-op under pandas copy-on-write.
boundary_rows = mask_first_row_of_ship | mask_last_row_of_ship
df.loc[boundary_rows, ['acceleration', 'direction_change']] = 0

# Speed and direction of a ship's first row were computed against the last
# position of the previous ship. Replace them with the values of the ship's
# own next row, which is consistent with the zero acceleration and zero
# direction change set above. (The former shift(1) copied the values of the
# previous ship's last row instead.) A ship consisting of a single row has no
# next row and receives NaN here.
for state_column in ('speed_calculated', 'direction'):
    next_row_same_ship = df[state_column].shift(-1).mask(mask_last_row_of_ship)
    df.loc[mask_first_row_of_ship, state_column] = next_row_same_ship[mask_first_row_of_ship]

In [None]:
# keep the two action columns at a precision of 2 decimals
for action_column in ('acceleration', 'direction_change'):
    df[action_column] = df[action_column].round(2)

In [None]:
# any value still missing in the action and state columns becomes 0
for derived_column in ('acceleration', 'direction_change', 'speed_calculated', 'direction'):
    df[derived_column] = df[derived_column].fillna(0)

In [None]:
def move_around_the_clock(direction_change):
    """Express a direction change of more than half a turn as the shorter
    turn in the opposite sense.

    Values above 180 are reduced by 360, values below -180 are increased by
    360; everything else (including exactly +/-180) is returned unchanged.
    """
    if direction_change > 180:
        return direction_change - 360
    if direction_change < -180:
        return direction_change + 360
    return direction_change

# direction changes beyond +/-180 degrees are mapped onto the equivalent
# shorter turn (change -/+ 360)
wrapped_direction_change = df['direction_change'].apply(move_around_the_clock)
df['direction_change'] = wrapped_direction_change

In [4]:
import numpy as np
# scratch calculation: sine of half of a 192 degree angle
np.sin(np.deg2rad(192 / 2))

0.9945218953682733

In [33]:
# keep only the vessel id, the timestamp, the position and the derived
# state / action columns
df = df.loc[:, [
    'mmsi',
    'datetime',
    'x',
    'y',
    'speed_calculated',
    'acceleration',
    'direction',
    'direction_change',
]]

In [42]:
# persist the preprocessed trajectories as a pickle file
print('Saving df to pickle file')
df.to_pickle(
    '/workspace/data/rev-moenk3/training/2022-04-01-12->04-07-12.pkl'
)

In [4]:
### Generate neighborhood images

# Settings for the environment; they point it at the pickle file written by
# the save cell of this notebook.
kwargs = {
    'generate_training_data': True,
    'data_directory': '/workspace/data/rev-moenk3/training/',
    'df_filename': '2022-04-01-12->04-07-12.pkl',
    'startingTime': datetime.datetime(2022, 4, 1, 12, 0, 1),
}

# Register the environment under its own id with these settings and create it.
gym.register(
    id="FerryGym-preprocess-v1",
    entry_point="FerryGymEnv.FerryGymEnv:FerryGymEnv",
    kwargs=kwargs,
)
env = gym.make('FerryGym-preprocess-v1')

print('start generating images, this may take a while')
env.reset()
done = False
# for step in range(1):
# Step with the constant action (1, 1) until the environment reports that it
# is done, then close it.
while not done:
    obs, reward, done, info = env.step((1, 1))
env.close()



  logger.warn(f"Overriding environment {spec.id}")
  logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
  deprecation(
  deprecation(
  logger.warn(
  logger.warn("Casting input x to numpy array.")
  logger.warn(f"{pre} is not within the observation space.")
  logger.warn(
  logger.warn(
  logger.deprecation(
  logger.warn(
  logger.warn("Casting input x to numpy array.")
  logger.warn(f"{pre} is not within the observation space.")
  logger.warn(
  logger.warn(


reset called
stop at:  2022-04-10 14:00:01
saving df:  /workspace/data/rev-moenk/training/df_2022-04-10 13:00:01.pkl
