# AIS-PARSER DATA PLOTTING

In this demo, we'll be looking at a preprocessed csv file containing id-state-action-state transitions to plot the corresponding discretized ship trajectories on a map. For information on how this csv was generated, please refer to ``README.md`` or ``process_ais_data.py``.

In [1]:
import yaml
import numpy as np
import pandas as pd
import plotly
import plotly.graph_objs as go

First, we load the metadata. The metadata specifies the dimensions and resolution of the grid in longitude and latitude (and more) so we can plot the discretized trajectories on a map by mapping coordinates to states. The ``grid_params['grid_len']`` is the side length of one square in degrees of a regular Euclidean grid with ``grid_params['num_cols']`` columns. With this information, we can deduce the boundaries of a grid square from an integer state. The metadata also contains a copy of how the preprocessing was performed, so we know the format of the csv. The csv will have 2 extra columns, longitude and latitude, if ``options['append_coords']`` is true.

In [2]:
meta_file = 'meta_data.yml'

# Parse the metadata file. A malformed file is reported and the error is
# re-raised: carrying on with an empty dict would only fail on the first key
# lookup below with a KeyError that hides the real cause.
with open(meta_file, 'r') as stream:
    try:
        ais_meta = yaml.safe_load(stream)
    except yaml.YAMLError as exc:
        print(exc)
        raise

# top-level sections of the metadata
all_files_meta = ais_meta['all_files_meta']
options = ais_meta['options']
directories = ais_meta['directories']
grid_params = ais_meta['grid_params']

# specifies input directory and files of interest
in_dir_path = directories['in_dir_path']
in_dir_data = directories['in_dir_data']

# bare expression so the notebook displays the parsed metadata
ais_meta

{'all_files_meta': {'aishub-data-20200122-00.csv': {'day': 22,
   'month': 1,
   'year': 2020}},
 'directories': {'in_dir_data': 'ais_data_output.csv',
  'in_dir_path': 'ais_parser/filtered_data_for_visualisations/'},
 'grid_params': {'grid_len': 0.5,
  'max_lat': 50.0,
  'max_lon': 141.0,
  'min_lat': 25.0,
  'min_lon': -127.0,
  'num_cols': 536},
 'options': {'allow_diag': True,
  'append_coords': True,
  'bound_lat': True,
  'bound_lon': False,
  'bound_time': True,
  'interp_actions': True,
  'limit_rows': True,
  'max_rows': 100000,
  'min_states': 2,
  'prec_coords': 3}}

Now we load the sequences.

In this dataset, there could be thousands of trajectories.

In [3]:
# load the preprocessed transition table into a dataframe
csv_path = 'filtered_data_for_visualisations/ais_data_output.csv'
ais_data = pd.read_csv(csv_path)
print(ais_data)

# bare expression so the notebook renders the first rows as a table
ais_data.head()

     sequence_id  from_state_id  action_id  to_state_id  longitude  latitude
0              0          17259          1        17260    -73.000    41.405
1              0          17260         -1           -1    -73.000    41.405
2              1            364          2          901     55.276    25.254
3              1            901          2         1438     55.750    25.750
4              1           1438          2         1975     56.250    26.250
..           ...            ...        ...          ...        ...       ...
636           33            496         -1           -1    121.493    25.334
637           34           4887          1         4888    -95.001    29.614
638           34           4888         -1           -1    -95.000    29.614
639           35          22827          3        23363     30.782    46.421
640           35          23363         -1           -1     30.731    46.504

[641 rows x 6 columns]


Unnamed: 0,sequence_id,from_state_id,action_id,to_state_id,longitude,latitude
0,0,17259,1,17260,-73.0,41.405
1,0,17260,-1,-1,-73.0,41.405
2,1,364,2,901,55.276,25.254
3,1,901,2,1438,55.75,25.75
4,1,1438,2,1975,56.25,26.25


We define a function that converts the ``state_id``s from the ``ais_data`` to the coordinates corresponding to the middle of that grid square for plotting if ``options['append_coords']`` was not set to ``True`` before preprocessing.

In [4]:
def state_to_coord(state, grid=None):
    """Return the (longitude, latitude) of the center of a grid square.

    States are numbered row by row: ``state % num_cols`` is the column and
    ``state // num_cols`` is the row, both counted from the
    (``min_lon``, ``min_lat``) corner of the grid.

    No validation is performed, so a negative id such as the ``-1`` used in
    the csv for "no next state" yields a point outside the grid.

    Args:
        state: Integer state id of the grid square.
        grid: Optional mapping with the keys ``'num_cols'``, ``'grid_len'``,
            ``'min_lon'`` and ``'min_lat'``. When omitted, the module-level
            ``grid_params`` loaded from the metadata is used.

    Returns:
        A ``(state_lon, state_lat)`` tuple in degrees.
    """
    params = grid_params if grid is None else grid
    state_row, state_col = divmod(state, params['num_cols'])
    # the + 0.5 moves from the square's lower corner to its center
    state_lon = params['min_lon'] + params['grid_len'] * (state_col + 0.5)
    state_lat = params['min_lat'] + params['grid_len'] * (state_row + 0.5)
    return state_lon, state_lat

We use pandas to add coordinate columns to our dataframe that will contain the coordinates of the center of each state in each sequence, if this was not done in the preprocessing by setting ``options['append_coords']`` to ``True``.

In [5]:
if not options['append_coords']:
    def _row_center(row):
        """Return the center coordinates of the row's ``from_state_id`` square."""
        return state_to_coord(row['from_state_id'])

    # one (longitude, latitude) pair per row, expanded into two columns
    centers = ais_data.apply(_row_center, axis=1, result_type='expand')
    ais_data[['longitude', 'latitude']] = centers
ais_data.head()

Unnamed: 0,sequence_id,from_state_id,action_id,to_state_id,longitude,latitude
0,0,17259,1,17260,-73.0,41.405
1,0,17260,-1,-1,-73.0,41.405
2,1,364,2,901,55.276,25.254
3,1,901,2,1438,55.75,25.75
4,1,1438,2,1975,56.25,26.25


With the latitudes and longitudes now available, we add a final row to each trajectory with just the last state so a one-to-one mapping of state to coordinates is formed, if this was not already done in preprocessing.

In [6]:
if not options['append_coords']:
    columns = ['sequence_id', 'from_state_id', 'action_id', 'to_state_id', 'longitude', 'latitude']

    # Collect every trajectory followed by its closing row and concatenate
    # once at the end; growing a dataframe with pd.concat inside the loop
    # copies all previous rows on every iteration.
    pieces = []
    for traj_num, traj in ais_data.groupby('sequence_id'):
        # adds final dummy row to each sequence with just the final state in the trajectory
        last_state = traj['to_state_id'].iloc[-1]
        last_lon, last_lat = state_to_coord(last_state)

        final_state = {
            'sequence_id': traj_num,
            'from_state_id': last_state,
            'action_id': -1,  # -1 marks "no action / no next state"
            'to_state_id': -1,
            'longitude': last_lon,
            'latitude': last_lat,
        }
        pieces.append(traj)
        pieces.append(pd.DataFrame(final_state, index=[0]))

    if pieces:
        sequence_dfs = pd.concat(pieces, ignore_index=True)
    else:
        # pd.concat rejects an empty list; keep the expected columns instead
        sequence_dfs = pd.DataFrame(columns=columns)

    ais_data = sequence_dfs

    print(sequence_dfs)

We then use plotly to plot the data on an interactive map, with the option to limit the number of trajectories we plot in the interest of performance and aesthetics. Trajectories may be clicked to thicken their line (each click widens it by one), making it easier to discern where an individual trajectory goes.

In [7]:
# controls how many trajectories to plot - set to -1 to plot all trajectories available
MAX_TRAJECTORIES = 250

plotly.offline.init_notebook_mode(connected=True)

# keeps only the sequences whose id is below the limit
if MAX_TRAJECTORIES > -1:
    ais_data = ais_data[ais_data['sequence_id'] < MAX_TRAJECTORIES]

# First row of every distinct coordinate pair. The whole row is kept so the
# hover text below comes from the same rows as the plotted points; taking it
# from the full ais_data would shift labels after the first dropped duplicate.
unique_rows = ais_data.drop_duplicates(subset=['longitude', 'latitude'])
ais_unique = unique_rows[['longitude', 'latitude']]  # gets the unique coordinates we're going to plot

ais_states = [go.Scattergeo(
    locationmode = 'country names',
    lon = ais_unique['longitude'],
    lat = ais_unique['latitude'],
    hoverinfo = 'text',
    text = unique_rows['sequence_id'],  # id of the first sequence seen at each point
    mode = 'markers',
    marker = go.scattergeo.Marker(
        size = 2,
        color = 'red',
        line = go.scattergeo.marker.Line(
            width = 3,
            # NOTE(review): an alpha of 50 is outside the usual 0-1 range for
            # rgba - confirm the intended opacity
            color = 'rgba(68, 68, 68, 50)'
        )
    ))]


# one line trace per sequence, each drawn in its own random color
ais_trajectories = []
for traj_num, traj_data in ais_data.groupby('sequence_id'):
    # gets random color for each trajectory
    red = np.random.randint(0, high=230)
    green = np.random.randint(0, high=230)
    blue = np.random.randint(0, high=230)
    ais_trajectories.append(
        go.Scattergeo(
            lon = traj_data['longitude'],
            lat = traj_data['latitude'],
            mode = 'lines',
            line = go.scattergeo.Line(
                width = 1,
                # channels go in r, g, b order
                color = 'rgb({}, {}, {})'.format(red, green, blue),
            ),
        )
    )

# Axis ranges extend past the grid bounds (25 degrees of longitude and 15 of
# latitude on each side), with one grid line per grid square.
lon_axis = go.layout.geo.Lonaxis(
    range=[grid_params['min_lon'] - 25, grid_params['max_lon'] + 25],
    showgrid=True,
    dtick=grid_params['grid_len'],
)
lat_axis = go.layout.geo.Lataxis(
    range=[grid_params['min_lat'] - 15, grid_params['max_lat'] + 15],
    showgrid=True,
    dtick=grid_params['grid_len'],
)

# base map: projection, land/lake/country styling and the two axes above
geo_settings = go.layout.Geo(
    scope='world',
    resolution=50,
    projection=go.layout.geo.Projection(type='equirectangular'),
    showland=True,
    showlakes=True,
    coastlinewidth=2,
    landcolor='rgb(204, 204, 204)',
    lakecolor='rgb(255,255,255)',
    countrycolor='rgb(190, 190, 190)',
    lonaxis=lon_axis,
    lataxis=lat_axis,
)

# fixed-size figure with a title and no legend
layout = go.Layout(
    autosize=False,
    width=900,
    height=750,
    title=go.layout.Title(text='Shipping Data States Scatter'),
    showlegend=False,
    geo=geo_settings,
)

def update_point(trace, points, selector):
    """Click callback: widen the clicked trace's line by one.

    Does nothing when the click event carries no point indices.
    """
    if len(points.point_inds) == 0:
        return
    trace.line.width += 1


fig = go.FigureWidget(data=ais_states + ais_trajectories, layout=layout)

# the first trace holds the state markers; every trace after it is a trajectory line
lines = fig.data[1:]
for trajectory_line in lines:
    trajectory_line.on_click(update_point)

# bare expression so the notebook renders the widget
fig

FigureWidget({
    'data': [{'hoverinfo': 'text',
              'lat': array([41.405, 25.254, 25.75 , ..., 29.…