# Check Axel's calculations.

# Purpose
* Axel has written a small script to compare AIS and GPS data.
* This notebook examines that script in order to understand and check it.

## Results
* The total sailed distance differs by about 6% between SSPA AIS and GPS.
* This comparison, however, does not account for missing data or for the data reduction applied to the SSPA AIS data.
* If speeds of 0-1 kts are excluded from the comparison, the difference is 2%.

# Setup

In [None]:
# %load imports.py
from typing import no_type_check
%matplotlib inline
%load_ext autoreload
%autoreload 2

import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from pyaisdb.database import DB



In [None]:
# Database handle from pyaisdb; the SQL queries in this notebook are run through it.
db = DB()

In [None]:
# Empty float64 containers: one distance series per data source (GPS and AIS)
# and the frame in which the two are later placed side by side.
s_gps, s_ais = (pd.Series(dtype='float64') for _ in range(2))
df_speed_distances = pd.DataFrame(dtype='float64')

## GPS data

In [None]:
# Sum the GPS-derived sailed distance for each 1-knot speed bin from 0 to 30 kts.
#
# For every position in the time window the query takes the distance to the
# previous position (ordered by time_info), then sums those distances over the
# rows whose sog falls in [i, i + 1). Dividing by 1852 converts to nautical
# miles, assuming the geography distance is returned in metres.
#
# The results are collected in a fresh dict and s_gps is rebuilt from it, so
# re-running this cell never leaves bins from an earlier run behind.
gps_distances = {}
for i in range(30):
    sql = f"""with blue_data as (SELECT time_info, lag(time_info, 1) OVER (ORDER by time_info ASC) as next_time, sog, 
  ST_Distance(pos::geography, lag(pos::geography, 1) OVER (ORDER by time_info ASC)) as dist
	FROM projects._49145341_d2e2f_blue_data_varmdo
	where time_info < '2020-07-19 23:59:59+02'
                  and time_info > '2020-07-10 00:00:00+02'
)
select sum(dist)/1852
from blue_data
where sog >= {i} and sog < {i + 1}"""
    distance = db.execute_and_return(sql)[0][0]
    # Bins without any distance (NULL or zero sum) are left out of the series.
    if distance:
        # Index each bin by its mid-point speed, e.g. 0.5 for the 0-1 kts bin.
        speed = i + 0.5
        gps_distances[speed] = distance

s_gps = pd.Series(gps_distances, dtype='float64')


## AIS data

In [None]:
# Sum the AIS-derived sailed distance for each 1-knot speed bin from 0 to 30 kts.
#
# For vessel mmsi 265520390 the query adds up the lengths of the segments
# whose sog falls in [i, i + 1) and whose date1/date2 lie inside the time
# window. Dividing by 1852 converts to nautical miles, assuming the geography
# length is returned in metres.
#
# The results are collected in a fresh dict and s_ais is rebuilt from it, so
# re-running this cell never leaves bins from an earlier run behind.
ais_distances = {}
for i in range(30):
    sql = f"""select sum(st_length(segment::geography)) / 1852
from segments_sjfv_2020
where sog>={i} and sog < {i+1}
and mmsi=265520390
and date2 < '2020-07-19 23:59:59+02'
and date1 > '2020-07-10 00:00:00+02' """
    distance = db.execute_and_return(sql)[0][0]
    # Bins without any distance (NULL or zero sum) are left out of the series.
    if distance:
        # Index each bin by its mid-point speed, e.g. 0.5 for the 0-1 kts bin.
        speed = i + 0.5
        ais_distances[speed] = distance

s_ais = pd.Series(ais_distances, dtype='float64')


In [None]:
# Put the GPS and AIS distances side by side, indexed by bin mid-point speed.
#
# The frame is built from both series at once so that its index is the union
# of their speed bins. Assigning the columns one after the other would align
# the AIS series to the index established by the GPS column and silently drop
# any bin that only has AIS data, under-counting the AIS total.
df_speed_distances = pd.DataFrame({'GPS': s_gps, 'AIS': s_ais}, dtype='float64').sort_index()
df_speed_distances.index.name = 'speed'
df_speed_distances.head()

In [None]:
# Summary statistics of the per-bin distances, one column per data source.
df_speed_distances.describe()

In [None]:
# Total distance over all speed bins, per data source.
df_speed_distances.sum()

In [None]:
# Relative change between the column totals: each entry is compared with the
# column before it, so the first entry is NaN and the second is the fractional
# difference of that total with respect to the first.
df_speed_distances.sum().pct_change()

In [None]:
# Plot the distance sailed in each speed bin for both data sources on one
# wide (17 x 7 inch) axes.
fig, ax = plt.subplots(figsize=(17, 7))
df_speed_distances.plot(ax=ax, style='.-');
ax.set_ylabel('Distance [NM]')
ax.grid(True)
ax.set_xlabel('Ship speed [kts]')


In [None]:
# Same relative comparison of the totals, but with the 0-1 kts bin excluded.
# The bin is removed by its label (mid-point speed 0.5, i.e. index < 1) rather
# than by position: bins without data are absent from the frame, so the first
# row is not guaranteed to be the 0-1 kts bin.
df_speed_distances[df_speed_distances.index >= 1].sum().pct_change()