# Analyze time

# Purpose
* Data could be missing in AIS, in GPS, or in both; this notebook investigates that.

# Methodology
* Load AIS data
* Load GPS data
* Compare time stamps.

## Results
Describe and comment on the most important results.

# Setup

In [None]:
# %load imports.py
from typing import no_type_check
%matplotlib inline
%load_ext autoreload
%autoreload 2

import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from pyaisdb.database import DB
import geopandas as gpd



In [None]:
from shapely.geometry import Point

In [None]:
# Database handle; its `.conn` attribute is what the PostGIS readers in this notebook are given.
db = DB()

In [None]:
# Query parameters. The two date strings are interpolated into the SQL of both
# the AIS and the GPS query as exclusive lower/upper bounds (offset +02).
date1 = '2020-07-10 00:00:00+02'
date2 = '2020-07-19 23:59:59+02'
# MMSI of the vessel of interest (the AIS query filters on this same number).
mmsi = 265520390
# Rows whose `sog` is below this value are excluded by both queries.
# NOTE(review): presumably speed over ground; unit not visible here, confirm.
sog_min = 1

## AIS

In [None]:
# Load the AIS segments for the selected vessel and time window.
# Inside the SQL text `date1`/`date2` are table columns; the values inserted
# through {date1}/{date2} are the notebook parameters of the same name.
# The vessel filter uses the `mmsi` parameter, so changing it in the
# parameter cell actually changes the query.
sql = f"""select *
from segments_sjfv_2020
where sog>={sog_min}
and mmsi={mmsi}
and date2 < '{date2}'
and date1 > '{date1}' """

df_ais = gpd.GeoDataFrame.from_postgis(sql, db.conn, geom_col='segment', index_col='date1')
# Index is the segment's `date1` column; order chronologically and make sure
# no two segments share the same `date1`.
df_ais = df_ais.sort_index()
assert df_ais.index.is_unique, "AIS segments have duplicated date1 values"

In [None]:
# Summary statistics of the AIS segment table.
df_ais.describe()

In [None]:
# First rows of the AIS segment table.
df_ais.head()

In [None]:
# Quick look at the AIS segment geometries.
df_ais.plot()

In [None]:
# Inspect a single segment: take the first AIS row and show the first
# coordinate of its 'segment' geometry.
s = df_ais.iloc[0]
segment = s['segment']
segment.coords[0]

In [None]:
# Wrap that first coordinate in a shapely Point.
point = Point(segment.coords[0])

In [None]:
# Expand every AIS segment into two point observations: coordinate 0 of the
# segment stamped with the row's index value (`date1`), and coordinate 1
# stamped with the row's `date2`. Both observations carry all other columns of
# the segment row. The order (start, end, start, end, ...) is kept because the
# de-duplication that follows keeps the first occurrence of each time stamp.
# NOTE(review): coords[1] is the segment's second vertex; this equals the end
# point only if every segment has exactly two vertices - confirm.
records = []

for start_time, row in df_ais.iterrows():

    attrs = row.copy()
    line = attrs['segment']
    end_points = (line.coords[0], line.coords[1])
    end_time = attrs.pop('date2')

    for stamp, xy in zip((start_time, end_time), end_points):
        observation = attrs.copy()
        observation['pos'] = Point(xy)
        observation.name = stamp  # becomes the index label in the DataFrame
        records.append(observation)

df_ais_pos = pd.DataFrame(records)
df_ais_pos['geometry'] = gpd.GeoSeries(df_ais_pos['pos'])
df_ais_pos_raw = gpd.GeoDataFrame(df_ais_pos)

In [None]:
# Several observations can share a time stamp; keep the first occurrence of
# each and order the result chronologically.
is_repeat = df_ais_pos_raw.index.duplicated(keep='first')
df_ais_pos = df_ais_pos_raw.loc[~is_repeat].sort_index()
assert df_ais_pos.index.is_unique

In [None]:
# Summary statistics of the de-duplicated AIS point table.
df_ais_pos.describe()

## GPS

In [None]:
# Load the GPS log for the same time window and minimum `sog`, indexed by its
# `time_info` column, in chronological order.
sql = f"""select * 
  FROM projects._49145341_d2e2f_blue_data_varmdo
	where time_info < '{date2}'
                  and time_info > '{date1}'
and sog >= {sog_min}"""
df_gps = gpd.GeoDataFrame.from_postgis(
    sql,
    db.conn,
    geom_col='pos',
    index_col='time_info',
).sort_index()
assert df_gps.index.is_unique

In [None]:
# Summary statistics of the GPS table as loaded.
df_gps.describe()

In [None]:
# First GPS row, used to inspect a single position.
s = df_gps.iloc[0]

In [None]:
# The 'pos' geometry of that row; show its x coordinate.
position = s['pos']
position.x

In [None]:
# y coordinate of the same position.
position.y

In [None]:
#?df_gps.plot

In [None]:
# Overlay the AIS points and the GPS positions on one map.
fig,ax=plt.subplots()
fig.set_size_inches(17,7)
df_ais_pos.plot(markersize=0.2, label='AIS', ax=ax)
df_gps.plot(markersize=0.2, label='GPS', ax=ax, alpha=0.2)
# The labels passed above are only shown once a legend is drawn. The plotted
# markers are tiny (markersize=0.2), so enlarge them in the legend.
ax.legend(markerscale=20)
ax.set_title('AIS and GPS positions');

In [None]:
# GPS summary statistics (df_gps is unchanged since it was loaded).
df_gps.describe()

In [None]:
# First rows of the GPS table.
df_gps.head()

In [None]:
def _clip_to_window(frame, first, last):
    """Return a copy of `frame` holding only rows whose index lies in [first, last]."""
    inside = (frame.index >= first) & (frame.index <= last)
    return frame.loc[inside].copy()


# Common time window of the two sources: from the later of the two first
# stamps to the earlier of the two last stamps (both frames are index-sorted).
start = np.max([df_gps.index[0], df_ais_pos.index[0]])
stop = np.min([df_gps.index[-1], df_ais_pos.index[-1]])

df_gps = _clip_to_window(df_gps, start, stop)
df_ais_pos = _clip_to_window(df_ais_pos, start, stop)

In [None]:
# Elapsed time in seconds for both sources, measured from the first GPS stamp
# so that the two 't' columns share one origin.
t0 = df_gps.index[0]
df_gps['t'] = (df_gps.index - t0).total_seconds().values
df_ais_pos['t'] = (df_ais_pos.index - t0).total_seconds().values

In [None]:
# Elapsed seconds of the AIS points relative to the first GPS time stamp.
df_ais_pos['t']

In [None]:
# 'sog' over time for each source in its own panel: GPS on top, AIS (red) below.
fig, (ax_gps, ax_ais) = plt.subplots(nrows=2, figsize=(17, 7))
df_gps.plot(y='sog', kind='line', label='GPS', ax=ax_gps)
df_ais_pos.plot(y='sog', kind='line', label='AIS', style='r-', ax=ax_ais)
plt.tight_layout()



In [None]:
# Zoomed comparison: overlay GPS and AIS 'sog' on one axis for all samples
# earlier than `stop_date`.
fig,ax=plt.subplots()
fig.set_size_inches(17,7)

stop_date = '2020-07-10 06:39:25+00:00'  # exclusive upper bound of the zoom window
df_gps_cut = df_gps.loc[df_gps.index < stop_date].copy()
df_ais_pos_cut = df_ais_pos.loc[df_ais_pos.index < stop_date].copy()

# Alternative view on a common 30 s grid (disabled):
#df_gps_cut.resample('30S').mean().dropna().plot(y='sog', kind='line', style='.-', ax=ax, label='GPS')
#df_ais_pos_cut.resample('30S').mean().dropna().plot(y='sog', kind='line', style='.-', ax=ax, label='AIS')

# Drop only samples whose 'sog' is missing. A bare dropna() would also discard
# rows with a valid 'sog' but a NaN in some unrelated column, which would show
# up here as gaps that are not in the speed data itself.
df_gps_cut.dropna(subset=['sog']).plot(y='sog', kind='line', style='.-', ax=ax, label='GPS')
df_ais_pos_cut.dropna(subset=['sog']).plot(y='sog', kind='line', style='.-', ax=ax, label='AIS')
ax.set_ylabel('sog');

In [None]:
# GPS summary statistics after trimming to the common time window and adding 't'.
df_gps.describe()

In [None]:
# AIS point summary statistics after trimming to the common time window and adding 't'.
df_ais_pos.describe()