# Data exploration

# Purpose
Explore the ForSea data: load a sample of the logged time series and plot the recorded track on a map.

# Methodology
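* The raw CSV file is read in chunks of 10,000 rows; only rows with a speed over ground above 0.01 kts are kept, and reading stops once more than 100,000 rows have been collected.
* The speed over ground is converted from knots to m/s (column `V`).
* The data is resampled to 30-second means, and windows without a position are dropped.
* The track is split on course over ground (below 150 deg = "out", otherwise "home") and both parts are drawn on a folium map.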

# Setup

In [None]:
# %load imports.py
%matplotlib inline
%load_ext autoreload
%autoreload 2

import src.data
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
from collections import OrderedDict

from IPython.display import display

# Display settings: show up to 999 rows and never truncate columns.
pd.set_option("display.max_rows", 999)
pd.set_option("display.max_columns", None)


In [None]:
file_path = os.path.join(src.data.path_tycho)

max_rows = 100000  # stop reading once more than this many rows have been kept
min_V = 0.01  # [kts] rows at or below this speed over ground are discarded

# Load a small part of the data: read the CSV in chunks and keep only the rows
# where the speed over ground exceeds min_V.
# The filtered chunks are collected in a list and concatenated once, because
# DataFrame.append was removed in pandas 2.0 and growing a frame chunk by chunk
# re-copies all previously loaded rows on every iteration.
chunks = []
n_rows = 0
# The context manager (pandas >= 1.2) closes the file even when the loop
# stops early.
with pd.read_csv(file_path, chunksize=10000) as reader:
    for chunk in reader:
        moving = chunk['Speed over ground (kts)'] > min_V
        chunk = chunk.loc[moving]
        chunks.append(chunk)
        n_rows += len(chunk)

        if n_rows > max_rows:
            break

df_raw = pd.concat(chunks)

# Use the timestamp as a DatetimeIndex (needed for time-based resampling).
df_raw = df_raw.set_index('Timestamp [UTC]')
df_raw.index = pd.to_datetime(df_raw.index)

In [None]:
# Summary statistics of the columns in the loaded sample.
df_raw.describe()

In [None]:
# First rows of the loaded sample, to inspect the available columns.
df_raw.head()

In [None]:
# Build the working frame from df_raw (which is left untouched):
# shorter position column names, and the speed expressed in m/s as 'V'
# instead of the original column in knots.
column_names = {
    'Latitude (deg)': 'latitude',
    'Longitude (deg)': 'longitude',
}
speed_column = 'Speed over ground (kts)'

df = (
    df_raw
    .rename(columns=column_names)
    # knots -> km/h (x 1.852) -> m/s (/ 3.6)
    .assign(V=lambda frame: frame[speed_column] * 1.852 / 3.6)
    .drop(columns=[speed_column])
)



## Plot maps

In [None]:
# Quick look at the track: latitude plotted against longitude for all samples in df.
df.plot(x='longitude', y='latitude')

In [None]:
# Down-sample to 30-second means and drop the windows that have no position
# (time windows without any samples come out of the resampling as NaN rows).
# The lowercase '30s' alias is used because the uppercase 'S' alias is
# deprecated since pandas 2.2.
# NOTE(review): the course is averaged arithmetically, which ignores the
# wrap-around at 0/360 deg - confirm this is acceptable for this data.
df_ = (
    df.resample('30s')
    .mean()
    .dropna(subset=['latitude', 'longitude'])
)

# Split the track on course over ground. Rows with a missing course compare as
# False and therefore end up in df_home.
# NOTE(review): 150 deg presumably separates the two sailing directions - TODO confirm.
mask = df_['Course over ground (deg)'] < 150
df_out = df_.loc[mask]
df_home = df_.loc[~mask]



In [None]:
import folium

# Centre the map on the mean position of the resampled track.
map_center = (df_['latitude'].mean(), df_['longitude'].mean())
my_map = folium.Map(location=map_center, zoom_start=14)

In [None]:
# Draw the two parts of the track (df_out / df_home) as separate polylines.
# Each polyline connects the (latitude, longitude) rows in frame order.
track_columns = ['latitude', 'longitude']

# "out" part of the track
points = df_out[track_columns].to_records(index=False)
line = folium.PolyLine(points, popup='out').add_to(my_map)

# "home" part of the track
points = df_home[track_columns].to_records(index=False)
line2 = folium.PolyLine(points, popup='home').add_to(my_map)




In [None]:
# Render the folium map as the cell output.
my_map