In [2]:
import os
import random
import math

import numpy as np
import pandas as pd
import haven.db as db 
import plotly.express as px

from mirrorverse.utils import read_data_w_cache

# Point the haven client at the 'haven' database and select the AWS profile
# used for credentials; both are set through process environment variables.
os.environ.update({
    'HAVEN_DATABASE': 'haven',
    'AWS_PROFILE': 'admin',
})

In [None]:
# Toggle between the full raw-feature table and its "_test" counterpart.
TEST = True

if TEST:
    table = 'movement_model_raw_features_m2_a2_test'
else:
    table = 'movement_model_raw_features_m2_a2'

data = read_data_w_cache(
    f'select * from {table}'
)
# Keep only the rows whose `_selected` flag is explicitly True or False.
data = data.loc[data['_selected'].isin([True, False])]
data.head()

In [None]:
# Replace the original `_individual` identifiers with consecutive integer ids:
# each distinct `_individual` (in order of first appearance in `data`) is given
# its 0-based position, which then takes over the `_individual` column name.
df = (
    data.merge(
        data[['_individual']]
        .drop_duplicates()
        .reset_index(drop=True)
        .reset_index(),  # exposes the new 0..n-1 position as an 'index' column
        on='_individual',
    )
    .drop(columns=['_individual'])
    .rename(columns={'index': '_individual'})
)
df.head()

In [None]:
# Number of rows for each value of the `_train` flag.
df.groupby('_train').size()

In [None]:
# Rescale the raw distance by a fixed factor of 100 and inspect its distribution.
df['normed_distance'] = df['distance'].div(100)
px.histogram(df['normed_distance'])

In [None]:
# Log-transform mixed layer thickness (0.001 is added before taking the log),
# print the mean of the logged values, then centre the feature with a fixed offset.
# NOTE(review): the offset is hard-coded rather than taken from the printed mean,
# presumably so every table is shifted by the same amount - confirm.
log_mlt = np.log(df['mixed_layer_thickness'] + 0.001)
print(log_mlt.mean())
df['normed_log_mlt'] = log_mlt - 3.068049438368808
px.histogram(df['normed_log_mlt'])

In [None]:
# Log-transform net primary production (0.001 is added before taking the log),
# print the mean of the logged values, then centre the feature with a fixed offset.
# NOTE(review): the offset is hard-coded rather than taken from the printed mean,
# presumably so every table is shifted by the same amount - confirm.
log_npp = np.log(df['net_primary_production'] + 0.001)
print(log_npp.mean())
df['normed_log_npp'] = log_npp - 1.9856236
px.histogram(df['normed_log_npp'])

# Add Additional Features

In [None]:
# Load the tag_key and fork_length_cm columns from the `mgietzmann_tags` table.
sizes = read_data_w_cache('select tag_key, fork_length_cm from mgietzmann_tags')
sizes.head()

In [None]:
# Standardise fork length to a z-score using the mean and standard deviation
# of the loaded `sizes` table itself; both statistics are printed for reference.
fork_length_mean = sizes['fork_length_cm'].mean()
fork_length_std = sizes['fork_length_cm'].std()
print(fork_length_mean)
print(fork_length_std)
sizes['normalized_fork_length'] = (sizes['fork_length_cm'] - fork_length_mean) / fork_length_std
px.histogram(sizes['normalized_fork_length'])

In [None]:
# Load the tag_key and region columns from the `mgietzmann_tag_regions` table.
regions = read_data_w_cache('select tag_key, region from mgietzmann_tag_regions')
regions.head()

In [None]:
# List the distinct region labels present, to compare against the regions
# that are given home coordinates below.
regions['region'].unique()

In [None]:
# Representative "home" coordinates (decimal degrees) for each tagging region.
# Fix: the SEAK and BC coordinates were swapped. ~57N, 136W lies off Southeast
# Alaska (SEAK) and ~53N, 130W lies on the British Columbia (BC) coast, so each
# region now carries its own point.
# NOTE(review): tables or models built from the previous values must be regenerated.
locs = pd.DataFrame([
    {
        'region': 'WA/OR',
        'home_lat': 46.0,
        'home_lon': -125.0
    },
    {
        'region': 'SEAK',
        'home_lat': 57.0,
        'home_lon': -136.0
    },
    {
        'region': 'BC',
        'home_lat': 53.0,
        'home_lon': -130.0
    },
])
# Outer join on the shared `region` column: tags whose region has no entry in
# `locs` are kept, with missing home coordinates.
regions = regions[['tag_key', 'region']].merge(locs, how='outer')
# Missing home coordinates get the out-of-range sentinel 360.0
# (4.0 for latitude and 2.0 for longitude after the scaling below).
regions['home_lat'] = regions['home_lat'].fillna(360.0)
regions['home_lon'] = regions['home_lon'].fillna(360.0)
# Scale by the maximum magnitude of a latitude (90) and of a longitude (180).
regions['normed_home_lat'] = regions['home_lat'] / 90.0
regions['normed_home_lon'] = regions['home_lon'] / 180
regions.head()

In [None]:
# Attach the region and size features to the movement rows. `DataFrame.merge`
# defaults to an inner join on the shared columns, so rows without a match are
# dropped. Print the shape of `df` (the frame actually being merged) before and
# after so any such loss is visible; the previous version printed `data.shape`,
# which the merge never changes.
print(df.shape)
df = df.merge(regions).merge(sizes)
print(df.shape)

In [None]:
def month_to_radians(x):
    """Map a calendar month number (1-12) to an angle on the yearly cycle.

    January maps to 0 and each subsequent month advances by 2*pi/12, so
    December maps to 11/12 of a full turn and the cycle closes back onto
    January one step later. (Dividing by 11 instead would place December at
    2*pi, the same point on the circle as January, making the two months
    indistinguishable after taking cos/sin.)

    Works element-wise on scalars, numpy arrays and pandas Series.
    """
    return (x - 1) / 12 * 2 * np.pi

# Encode the calendar month of each observation as a point on the unit circle:
# convert the month to an angle once, then take its cosine and sine.
month_angle = df['time'].dt.month.apply(month_to_radians)
df['cos_time'] = np.cos(month_angle)
df['sin_time'] = np.sin(month_angle)
df.head()

# Write Data

In [30]:
# Pick the output table to match the TEST toggle, then persist the feature frame.
if TEST:
    table = 'movement_model_features_m3_a2_test'
else:
    table = 'movement_model_features_m3_a2'

# NOTE(review): the third argument is presumably the list of partition/key
# columns expected by haven's `write_data` - confirm against its documentation.
db.write_data(df, table, ['tag_key'])