# Load libraries

In [None]:
import os
import numpy as np
import xarray as xr

from plotly import tools
from plotly import offline
import plotly.graph_objs as go

import cufflinks as cf
cf.set_config_file(offline=True)

# Download $\textbf{pCO}_2$ testbed - member 001 dataset

This contains model output from member 001 of the CESM large ensemble which can be used to evaluate ocean pCO2 interpolation methods. The dataset includes:

* pCO2 = ocean partial pressure of carbon dioxide (uatm)
* XCO2 = atmospheric carbon dioxide mixing ratio (ppmv)
* SST = sea surface temperature (degC)
* SSS = sea surface salinity (g/kg)
* MLD = mixed layer depth (m)
* CHL = chlorophyll concentration (mg/m3)

In [None]:
# To download all data, uncomment the lines below and delete this comment line
# (%%bash must be the first line of the cell). Downloading directly from the
# figshare website is usually faster.
# %%bash
# curl https://ndownloader.figshare.com/files/13193369?private_link=a3e9a208108d91d75f72 --output ../data/raw/Chl_2D_mon_CESM001_1x1_198201-201701.nc
# curl https://ndownloader.figshare.com/files/13194038?private_link=a3e9a208108d91d75f72 --output ../data/raw/MLD_2D_mon_CESM001_1x1_198201-201701.nc
# curl https://ndownloader.figshare.com/files/13194299?private_link=a3e9a208108d91d75f72 --output ../data/raw/pCO2_2D_mon_CESM001_1x1_198201-201701.nc
# curl https://ndownloader.figshare.com/files/13194548?private_link=a3e9a208108d91d75f72 --output ../data/raw/SSS_2D_mon_CESM001_1x1_198201-201701.nc
# curl https://ndownloader.figshare.com/files/13194665?private_link=a3e9a208108d91d75f72 --output ../data/raw/SST_2D_mon_CESM001_1x1_198201-201701.nc
# curl https://ndownloader.figshare.com/files/13195304?private_link=a3e9a208108d91d75f72 --output ../data/raw/XCO2_1D_mon_CESM001_native_198201-201701.nc

# Read each feature

In [None]:
# Directory holding the raw NetCDF files, relative to this notebook.
DATA_DIR = '../data/raw'

# Feature name -> NetCDF file that stores it.
dataset_names = dict(
    pCO2='pCO2_2D_mon_CESM001_1x1_198201-201701.nc',
    XCO2='XCO2_1D_mon_CESM001_native_198201-201701.nc',
    SST='SST_2D_mon_CESM001_1x1_198201-201701.nc',
    SSS='SSS_2D_mon_CESM001_1x1_198201-201701.nc',
    MLD='MLD_2D_mon_CESM001_1x1_198201-201701.nc',
    Chl='Chl_2D_mon_CESM001_1x1_198201-201701.nc',
)

# Open every file as an xarray Dataset, keyed by feature name.
ds = {}
for feature, nc_file in dataset_names.items():
    ds[feature] = xr.open_dataset(os.path.join(DATA_DIR, nc_file))

# Print information of each dataset

In [None]:
# Dataset.info() writes its summary directly to stdout and returns None, so it
# is called on its own: wrapping it in print() would add a stray "None" line
# after every summary.
for name, dataset_obj in ds.items():
    print("---------{}---------".format(name))
    dataset_obj.info()

# Convert xarray to dataframe

* Select the corresponding variables from each dataset: for example, `ds['pCO2']` has both `pCO2_socat` (pCO2 only at SOCAT locations) and `pCO2`, so both variables are converted to dataframes
* Keep only grid cells with finite values
* Convert pivot table to flat dataframe

In [None]:
# df[name]       -> flat dataframe of the full field (e.g. pCO2), finite values only
# df_socat[name] -> flat dataframe of the SOCAT-sampled field (e.g. pCO2_socat),
#                   finite and non-zero values only
df = {}
df_socat = {}
for name, dataset in ds.items():
    # Full field, e.g. pCO2: convert to a dataframe, drop non-finite rows and
    # turn the index levels into ordinary columns.
    frame = dataset[name].to_dataframe()
    df[name] = frame[np.isfinite(frame[name])].reset_index()

    # XCO2 is only time dependent, so it has no XCO2_socat variable.
    if name == 'XCO2':
        continue

    # SOCAT-sampled field, e.g. pCO2_socat.
    socat_col = '{}_socat'.format(name)
    socat_frame = dataset[socat_col].to_dataframe()
    socat_frame = socat_frame[np.isfinite(socat_frame[socat_col])].reset_index()

    # The *_socat variables are zero-filled at non-SOCAT locations, so only
    # non-zero values are kept. The comparison is `!= 0` rather than `> 0` so
    # that genuinely negative observations (e.g. SST below 0 degC) are not
    # discarded together with the zero fill.
    df_socat[name] = socat_frame[socat_frame[socat_col] != 0]

Here is how the data looks after conversion to a dataframe.

In [None]:
# Row/column count of the full pCO2 frame, followed by its first rows.
pco2_frame = df['pCO2']
print(pco2_frame.shape)
pco2_frame.head()

As expected, the SOCAT features have far fewer available values than the non-SOCAT features.

In [None]:
# Row/column count of the SOCAT-only pCO2 frame, followed by its first rows.
pco2_socat_frame = df_socat['pCO2']
print(pco2_socat_frame.shape)
pco2_socat_frame.head()

# Data visualization

In [None]:
# Mapbox token used to render the map tiles. Set the MAPBOX_ACCESS_TOKEN
# environment variable to use your own token; when it is unset or empty, the
# public token previously hard-coded in this notebook is used as a fallback.
mapbox_access_token = (
    os.environ.get('MAPBOX_ACCESS_TOKEN')
    or 'pk.eyJ1IjoiamFja3AiLCJhIjoidGpzN0lXVSJ9.7YK6eRwUNFwd3ODZff6JvA'
)

## XCO2

Since `XCO2` is not spatial data, we will visualize it separately.

TODO.

In [None]:
# Move the XCO2 frame out of `df` so that only the remaining features are left
# for the map traces built below. The membership check keeps this cell
# re-runnable: on a second run the key is already gone and the previously
# extracted df_XCO2 is left untouched instead of raising KeyError.
if 'XCO2' in df:
    df_XCO2 = df.pop('XCO2')

## Others

In [None]:
def _mean_map_trace(frame, column):
    """Build a scattermapbox trace of the per-grid-cell mean of ``column``.

    Parameters
    ----------
    frame : dataframe with ``ylat`` and ``xlon`` columns plus ``column``.
    column : name of the value column to average; also used as the trace name.

    Returns
    -------
    dict describing one ``scattermapbox`` marker trace, with one marker per
    (ylat, xlon) pair coloured and labelled by the mean value.
    """
    # Average only the plotted column over every row sharing a (ylat, xlon)
    # pair; selecting the column first avoids aggregating unrelated columns.
    cell_mean = frame.groupby(by=['ylat', 'xlon'])[column].mean().reset_index()
    return dict(
        lat=cell_mean.ylat,
        lon=cell_mean.xlon,
        mode='markers',
        marker=dict(
            color=cell_mean[column],
            opacity=1
        ),
        text=cell_mean[column],
        name=column,
        type='scattermapbox',
        hoverinfo='text'
    )


# All data points
data = [_mean_map_trace(frame, name) for name, frame in df.items()]

# Only SOCAT data
data_socat = [
    _mean_map_trace(frame, '{}_socat'.format(name))
    for name, frame in df_socat.items()
]

In [None]:
# One dropdown button per trace. The figure plots `data` followed by
# `data_socat`, so the button at position `pos` in that combined order shows
# trace `pos` and hides every other trace.
all_traces = data + data_socat
buttons = [
    dict(
        label=shown['name'],
        method='update',
        args=[{'visible': [pos == other for other in range(len(all_traces))]}]
    )
    for pos, shown in enumerate(all_traces)
]

In [None]:
# Single dropdown menu holding all trace buttons; active=-1 means no button
# is marked as selected initially.
dropdown_menu = dict(active=-1, buttons=buttons)
updatemenus = [dropdown_menu]

In [None]:
# Map view: light base style, fully zoomed out, no rotation or tilt.
mapbox_settings = dict(
    accesstoken=mapbox_access_token,
    bearing=0,
    center=dict(lat=38, lon=-94),
    pitch=0,
    zoom=0,
    style='light',
)

# Figure layout: no margins, no legend, with the dropdown menu attached.
layout = dict(
    margin=dict(t=0, b=0, r=0, l=0),
    autosize=True,
    hovermode='closest',
    showlegend=False,
    mapbox=mapbox_settings,
    updatemenus=updatemenus,
)

In [None]:
# Combine the full-field traces and the SOCAT-only traces (in that order, which
# the dropdown buttons rely on) and render the figure inline.
fig = {
    'data': data + data_socat,
    'layout': layout,
}
offline.iplot(fig, filename='scatter-mode')

In [None]:
# Write the figure to a standalone HTML file without opening it in a browser.
offline.plot(
    fig,
    filename="mean_pCO2_testbed_member_001_data_visualization.html",
    auto_open=False,
)