# Data Exploration

In [None]:
# Modules
import warnings
warnings.simplefilter('ignore') # silences every warning for the rest of the session -- this also hides deprecation notices from the libraries below

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import xarray as xr
import seaborn as sns
import datetime as dt
import fsspec
# s3fs is not referenced by name in this notebook; presumably it is needed as the
# fsspec backend for the s3:// URL opened further down -- confirm before removing
import s3fs
import scipy.stats as stats
# render xarray objects with their HTML representation
xr.set_options(display_style="html")  

# IPython magics: embed static images of each plot in the notebook output
%matplotlib inline  
# default figure size as (width, height)
plt.rcParams['figure.figsize'] = 12, 6
# request the 'retina' figure format from the inline backend
%config InlineBackend.figure_format = 'retina' 

# Mole Crab Data

In [None]:
### Load the annual mole crab abundance table and preview its first rows
# index_col=2 makes the CSV's third column the DataFrame index
crabs = pd.read_csv(
    './MoleCrab_abundance_annual.csv',
    index_col=2,
)

crabs.head()

In [None]:
### Preview the last rows of the table
# `crabs` was already loaded by the previous cell; reading the same CSV a
# second time only repeated the disk I/O and rebound the name.
crabs.tail()

In [None]:
### Plot a distribution of abundance
# sns.distplot is deprecated (and the deprecation warning is hidden by the
# global warnings filter). This is the histplot equivalent given in the
# seaborn migration notes: density-normalised histogram plus a KDE curve.
sns.histplot(crabs['Abundance (psm)'], kde=True, stat='density', kde_kws=dict(cut=3))

In [None]:
### Plot annual data: one bar per index entry (the index is compared against years later on)

plt.bar(crabs.index, crabs['Abundance (psm)'])

# tick every second year across the plotted range
plt.xticks(list(range(2000, 2020, 2)))

# label the axes so the figure can be read on its own
plt.xlabel('Year')
plt.ylabel('Abundance (psm)')
plt.title('Annual mole crab abundance')

plt.grid()

plt.show()

In [None]:
### Open the sea surface temperature (SST) data, which is available as a zarr store

file_location = 's3://mur-sst/zarr'

# map the remote store into a key/value interface; anon=True is forwarded to the filesystem backend
ikey = fsspec.get_mapper(url=file_location, anon=True)

# consolidated=True asks xarray to read the store's consolidated metadata
ds_sst = xr.open_zarr(store=ikey, consolidated=True)

# To inspect the dataset, evaluate `ds_sst` as the last line of a cell.

In [None]:
### Read data that matches crab data, in time and location

# Step 1: restrict to the period of interest (label-based slice, both ends inclusive).
# Step 2: pick the grid cell closest to the requested point. An exact-match
# lookup on floating-point lat/lon raises KeyError whenever the stored
# coordinate is not bit-identical to the literal (e.g. float32 coordinates), so
# nearest-neighbour selection is used instead. It is kept in its own .sel call
# because xarray may reject `method=` when combined with slice indexers.
sst_timeseries = (
    ds_sst['analysed_sst']
    .sel(time=slice('2003-01-01', '2016-12-31'))
    .sel(lat=37.76, lon=-124.5, method='nearest')
    .load()  # pull the selected values into memory
)

### Plot and explore it
sst_timeseries.plot()
sst_timeseries

In [None]:
### Reduce the SST time series to one mean value per calendar year

sst_annual = (
    sst_timeseries
    .groupby('time.year')                 # group samples by the year of their timestamp
    .mean(dim='time', keep_attrs=True)    # average within each year, keeping the variable's attributes
)

sst_annual.plot()

### Add temperature data to the crab data frame

# Keep only 2003 onwards; .copy() makes this an independent frame so the
# column assignment below does not write into a slice of the original.
crabs = crabs[crabs.index >= 2003].copy()

# Build the annual SST table indexed by the years actually present in
# `sst_annual`. The previous hard-coded range(2003, 2018) had 15 entries while
# the 2003-2016 selection yields 14 annual means, so the lengths did not match.
# `columns` is a list because a set has no defined order.
# The 273.15 offset assumes analysed_sst is stored in kelvin -- confirm against the dataset attributes.
tmp = pd.DataFrame(
    data=sst_annual.data - 273.15,
    columns=['SST'],
    index=sst_annual['year'].values,
)

# Assignment aligns on the index; crab years with no SST value end up as NaN.
crabs['SST'] = tmp['SST']

### Take a look

crabs

### Scatter plot of abundance against SST

plt.figure(figsize=(8, 6))

# star markers only, no connecting line
plt.plot(
    crabs['SST'],
    crabs['Abundance (psm)'],
    marker='*',
    linestyle='None',
)

### Correlation
# Use only rows where both values are present: a single NaN (for example a crab
# year without SST) would otherwise make the coefficient NaN.
paired = crabs[['SST', 'Abundance (psm)']].dropna()
r_value, p_value = stats.pearsonr(paired['SST'], paired['Abundance (psm)'])

# Printed explicitly because a bare expression is only displayed when it is
# the last statement of a cell.
print(f"Pearson r = {r_value:.3f}, p-value = {p_value:.3g} (n = {len(paired)})")

### Time series plot: SST and abundance on a shared x-axis with separate y-axes
# (The scatter plot that was repeated here was a copy of the one above and has
# been dropped; this section draws only the time series figure.)

fig, ax1 = plt.subplots()

# left axis: SST
color = 'tab:red'

ax1.set_xlabel('Year')

ax1.set_ylabel('SST', color=color)

ax1.plot(crabs.index, crabs['SST'], color=color)

ax1.tick_params(axis='y', labelcolor=color)

ax2 = ax1.twinx()  # instantiate a second axes that shares the same x-axis

# right axis: abundance. The earlier version plotted the undefined names
# `t` and `data2` under the label 'sin', which raised NameError.
color = 'tab:blue'

ax2.set_ylabel('Abundance (psm)', color=color)  # we already handled the x-label with ax1

ax2.plot(crabs.index, crabs['Abundance (psm)'], color=color)

ax2.tick_params(axis='y', labelcolor=color)

fig.tight_layout()  # keep the right-hand y-label from being clipped
