# Explore Data 2

In [None]:
# Modules and notebook-wide display configuration
import warnings
# NOTE: the 'ignore' action silences *all* warnings for the rest of the session,
# including deprecation and runtime warnings raised by the libraries below.
warnings.simplefilter('ignore')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import xarray as xr
import seaborn as sns
import datetime as dt
import fsspec
import s3fs
import scipy.stats as stats
# Render xarray objects with the HTML representation in notebook output
xr.set_options(display_style="html")  

# IPython magics: embed static plot images directly in the notebook output
%matplotlib inline  
# Default figure size (width, height) for every plot that does not set its own
plt.rcParams['figure.figsize'] = 12, 6
# Ask the inline backend for 'retina' (high-resolution) figure output
%config InlineBackend.figure_format = 'retina' 

# Mole Crab Data

In [None]:
# Load the annual mole crab table; the first CSV column is used as the row index.
crabs = pd.read_csv(
    './Annual_UpwellingAprJul_OceanBeach.csv',
    index_col=0,
)

# Preview the first rows to check which columns were read.
crabs.head()

# SST Data

In [None]:
### Open the sea surface temperature dataset, stored in Zarr format on S3

file_location = 's3://mur-sst/zarr'

# Expose the bucket path as a key-value mapping; anon=True requests
# anonymous access, so no credentials are passed.
ikey = fsspec.get_mapper(file_location, anon=True)

# Open the store through its consolidated metadata.
ds_sst = xr.open_zarr(store=ikey, consolidated=True)

# Evaluate `ds_sst` in a cell of its own to inspect variables and coordinates.

In [None]:
### Extract the SST series that matches the crab data in time and location

# Select 2003-2016 inside a small lat/lon box and pull the values into memory.
# NOTE(review): the original inline notes mention lat 37.76 (inside the box) and
# lon -124.5 (outside the selected range) -- confirm the intended site.
sst_timeseries = (
    ds_sst['analysed_sst']
    .sel(
        time=slice('2003-01-01', '2016-12-31'),
        lat=slice(37.7, 37.8),
        lon=slice(-122.55, -122.45),
    )
    .load()
)

# Mask every value that is not above 273.15 (presumably Kelvin, i.e. 0 degC --
# confirm units), then average over the box so only the time dimension remains.
sst_timeseries = (
    sst_timeseries
    .where(sst_timeseries > 273.15)
    .mean(dim=['lat', 'lon'], keep_attrs=True, skipna=True)
)

### Plot the series and display the resulting DataArray
sst_timeseries.plot()
sst_timeseries

# Making one Pandas DataFrame with all the Data
## Averaged SST for each year's upwelling season

In [None]:
# Add an 'SST' column holding the mean SST of each year's 1 April - 31 July
# window, with 273.15 subtracted (Kelvin -> degC, assuming the source is in
# Kelvin -- confirm against the dataset attributes).
crabs['SST'] = np.nan  # reset first so re-running this cell starts clean
for year in range(2003, 2017):
    season_mean = sst_timeseries.sel(
        time=slice(str(year) + '-04-01', str(year) + '-07-31')
    ).mean()
    # Boolean-mask assignment fills every row of this year and is a no-op when
    # the year is absent from `crabs`; indexing the first match (`a[0]`) raised
    # IndexError in that case. `.item()` stores a plain Python float instead of
    # a 0-d array.
    crabs.loc[crabs['Year'] == year, 'SST'] = season_mean.item() - 273.15
crabs

# Plot data

## Time series plot

In [None]:
# Yearly counts for each crab category, drawn on a single set of axes.
plt.figure(figsize=(8, 6))

# (column, line/marker format) pairs, plotted in this order.
series_styles = (
    ('Female', '*-'),
    ('Female w Eggs', 'd-'),
    ('Male', 'd-'),
)
for column, fmt in series_styles:
    plt.plot(crabs['Year'], crabs[column], fmt, label=column)

plt.ylabel('No. Crabs')
plt.legend()
plt.grid()
plt.show()

## Plot of two variables on different axes: SST + some Crab data

In [None]:
# Compare SST (left y-axis) with one crab column (right y-axis) over the years.
var = 'Male'  # crab column drawn on the right-hand axis
fig, ax1 = plt.subplots()

# Left axis: SST
color = 'tab:red'
ax1.set_xlabel('Year')  # shared x-axis label; it was never set before
ax1.set_ylabel('SST', color=color)
ax1.plot(crabs.Year, crabs['SST'], '.-', color=color)
ax1.tick_params(axis='y', labelcolor=color)

# Right axis: a second Axes that shares the same x-axis
ax2 = ax1.twinx()
color = 'tab:blue'
ax2.set_ylabel(var + ' Crabs', color=color)
ax2.plot(crabs.Year, crabs[var], '*-', color=color)
ax2.tick_params(axis='y', labelcolor=color)

fig.tight_layout()

## Scatter plot

In [None]:
# Scatter of one crab column against SST, with points coloured by year.
var = 'Female w Eggs'  # crab column shown on the y-axis
plt.figure(figsize=(8, 6))
# The legend label follows `var`; it was hard-coded to 'Female', which
# mislabelled the data whenever another column was plotted.
plt.scatter(crabs['SST'], crabs[var], 40, c=crabs['Year'], label=var, cmap='jet')
plt.colorbar(label='Year')  # name the quantity encoded by the colours
plt.legend()
plt.ylabel('No. Crabs')
plt.xlabel('SST')
plt.grid()
plt.show()

## Correlation between SST and crab data

In [None]:
# Pearson correlation between SST and each crab column, printed for three
# row selections: all years, all years except 2015, and years before 2015.
print('SST-variable correlation\n(Correlation, p-value) \n (<0.05 is good!)\n')

# Columns between the first and the last one (presumably 'Year' comes first and
# 'SST' last -- verify against the table layout).
crab_columns = list(crabs)[1:-1]
has_sst = ~np.isnan(crabs['SST'])

year_filters = (
    pd.Series(True, index=crabs.index),  # no restriction on year
    crabs['Year'] != 2015,
    crabs['Year'] < 2015,
)

for pass_number, year_ok in enumerate(year_filters):
    if pass_number > 0:
        print('\n')  # separator between consecutive selections
    for column in crab_columns:
        # Keep only rows where both SST and the crab column are present.
        subset = crabs[has_sst & (~np.isnan(crabs[column])) & year_ok]
        print(column)
        print(stats.pearsonr(subset['SST'], subset[column]))

# Linear regression

In [None]:
# Least-squares line of each crab column against SST; prints the column name,
# then the fitted slope and the p-value.
sst_present = ~np.isnan(crabs['SST'])
for column in list(crabs)[1:-1]:
    # Use only rows where both SST and the crab column are present.
    paired = crabs[sst_present & (~np.isnan(crabs[column]))]
    print(column)
    fit = stats.linregress(paired['SST'], paired[column])
    print(fit.slope, fit.pvalue)