<font size=6>Ocean Data Analysis - Analyzing Sea Surface Temperature with Variational Inference</font>

Related research: <https://www.nature.com/articles/s41467-018-08066-0>

In [2]:
import warnings
warnings.filterwarnings('ignore')

import xarray as xr
!pip install netcdf4
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 

import sklearn 
import sklearn.mixture as mix 
import scipy.stats as stats 

import seaborn as sns
sns.set()



In [3]:
# NDBC standard-meteorological data for station "alxn6", read over OPeNDAP.
# NOTE(review): this cell was labelled "Texas Gulf of Mexico", but the station
# reports latitude ~44.33, longitude ~-75.93 in the output below; confirm that
# this is the intended buoy.
ds = xr.open_dataset(
    'https://dods.ndbc.noaa.gov/thredds/dodsC/data/stdmet/alxn6/alxn6.ncml'
).sel(time=slice('1991-04-01', '2023-04-25'))

# Flatten the dataset into a table, turning the index levels into ordinary
# columns so the rows are numbered 0..n-1.
noaa_df = ds.to_dataframe()
noaa_df = noaa_df.reset_index()
print(noaa_df)

          latitude  longitude                time  wind_dir  wind_spd  gust  \
0        44.331001 -75.933998 2008-09-01 00:00:00       NaN       NaN   NaN   
1        44.331001 -75.933998 2008-09-01 00:06:00       NaN       NaN   NaN   
2        44.331001 -75.933998 2008-09-01 00:12:00       NaN       NaN   NaN   
3        44.331001 -75.933998 2008-09-01 00:18:00       NaN       NaN   NaN   
4        44.331001 -75.933998 2008-09-01 00:24:00       NaN       NaN   NaN   
...            ...        ...                 ...       ...       ...   ...   
1064240  44.331001 -75.933998 2023-03-31 23:30:00       NaN       NaN   NaN   
1064241  44.331001 -75.933998 2023-03-31 23:36:00       NaN       NaN   NaN   
1064242  44.331001 -75.933998 2023-03-31 23:42:00       NaN       NaN   NaN   
1064243  44.331001 -75.933998 2023-03-31 23:48:00       NaN       NaN   NaN   
1064244  44.331001 -75.933998 2023-03-31 23:54:00       NaN       NaN   NaN   

         wave_height dominant_wpd average_wpd  mean

In [4]:
# Get sea surface temps: keep only the SST column, drop missing readings and
# renumber the rows 0..n-1 so row positions line up with the label array `p`.
cov = noaa_df[['sea_surface_temperature']].dropna().reset_index(drop=True)

# Fit mixture model 
num_components = 4
dpgmm_model = mix.BayesianGaussianMixture(
    n_components=num_components, 
    weight_concentration_prior_type='dirichlet_process',
    n_init=2,
    max_iter=50,
    # Fixed seed: the initialisation is random, so without it the state labels
    # and counts change on every run of the notebook.
    random_state=0)
# p[i] is the mixture component (state) assigned to row i of `cov`.
p = dpgmm_model.fit_predict(cov)

# Warnings are suppressed globally in this notebook, so report a fit that hit
# the iteration cap explicitly instead of letting it pass silently.
if not dpgmm_model.converged_:
    print(f'BayesianGaussianMixture did not converge within {dpgmm_model.max_iter} iterations')

# Count States: number of samples assigned to each component. `minlength`
# keeps one slot per component even if a component received no samples; the
# float cast preserves the dtype of the previous np.zeros-based counter.
state_counts = np.bincount(p, minlength=num_components).astype(float)
print(state_counts)

[150115. 232069. 248017. 294969.]


In [None]:
# Plot States: shade every sample that shares its state with the final sample,
# then overlay the full sea-surface-temperature series.
fig2, ax2 = plt.subplots()

reference_state = p[-1]
for sample_idx, state in enumerate(p):
    if state != reference_state:
        continue
    # One very faint vertical line per matching sample; dense runs of the
    # state accumulate into visibly darker bands.
    ax2.axvline(sample_idx, color='black', alpha=0.002)

sst_series = cov['sea_surface_temperature'].values
sns.lineplot(data=sst_series, ax=ax2, alpha=0.4)
ax2.set_ylabel('sst')
ax2.set_xlabel('time')
plt.show()

In [None]:
# Plot States: show only the SST samples whose state matches that of the final
# sample, and report their mean temperature.
fig, ax = plt.subplots()

# `p` was produced by fit_predict(cov), so it has one label per row of `cov`
# and a boolean mask selects the matching samples directly. This replaces a
# per-sample loop whose bare `except:` would have hidden any real error
# behind a printed 'skip'.
sst_values = cov['sea_surface_temperature'].values
in_last_state = (p == p[-1])
# Kept as a list of the same scalars, in the same order, as before.
covm = list(sst_values[in_last_state])

sns.lineplot(data=covm, ax=ax, alpha=0.9)
plt.ylabel('sst')
plt.xlabel('time')
plt.show()

print(np.mean(covm))