# Project Pythia: GeoCAT-Comp
## Part one: Data Analysis tools
In the next few code blocks, we'll show how to extract specific signals from meteorological data.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import xarray as xr

import geocat.datafiles as gdf
from geocat.comp import fourier_filter

In [None]:
# Read the CO-OPS CSV file with pandas and wrap the resulting table in an xarray DataArray
csv_path = gdf.get("ascii_files/CO-OPS_9415020_wl.csv")
dataset = xr.DataArray(pd.read_csv(csv_path))

# Keep every row of the 'Verified (ft)' column as the signal to analyze
xr_data = dataset.loc[:, 'Verified (ft)']

In [None]:
# Sampling rate of the record, in points per hour
data_freq = 10

# Tide cycle frequencies (one cycle per 12.4206 and per 2 * 12.4206 time units)
# and the frequency resolution of the record
tide_freq1 = 1 / (1 * 12.4206)
tide_freq2 = 1 / (2 * 12.4206)
res = data_freq / (len(xr_data))

# Each cutoff band extends five resolution steps either side of its tide frequency
half_width = res * 5
cflow1, cfhigh1 = tide_freq1 - half_width, tide_freq1 + half_width
cflow2, cfhigh2 = tide_freq2 - half_width, tide_freq2 + half_width

# Create a figure holding one subplot; figsize is (width, height) in inches
fig, ax = plt.subplots(1, 1, dpi=100, figsize=(8, 4), constrained_layout=True)

# Only samples 2000 through 2999 are drawn for every curve
window = slice(2000, 3000)

# Start from the unfiltered signal and plot it
no_tide = xr_data
ax.plot(no_tide[window])

# Block the band around each tide frequency in turn (first set of cutoffs, then
# the second), plotting the progressively filtered signal after every pass
for band_low, band_high in ((cflow1, cfhigh1), (cflow2, cfhigh2)):
    no_tide = fourier_filter(no_tide,
                             data_freq,
                             cutoff_frequency_low=band_low,
                             cutoff_frequency_high=band_high,
                             band_block=True)
    ax.plot(no_tide[window])

# Show figure
fig.show()

In [None]:
# Generate figure with 2 by 1 subplots and set its size (width, height) in inches
fig, axs = plt.subplots(2, 1, dpi=100, figsize=(8, 4), constrained_layout=True)

# Run NumPy's Fourier Transform once per signal (the original data and the data
# after both fourier_filter passes) and keep coefficients 1-99; index 0, the
# zero-frequency term, is skipped. Both panels reuse these arrays instead of
# recomputing the transform for every plot call.
original_spectrum = np.fft.fft(xr_data)[1:100]
filtered_spectrum = np.fft.fft(no_tide)[1:100]

# Plot the real part of both spectra
axs[0].set_title('real')
axs[0].plot(np.real(original_spectrum))
axs[0].plot(np.real(filtered_spectrum))

# Plot the imaginary part of both spectra
axs[1].set_title('imag')
axs[1].plot(np.imag(original_spectrum))
axs[1].plot(np.imag(filtered_spectrum))

# Show figure
fig.show()

In [None]:
# Generate figure with a single subplot and set its size (width, height) in inches
fig, axs = plt.subplots(1, 1, dpi=100, figsize=(8, 4), constrained_layout=True)

# Define start and end of the plotted index range. An end of None runs the
# slice through the final sample; an end of -1 would stop one sample short.
start = 0
end = None

# Plot the real part of the original and of the filtered data
axs.set_title('real')
axs.plot(np.real(xr_data)[start:end])
axs.plot(np.real(no_tide)[start:end])

# Show plot
fig.show()

# Part 2
Sea surface temperature in San Francisco Bay, from a netCDF file.

In [None]:
# Resolve the sample netCDF file and load it as a DataArray
sfbay_path = gdf.get("netcdf_files/sfbay_2020_met.nc")
dataset2 = xr.load_dataarray(sfbay_path)

# Leave the array as the last expression so the notebook displays it
dataset2

In [None]:
# Pull the water temperature and timestamp columns (all rows) out of the table
data2, time2 = (dataset2.loc[:, column]
                for column in ('Water Temp (°C)', 'Time (GMT)'))


In [None]:
# Missing readings are marked with '-' in this dataset; overwrite them with np.nan
dash_mask = data2 == '-'
data2[dash_mask] = np.nan

# Convert the column to floating point
data2 = data2.astype(float)

# This is real data, so check for NaNs: show the dim_0 coordinates where they occur
nan_mask = np.isnan(data2)
data2[nan_mask].dim_0

In [None]:
# We've got NaNs, and since they aren't consecutive each one can be replaced
# by the mean of its two neighbours.
# The NaN positions are located up front so the loop only visits the gaps
# rather than every sample. The boundary branches keep a NaN in the first
# sample from being averaged with the last sample (index -1 wraps around)
# and a NaN in the last sample from raising IndexError.
last_index = len(data2) - 1
for index in np.flatnonzero(np.isnan(data2.values)):
    if index == 0:
        data2[index] = data2[index + 1]  # no left neighbour: copy the right one
    elif index == last_index:
        data2[index] = data2[index - 1]  # no right neighbour: copy the left one
    else:
        data2[index] = (data2[index - 1] + data2[index + 1]) / 2  # interp nans
    print(index)  # report which positions were filled

In [None]:
# Plot the water temperature series (data2) using xarray's built-in plotting
data2.plot()

## What is the underlying information upon which Fourier filters act?
Fourier decomposition is a transform from time-series data to frequency-series data.
This is done either in the general case with a Discrete Fourier Transform, which is computationally expensive, $O(n^2)$, and has arbitrary frequency resolution, or via the Fast Fourier Transform, which works on the specific case of data with a uniform sample rate and has a frequency resolution of $\text{resolution} = \text{rate}/n$, but has the advantage of being comparatively computationally inexpensive, $O(n \log n)$.


In [None]:
# Generate figure with 2 by 1 subplots and set its size (width, height) in inches
fig, axs = plt.subplots(2, 1, dpi=100, figsize=(8, 4), constrained_layout=True)

# Run NumPy's Fourier Transform on the temperature series once and keep
# coefficients 1-24; index 0, the zero-frequency term, is skipped. Both panels
# reuse this array instead of recomputing the transform.
temperature_spectrum = np.fft.fft(data2)[1:25]

# Plot the real part of the spectrum
axs[0].set_title('real')
axs[0].plot(np.real(temperature_spectrum))

# Plot the imaginary part of the spectrum
axs[1].set_title('imag')
axs[1].plot(np.imag(temperature_spectrum))

# Show figure
fig.show()

## Data manipulation

We want to detrend the data, but we aren't sure what trends are in the dataset, and we don't want to remove something interesting, so we will plot the frequencies we intend to remove against our dataset before modifying our data.

In [None]:
# Sampling rate in points per hour, and the resulting fft resolution in cycles per hour
data_freq2 = 1
res2 = data_freq2 / len(data2)

# Band to preview: steady state (inclusive) up to 4 resolution steps,
# i.e. up to 3 cycles per year (n-1)
cflow, cfhigh = res2 * 0, res2 * 4

# Keep only that band so it can be inspected before anything is removed
blocked = fourier_filter(
    data2,
    data_freq2,
    cutoff_frequency_low=cflow,
    cutoff_frequency_high=cfhigh,
    band_pass=True,
)

# Overlay the band-passed component on the unfiltered series
data2.plot()
blocked.plot()

In [None]:
# Sampling rate in points per hour, and the resulting fft resolution in cycles per hour
data_freq2 = 1
res2 = data_freq2 / len(data2)

# Detrend: block the band from zero (inclusive) up to 4 resolution steps (exclusive)
cflow, cfhigh = res2 * 0, res2 * 4
detrended = fourier_filter(
    data2,
    data_freq2,
    cutoff_frequency_low=cflow,
    cutoff_frequency_high=cfhigh,
    band_block=True,
)

# Next band to inspect: from 4 resolution steps up to 13,
# i.e. up to 12 cycles per year (n-1)
cflow, cfhigh = res2 * 4, res2 * 13
blocked = fourier_filter(
    data2,
    data_freq2,
    cutoff_frequency_low=cflow,
    cutoff_frequency_high=cfhigh,
    band_pass=True,
)

# Overlay the band-passed component on the detrended series
detrended.plot()
blocked.plot()