In [None]:
# JM: 03 Jan 2022
# notebook to go through "basic" python and notebook things

# mantra of the course: If you have a code problem, try Google first. A large part of programming is
#                       experience, and you gain experience more efficiently by trying to fix code yourself.

import matplotlib.pyplot as plt
import numpy as np
import copy
import pandas as pd
from datetime import datetime, timedelta

In [None]:
# Fourier transform
# start from an idealised signal, purely for demonstration purposes
# the domain is [0, 2 \pi] so that wavenumbers come out as integers; only a handful of points for now

# NOTE:
# "wavenumber" is used as the generic term throughout, but when the independent variable is time
# the proper name really is "angular frequency"

k, N = 1, 8
t = np.linspace(0, 2.0 * np.pi, N)  # linspace default: the end point 2*pi is one of the N samples
f = np.sin(k * t)

# single panel: the sampled signal in real space
fig, ax = plt.subplots(figsize=(5, 3))
ax.plot(t, f, 'rx-')
ax.set_ylabel(r"$f$")
ax.set_xlabel(r"$t$")
ax.grid()

In [None]:
# Fourier transform
# several details need care, and checking them requires knowing the expected answer in advance
# for the signal above, the transform should only give something at k = 1
# (more precisely, a pure sine should only show up in the imaginary part)

f_h = np.fft.fft(f)
print(f"raw output = {f_h}")
print(" ")
print(f"abs of the output = {abs(f_h)}")

# integer wavenumbers 0, 1, ..., int(N/2)
k_vec = np.arange(N // 2 + 1)

fig, (ax_real, ax_spec) = plt.subplots(1, 2, figsize=(10, 3))

# left panel: the signal in real space
ax_real.plot(t, f, 'rx-')
ax_real.set_ylabel(r"$f$")
ax_real.set_xlabel(r"$t$")
ax_real.grid()

# right panel: magnitude of the Fourier coefficients at the non-negative wavenumbers
ax_spec.plot(k_vec, np.abs(f_h)[k_vec], 'bo', markersize=12)
ax_spec.set_ylabel(r"$\hat{f}$")
ax_spec.set_xlabel(r"$k$")
ax_spec.grid()

In [None]:
# power is not zero everywhere when it should be?
# the fft routine here (as with convention generally) assumes the right-hand periodic end point is NOT
# one of the samples, hence endpoint=False below

k, N = 1, 8
t = np.linspace(0, 2.0 * np.pi, N, endpoint=False)
f = np.sin(k * t)

f_h = np.fft.fft(f)
print(f"raw output = {f_h}")
print(" ")
print(f"abs of the output = {abs(f_h)}")

# integer wavenumbers 0, 1, ..., int(N/2)
k_vec = np.arange(N // 2 + 1)

fig, (ax_real, ax_spec) = plt.subplots(1, 2, figsize=(10, 3))

# left panel: the signal in real space
ax_real.plot(t, f, 'rx-')
ax_real.set_ylabel(r"$f$")
ax_real.set_xlabel(r"$t$")
ax_real.grid()

# right panel: magnitude of the Fourier coefficients at the non-negative wavenumbers
ax_spec.plot(k_vec, np.abs(f_h)[k_vec], 'b-o', markersize=12)
ax_spec.set_ylabel(r"$\hat{f}$")
ax_spec.set_xlabel(r"$k$")
ax_spec.grid()

# now much better, and as expected
# try changing N and k, and explore what happens if you make k bigger than int(N/2) [Nyquist frequency]

# there are other conventions one might have to be careful about
# e.g. normalisation
# notice that the power in the graph below is N/2 and not 1, even though the amplitude of f is 1
# this is a CHOICE, and is consistent with the ifft (inverse fft) which turns f_h back to f with the
#   right magnitude
# this matters in some cases (e.g. if you are computing things in spectral space), but for our purposes
#   where we are mostly interested in the shape of the spectrum, we might not care, because while the
#   normalisation is "wrong" we are consistently "wrong"

# see more in the manual

# Q. also see how "np.fft.rfft" works, or how the fftpack from scipy works

In [None]:
# now the actual power of doing fft: decomposing a signal
# cook up an artificial signal made of two pieces

# the signal looks more complicated in real space, but it is ultimately a sine and a cosine at two
# different wavenumbers, so in spectral space there will only be power at two places

N = 16
a1, k1 = 1.0, 1.0
a2, k2 = 0.5, 2.0
t = np.linspace(0, 2.0 * np.pi, N, endpoint=False)
f1 = a1 * np.sin(k1 * t)
f2 = a2 * np.cos(k2 * t)
f = f1 + f2

f_h = np.fft.fft(f)

# integer wavenumbers 0, 1, ..., int(N/2)
k_vec = np.arange(N // 2 + 1)

fig, (ax_real, ax_spec) = plt.subplots(1, 2, figsize=(10, 3))

# left panel: the sum together with its two constituents
ax_real.plot(t, f, 'rx-', label=r"$f$")
ax_real.plot(t, f1, 'k:', alpha=0.5, label=r"f1")
ax_real.plot(t, f2, 'k--', alpha=0.5, label=r"f2")
ax_real.set_ylabel(r"$f$")
ax_real.set_xlabel(r"$t$")
ax_real.legend()
ax_real.grid()

# right panel: magnitude of the Fourier coefficients at the non-negative wavenumbers
ax_spec.plot(k_vec, np.abs(f_h)[k_vec], 'bo-', markersize=12)
ax_spec.set_ylabel(r"$\hat{f}$")
ax_spec.set_xlabel(r"$k$")
ax_spec.grid()

In [None]:
# and of course one can be somewhat arbitrary about this

N = 32
t = np.linspace(0, 2.0 * np.pi, N, endpoint=False)

np.random.seed(4167)  # fixed seed so the figure (and the remarks at the bottom) are reproducible

# try and explain what the below loop is doing
f = np.zeros(N)
for i in range(16):
    amp  = np.random.rand()
    # randint expects INTEGER bounds (low inclusive, high exclusive), so use floor division
    # rather than the float N/4; this restricts the wavenumbers to half the Nyquist range
    wnum = np.random.randint(low=-(N // 4), high=N // 4)
    if i % 2 == 0:
        f += amp * np.sin(wnum * t)
    else:
        f += amp * np.cos(wnum * t)

f_h = np.fft.fft(f)

# integer wavenumbers 0, 1, ..., int(N/2)
k_vec = np.arange(N // 2 + 1)

fig, (ax_real, ax_spec) = plt.subplots(1, 2, figsize=(10, 3))

# left panel: the signal, with a k = 5 cosine overlaid for visual comparison
ax_real.plot(t, f, 'rx-', label=r"$f$")
ax_real.plot(t, np.cos(5 * t), 'k--', alpha=0.5, label=r"$\cos(5t)$")  # labelled so the legend is complete
ax_real.set_xlabel(r"$t$")
ax_real.set_ylabel(r"$f$")
ax_real.legend()
ax_real.grid()

# right panel: magnitude of the Fourier coefficients, one tick per integer wavenumber
ax_spec.plot(k_vec, np.abs(f_h)[k_vec], 'bo-', markersize=12)
ax_spec.set_xticks(k_vec)
ax_spec.set_xlabel(r"$k$")
ax_spec.set_ylabel(r"$\hat{f}$")
ax_spec.grid()

# the signal is complicated, but the spectrum is saying that (for seed 4167) most of the contribution
# comes from wavenumber 5, which is sort of what we see by inspection

In [None]:
# applying this to more "realistic" data

# so far the domain was chosen to be [0, 2*pi], so that wavenumbers are integers
# but ultimately we want to be on some [a, b], so the domain needs to be shifted and stretched accordingly
# shifting by itself doesn't really do anything as such, since that's just a redefinition of the co-ordinate
# stretching the domain however requires some extra care, as the wavenumbers have to be rescaled with it

# easier with an example maybe
# on [0, 2*pi], sin(1 * t) covers 1 wavelength
# on [0, L], convince yourself sin(2.0 * pi / L * t) is the equivalent, either mathematically or by graphical means

N, L, k = 16, 1, 1

# same wave, sampled once on [0, 2*pi) and once on [0, L)
t1 = np.linspace(0, 2 * np.pi, N, endpoint=False)
f1 = np.sin(k * t1)
t2 = np.linspace(0, L, N, endpoint=False)
f2 = np.sin(k * 2.0 * np.pi * t2 / L)

fig, (ax_left, ax_right) = plt.subplots(1, 2, figsize=(10, 3))

# left panel: the original domain
ax_left.plot(t1, f1, 'bx-')
ax_left.set_ylabel(r"$f_1$")
ax_left.set_xlabel(r"$t_1$")
ax_left.grid()

# right panel: the stretched domain
ax_right.plot(t2, f2, 'rx-')
ax_right.set_ylabel(r"$f_2$")
ax_right.set_xlabel(r"$t_2$")
ax_right.grid()

In [None]:
# so then really what is going to happen when you plot in spectral space is that you are going
# to redefine the wavenumber array
# on [0, 2*pi], k = (0, 1, 2, 3, 4...)
# on [0, L], k -> k_new * 2*pi/L, so k = (0, 1, 2, 3, 4...) * (2*pi/L)

N = 32
L = 1
t = np.linspace(0, L, N, endpoint=False)

conversion_factor = 2.0 * np.pi / L  # turns an integer mode number into a wavenumber on [0, L]

np.random.seed(4167) # same seed as above

f = np.zeros(N)
for i in range(16):
    amp  = np.random.rand()
    # randint expects INTEGER bounds (low inclusive, high exclusive), so use floor division
    # rather than the float N/4; the integer mode number is then rescaled onto [0, L]
    wnum = np.random.randint(low=-(N // 4), high=N // 4) * conversion_factor
    if i % 2 == 0:
        f += amp * np.sin(wnum * t)
    else:
        f += amp * np.cos(wnum * t)

f_h = np.fft.fft(f)

# integer mode numbers 0, 1, ..., int(N/2); multiplied by conversion_factor when used as wavenumbers
k_vec = np.arange(N // 2 + 1)

fig, (ax_real, ax_spec) = plt.subplots(1, 2, figsize=(10, 3))

# left panel: the signal, with the mode-5 cosine overlaid for visual comparison
ax_real.plot(t, f, 'rx-', label=r"$f$")
ax_real.plot(t, np.cos(5 * conversion_factor * t), 'k--', alpha=0.5,
             label=r"mode 5 cosine")  # labelled so the legend is complete
ax_real.set_xlabel(r"$t$")
ax_real.set_ylabel(r"$f$")
ax_real.legend()
ax_real.grid()

# right panel: same spectrum as before, but plotted against the rescaled wavenumber (tick every 2nd mode)
ax_spec.plot(k_vec * conversion_factor, np.abs(f_h)[k_vec], 'bo-', markersize=12)
ax_spec.set_xticks(k_vec[::2] * conversion_factor)
ax_spec.set_xlabel(r"$k$")
ax_spec.set_ylabel(r"$\hat{f}$")
ax_spec.grid()

# nothing has been done as such here: we redefined the domain, so redefined the wavevector, and plot
# essentially the same thing

In [None]:
# by itself this might not seem that useful, but recalling that k = 2*pi*frequency, we would have
# on [0, L],              k = (0, 1, 2, 3, 4...) * (2*pi/L)
#         or 2*pi*frequency = (0, 1, 2, 3, 4...) * (2*pi/L)
#         or      frequency = (0, 1, 2, 3, 4...) / L
# so we can now definitively pick out the frequency that contributes the most to an observed signal

# e.g. suppose we now interpret time going from [0, L] in units of seconds, then we have a signal
#      varying in real time, so we can pick out the dominant frequency from this

# NOTE: this cell re-uses t, f, f_h, N, L and conversion_factor from the cell above

# integer mode numbers 0, 1, ..., int(N/2); dividing by L turns them into frequencies
k_vec = np.arange(N // 2 + 1)

fig, (ax_real, ax_spec) = plt.subplots(1, 2, figsize=(10, 3))

# left panel: the signal, with the mode-5 cosine overlaid for visual comparison
ax_real.plot(t, f, 'rx-', label=r"$f$")
ax_real.plot(t, np.cos(5 * conversion_factor * t), 'k--', alpha=0.5,
             label=r"mode 5 cosine")  # labelled so the legend is complete
ax_real.set_xlabel(r"$t$ (s)")
ax_real.set_ylabel(r"$f$")
ax_real.legend()
ax_real.grid()

# right panel: spectrum against frequency
ax_spec.plot(k_vec / L, np.abs(f_h)[k_vec], 'bo-', markersize=12)
ax_spec.set_xlim([0, (N / 2 + 1) / L])  # limit in FREQUENCY units, so it stays right when L != 1
ax_spec.set_xlabel(r"$\mathsf{f}\ (\mathrm{s}^{-1}\ \mathrm{or}\ \mathrm{Hz})$")
ax_spec.set_ylabel(r"$\hat{f}$")
ax_spec.grid()

# in this case (for seed 4167) it is saying that the dominant contribution is from a wave at frequency
# 5 Hz (5 oscillations per second, which is what we see of course)

# if instead of a signal in time you have a signal in space, you can in this case back out a
# dominant wavelength accordingly (see exercise)

In [None]:
# Fourier transform as a means to high / low pass a signal

# since Fourier transforms can decompose a wave into various well-defined signals, we could also use it
# as a way to filter a signal by wiping out some portion of the spectrum
# so idea is that
# 1) take signal and Fourier transform
# 2) in spectral space, wipe out the low  wavenumbers if you want to high pass (i.e. keep high frequencies)
#                                    high wavenumbers                low  pass (i.e. keep low  frequencies)
# 3) transform signal back into real space

# the slightly tricky thing is to know which entries of the array to modify, and this depends
# on the convention the FFT routine decides to use
# -- for simplicity I am going to cheat and use np.fft.rfft, which only returns the non-negative
#    wavenumbers/frequencies (the negative ones are redundant for a purely real signal, which we have here)

# e.g. with above, but I wipe out anything larger than OR EQUAL to 5 Hz (so I am doing a low pass)

# NOTE: this cell re-uses t, f, f_h, N and L from the cells above

f_h_r = np.fft.rfft(f)
print(f"length of fft(f) is {len(f_h)}, but length of rfft(f) is {len(f_h_r)}")

k_vec = np.arange(N // 2 + 1)     # integer mode numbers, used for plotting
freq_vec = k_vec / L              # L = 1 so doesn't actually do anything
f_h_r_mod = f_h_r.copy()          # python quirk: need a real copy, plain assignment would alias f_h_r
f_h_r_mod[freq_vec >= 5.0] = 0    # set appropriate entries to zero
# pass the original length explicitly: irfft otherwise assumes an even-length signal and would
# return one sample too few if len(f) were odd
f_mod = np.fft.irfft(f_h_r_mod, n=len(f))

fig, (ax_real, ax_spec) = plt.subplots(1, 2, figsize=(10, 3))

# left panel: original vs filtered signal
ax_real.plot(t, f, 'rx-', label=r"$f$", alpha=0.5)
ax_real.plot(t, f_mod, 'gx-', label=r"$f$ mod")
ax_real.set_xlabel(r"$t$ (s)")
ax_real.set_ylabel(r"$f$")
ax_real.legend()
ax_real.grid()

# right panel: original vs filtered spectrum
ax_spec.plot(freq_vec, np.abs(f_h_r), 'bo-', markersize=12, alpha=0.5)
ax_spec.plot(freq_vec, np.abs(f_h_r_mod), 'go-', markersize=12)
ax_spec.set_xlim([0, (N / 2 + 1) / L])  # limit in FREQUENCY units, so it stays right when L != 1
ax_spec.set_xlabel(r"$\mathsf{f}\ (\mathrm{s}^{-1}\ \mathrm{or}\ \mathrm{Hz})$")
ax_spec.set_ylabel(r"$\hat{f}$")
ax_spec.grid()

In [None]:
# Fourier transform as a means to high / low pass a signal

# e.g. as above, but I wipe out anything less than 5 Hz (so I am doing a high pass)
# (note the zero-frequency entry, i.e. the mean of the signal, is below 5 Hz and is wiped out as well)

# NOTE: this cell re-uses t, f, f_h, N and L from the cells above

f_h_r = np.fft.rfft(f)
print(f"length of fft(f) is {len(f_h)}, but length of rfft(f) is {len(f_h_r)}")

k_vec = np.arange(N // 2 + 1)     # integer mode numbers, used for plotting
freq_vec = k_vec / L              # L = 1 so doesn't actually do anything
f_h_r_mod = f_h_r.copy()          # python quirk: need a real copy, plain assignment would alias f_h_r
f_h_r_mod[freq_vec < 5.0] = 0     # set appropriate entries to zero
# pass the original length explicitly: irfft otherwise assumes an even-length signal and would
# return one sample too few if len(f) were odd
f_mod = np.fft.irfft(f_h_r_mod, n=len(f))

fig, (ax_real, ax_spec) = plt.subplots(1, 2, figsize=(10, 3))

# left panel: original vs filtered signal
ax_real.plot(t, f, 'rx-', label=r"$f$", alpha=0.5)
ax_real.plot(t, f_mod, 'gx-', label=r"$f$ mod")
ax_real.set_xlabel(r"$t$ (s)")
ax_real.set_ylabel(r"$f$")
ax_real.legend()
ax_real.grid()

# right panel: original vs filtered spectrum
ax_spec.plot(freq_vec, np.abs(f_h_r), 'bo-', markersize=12, alpha=0.5)
ax_spec.plot(freq_vec, np.abs(f_h_r_mod), 'go-', markersize=12)
ax_spec.set_xlim([0, (N / 2 + 1) / L])  # limit in FREQUENCY units, so it stays right when L != 1
ax_spec.set_xlabel(r"$\mathsf{f}\ (\mathrm{s}^{-1}\ \mathrm{or}\ \mathrm{Hz})$")
ax_spec.set_ylabel(r"$\hat{f}$")
ax_spec.grid()

In [None]:
# trying this with some real time-series data
# sea level elevation relative to datum at Tobermory on the Isle of Mull, Scotland
# (same data presented in OCES 2003, but analysed differently; trimmed out version of data obtained from BODC)

# the file is kind of big, and only the date and the value columns are needed from it
tobermory_cols = ["Date", "Data value"]
df = pd.read_csv("./Tobermory_20160430_20161231.csv", usecols=tobermory_cols, skipinitialspace=True)

# raw plot to see what it looks like
fig, ax = plt.subplots(figsize=(10, 3))
df.plot(x="Date", ax=ax)
ax.grid()

# the data is every 15 mins and of course very dense, but there are clearly cycles going on
# over this short 6 month period we are going to assume the trends are very small so that we can ignore these,
# and that the data is basically periodic
#   -- it isn't periodic, but this will affect the very high wavenumbers that we are going to largely ignore
#   -- we will not be able to ignore the trend in the next example

In [None]:
# so we want to do a spectrum analysis to pick out the dominant frequencies
#   -- before you calculate, have a think what answer you should get first?
#      ("think before you compute"!)
#
# we can proceed with the usual analysis, with the understanding that
#   -- we have to be careful with interpreting the results at the very high frequencies, in this case
#      when we are near the resolution limit of 15 mins (things longer than a day are probably fine)
#   -- we need to re-scale the wavenumbers accordingly to get the right frequency
#   -- have to be a bit careful with units; I am going to do things in seconds then convert it to days
#      (I could also just do it in the final units from the beginning, I am being more verbose for learning reasons)

# 1) ignore the fact there are units for now, just do a (r)fft of the data
#    (rfft to ignore the negative wavenumbers/frequencies)

f_h = np.fft.rfft(df["Data value"])
k_vec = np.arange(len(f_h))  # raw integer mode index; NOT yet scaled into a physical frequency

# raw plot of spectrum to see shape: linear axes on the left, log-log axes on the right
fig, axes = plt.subplots(1, 2, figsize=(10, 3))
for ax, draw_method in zip(axes, ("plot", "loglog")):
    getattr(ax, draw_method)(k_vec, np.abs(f_h))
    ax.set_ylabel(r"$|f_h|$")
    ax.set_xlabel(r"k")
    ax.grid()

# can't really see anything with linear plot, see a bit more with loglog plot
# Q. there is one massive peak around k = 400 or so, and some smaller peaks, can you guess what those
#    correspond to before further calculations?

In [None]:
# 2) rescale the wavenumbers and plot again
#    -- we don't particularly care about shifts in the time-axis
#    -- but we do care about the length of the record, L = (number of samples) * (sampling interval)
#       NOTE: this is one sampling interval LONGER than (last time - first time), because the fft
#             treats the record as periodic with the right-hand end point NOT included
#             (same point as with endpoint=False in the idealised examples)

# NOTE: this cell re-uses df, f_h and k_vec from the cells above

# convert the dates from strings to something like numbers so we can do subtractions on
date_fmt = "%Y/%m/%d %H:%M:%S"
t0 = datetime.strptime(df["Date"].values[ 0], date_fmt)
tf = datetime.strptime(df["Date"].values[-1], date_fmt)

# sampling interval in seconds; assumes the record is evenly sampled with no gaps
# (the fft itself already relies on that assumption)
n_samples = len(df)
dt = (tf - t0).total_seconds() / (n_samples - 1)
L = n_samples * dt  # record length in seconds

freq_vec = k_vec / L  # frequency in units of PER SECOND

# raw plot of spectrum to see shape: linear axes on the left, log-log axes on the right
fig, axes = plt.subplots(1, 2, figsize=(10, 3))
for ax, draw_method in zip(axes, ("plot", "loglog")):
    getattr(ax, draw_method)(freq_vec, np.abs(f_h))
    ax.set_xlabel(r"$\mathsf{f}\ (\mathrm{s}^{-1})$")
    ax.set_ylabel(r"$|f_h|$")
    ax.grid()

In [None]:
# 3) not in very useful units, so convert this to per day

# NOTE: this cell re-uses df, f_h and k_vec from the cells above

# convert the dates from strings to something like numbers so we can do subtractions on
date_fmt = "%Y/%m/%d %H:%M:%S"
t0 = datetime.strptime(df["Date"].values[ 0], date_fmt)
tf = datetime.strptime(df["Date"].values[-1], date_fmt)

# record length in seconds = (number of samples) * (sampling interval); this is one interval longer
# than (last time - first time) because the fft assumes the periodic end point is NOT included
# (assumes the record is evenly sampled with no gaps, which the fft relies on anyway)
n_samples = len(df)
dt = (tf - t0).total_seconds() / (n_samples - 1)
L = n_samples * dt

seconds_per_day = 24 * 3600  # 24 * 60 * 60 seconds in one day
freq_vec = k_vec / (L / seconds_per_day)  # frequency in units of PER DAY

# M2 (principal lunar semi-diurnal tide) has a period of about 12.42 hours, so it is slightly LESS
# than twice daily; exactly 2 cycles per day would be the solar S2 constituent instead
M2_period_hours = 12.4206
M2_freq = 24.0 / M2_period_hours

fig, ax = plt.subplots()
ax.loglog(freq_vec, np.abs(f_h))
ax.plot([M2_freq, M2_freq], [1e-2, 1e5], 'k--', alpha=0.7)  # plot the expected M2 tide frequency as well
ax.set_xlabel(r"$\mathsf{f}\ (\mathrm{day}^{-1})$")
ax.set_ylabel(r"$|f_h|$")
ax.grid()

# add the tick in: keep the automatic ticks, append one at the M2 frequency and label it "M2"
xt = ax.get_xticks()
xt = np.append(xt, M2_freq)
xtl = xt.tolist()
xtl[-1] = r"M2"
ax.set_xticks(xt)
ax.set_xticklabels(xtl)
ax.set_xlim([1e-2, 1e2]);

# could make this more presentable but will leave as exercise for the reader

# Q. convince yourself through plotting that the other peaks you see correspond to the other dominant modes
# Q. (harder) as above, but bully the computer to pick out the largest 10 peaks, the corresponding frequencies
#             from "freq_vec" in whatever unit you decide is useful for you, and compare this with the known
#             tidal harmonics

In [None]:
# lets try another example where one needs to be a bit careful
# recall from 07_time_series there is El-Nino 3.4 data, lets read that and plot the trend etc.
# (in the old-fashioned way to get the data as a 1d array; try do this in pandas instead)

# the file handle is called "fh" rather than "f" so the signal array f from the earlier cells
# is not overwritten (keeps those cells re-runnable)
with open("elnino34_sst.data", "r") as fh:
    elnino34_txt = fh.readlines()

# drop the first 3 and last 4 lines (presumably header / footer text -- check against the file),
# and strip the trailing newline from what is left
elnino34_txt = [line.strip("\n") for line in elnino34_txt[3:-4]]

# each line (as a string) is split into its components; the first entry of each line is skipped
# (presumably the year label -- check against the file) because we only want the SST entries,
# and every remaining entry is turned from a string into a float
# (a comprehension is used so the notebook-level names k and i are left alone)
elnino34_sst = np.array(
    [float(entry) for line in elnino34_txt for entry in line.split()[1:]]
)

# the data file itself records monthly averaged SST data from 1950 Jan to 2019 Dec
# (see 07_time_series)
# build the time array in YEARS with one sample per month, i.e. a spacing of exactly 1/12 year
# starting at 1950; squeezing the samples into [1950, 2019] with linspace would make the time
# step slightly too small, which biases the "per year" trend computed below

t = 1950.0 + np.arange(len(elnino34_sst)) / 12.0

# linear trend; again just be careful here that time units are in YEARS
p = np.polyfit(t, elnino34_sst, 1)
lin_trend = p[0] * t + p[1]

fig, ax = plt.subplots(figsize=(10, 3))
ax.plot(t, elnino34_sst, 'C0')
ax.plot(t, lin_trend, 'k--')
# raw f-string: the LaTeX backslashes are not valid Python escape sequences otherwise
ax.text(1990, 24.5, rf"trend = ${p[0]:.3f}^{{\circ}}\ \mathrm{{C}}$ per year", color="k")
ax.set_xlabel(r"$t$ (years)")
ax.set_ylabel(r"SST (${}^{\circ}\mathrm{C}$)")
ax.grid()

# Q. swap this out with pandas and/or datetime

In [None]:
# the graph clearly has some sort of oscillation period, so we want to pick it out
# a standard spectral analysis then results in the following graph
# (plotting this against PERIOD rather than frequency)

# NOTE: this cell re-uses elnino34_sst from the cell above

f_h = np.fft.rfft(elnino34_sst)
k_vec = np.arange(len(f_h))  # raw integer mode index; NOT yet scaled into a physical frequency

# record length in YEARS = (number of samples) * (sampling interval); the data is monthly so the
# sampling interval is 1/12 year
# (this is NOT last time - first time: the fft assumes the periodic end point is not included)
L = len(elnino34_sst) / 12.0
freq_vec = k_vec / L         # convert to frequency, units of PER YEAR

# convert this to period T = 1 / f, because we are interested here in the period
# avoid dividing by zero by replacing zero with small number
# (so the k = 0 entry, i.e. the mean of the signal, ends up at a period of 1e16 years)
freq_vec[freq_vec == 0] = 1e-16
peri_vec = 1.0 / freq_vec  # period in units of YEARS

# raw plot of spectrum to see shape: linear axes on the left, log-log axes on the right
fig, axes = plt.subplots(1, 2, figsize=(10, 3))
for ax, draw_method in zip(axes, ("plot", "loglog")):
    getattr(ax, draw_method)(peri_vec, np.abs(f_h))
    ax.set_xlabel(r"period $\mathsf{T}\ (\mathrm{yr})$")  # x-axis is period, not frequency
    ax.set_ylabel(r"$|f_h|$")
    ax.grid()

In [None]:
# there is massive power on the high periods (low frequencies) that swamp everything!

# reason here is that the signal has a (global warming) trend, so signal is really not periodic,
# so Fourier transforms formally don't apply and in this case the "malfunction" is very dramatic

# we can fix this by de-trending the signal and then repeating the calculation
# (same code as above, except the first line has been changed)

# NOTE: this cell re-uses elnino34_sst and lin_trend from the cells above

f_h = np.fft.rfft(elnino34_sst - lin_trend)
k_vec = np.arange(len(f_h))  # raw integer mode index; NOT yet scaled into a physical frequency

# record length in YEARS = (number of samples) * (sampling interval); the data is monthly so the
# sampling interval is 1/12 year
# (this is NOT last time - first time: the fft assumes the periodic end point is not included)
L = len(elnino34_sst) / 12.0
freq_vec = k_vec / L         # convert to frequency, units of PER YEAR

# convert this to period T = 1 / f, because we are interested here in the period
# avoid dividing by zero by replacing zero with small number
# (so the k = 0 entry ends up at a period of 1e16 years)
freq_vec[freq_vec == 0] = 1e-16
peri_vec = 1.0 / freq_vec  # period in units of YEARS

# raw plot of spectrum to see shape: linear axes on the left, log-log axes on the right
fig, axes = plt.subplots(1, 2, figsize=(10, 3))
for ax, draw_method in zip(axes, ("plot", "loglog")):
    getattr(ax, draw_method)(peri_vec, np.abs(f_h))
    ax.set_xlabel(r"period $\mathsf{T}\ (\mathrm{yr})$")  # x-axis is period, not frequency
    ax.set_ylabel(r"$|f_h|$")
    ax.grid()

In [None]:
# got rid of the large power at the long periods, but the data spans a massive range of periods
# also note that we have data over about 70 years (1950 to 2019), so using that to infer anything for
# periods longer than that smells of an illegal manoeuvre
# current choice of axis not particularly informative, so lets just modify that
# (going to ignore the log plot here because our intended x-range is not very wide)

# NOTE: this cell re-uses peri_vec and f_h from the cell above

# raw plot of spectrum to see shape, restricted to periods of up to 10 years
fig, ax = plt.subplots(figsize=(10, 3))
ax.plot(peri_vec, np.abs(f_h), 'C0-x')
ax.set_xlabel(r"period $\mathsf{T}\ (\mathrm{yr})$")  # x-axis is period, not frequency
ax.set_ylabel(r"$|f_h|$")
ax.set_xlim([0, 10])
ax.grid()

# there is quite a bit of power between the 2 to 7 year period, which is roughly where the El-Nino
# range is expected to be, so data is at least consistent with expectations

# Q. provide a physical explanation for the massive peak at the 1 year period
# Q. (harder) there is a reasonably sized peak at 0.5 year period, speculate the reason for its existence
# Q. (harder) if you look up to 20 years you will find some power between the 10 to 20 year period,
#             speculate and/or look up the reason for that
#
# Q. try starting from the full signal, but filter using say the Gaussian filter or analogous (from 07),
#    and compare the relevant power spectra. Note any quantitative and qualitative differences.

In [None]:
# could filter some modes out and see what the resulting signal looks like
# here I am going to get rid of anything outside of the 2 to 7 year period

# NOTE: this cell re-uses elnino34_sst, f_h (spectrum of the DE-TRENDED signal) and peri_vec
#       from the cells above

f_mod_h = f_h.copy()  # real copy, so the unfiltered spectrum f_h is left untouched
f_mod_h[(peri_vec < 2.0) | (peri_vec > 7.0)] = 0

# pass the original length explicitly: irfft otherwise assumes an even-length signal and would
# return one sample too few for an odd-length record (which would then not line up with lin_trend)
elnino34_sst_mod = np.fft.irfft(f_mod_h, n=len(elnino34_sst))

fig, (ax_spec, ax_real) = plt.subplots(2, 1, figsize=(10, 8))

# top panel: original vs band-passed spectrum
ax_spec.plot(peri_vec, np.abs(f_h), 'C0-x', alpha=0.5)
ax_spec.plot(peri_vec, np.abs(f_mod_h), 'C1-o')
ax_spec.set_xlabel(r"period $\mathsf{T}\ (\mathrm{yr})$")  # x-axis is period, not frequency
ax_spec.set_ylabel(r"$|f_h|$")
ax_spec.set_xlim([0, 10])
ax_spec.grid()

# time array in YEARS with one sample per month (spacing of exactly 1/12 year, starting at 1950),
# and the linear trend of the full signal
t = 1950.0 + np.arange(len(elnino34_sst)) / 12.0
p = np.polyfit(t, elnino34_sst, 1)
lin_trend = p[0] * t + p[1]

# bottom panel: original vs band-passed signal in real space
ax_real.plot(t, elnino34_sst, 'C0', alpha=0.5)
ax_real.plot(t, lin_trend, 'k--')
ax_real.plot(t, elnino34_sst_mod + lin_trend, 'C1')  # Q. why do we need to do this?
# raw f-string: the LaTeX backslashes are not valid Python escape sequences otherwise
ax_real.text(1990, 24.5, rf"trend = ${p[0]:.3f}^{{\circ}}\ \mathrm{{C}}$ per year", color="k")
ax_real.set_xlabel(r"$t$ (years)")
ax_real.set_ylabel(r"SST (${}^{\circ}\mathrm{C}$)")
ax_real.grid()