In [None]:
import numpy as np
import pandas as pd

# JHU CSSE time series of US COVID-19 deaths, read from the Kaggle input mount.
FN_D_US = "/kaggle/input/jhucovid19/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_US.csv"
d_us = pd.read_csv(FN_D_US)

# Keep the columns from '1/22/20' onward and add up all rows, which leaves one
# total per column.
cumulative = (
    d_us
    .loc[:, '1/22/20':]
    .sum(axis="index")
)

# Column-to-column change of the totals. The leading entry of diff() is NaN
# (there is nothing to subtract from), so it is dropped.
daily_deaths = np.array(cumulative.diff().iloc[1:])

In [None]:
def extrapolate(t_series, weeks=4):
    """Pad a series on both ends with linearly extrapolated values.

    Every pass adds seven samples in front and seven behind. A new leading
    sample is estimated as ``x[i] - (x[i+7] - x[i])``, i.e. the week-over-week
    change at the edge is continued outward; the trailing side is handled
    symmetrically. Negative values are clipped to zero once, after the last
    pass, so intermediate negative values still take part in later passes.

    Parameters
    ----------
    t_series : array_like
        One-dimensional series. Must hold at least 14 samples when
        ``weeks > 0`` (two full 7-sample blocks are needed per side).
    weeks : int, optional
        Number of 7-sample blocks added on each side. Default is 4.

    Returns
    -------
    padded : numpy.ndarray
        New array of length ``len(t_series) + 2 * 7 * weeks``.
    pad_sz : int
        Number of samples added on each side (``7 * weeks``).

    Raises
    ------
    ValueError
        If ``weeks > 0`` and the series has fewer than 14 samples.
    """
    # Work on a private ndarray copy. This accepts lists and pandas Series, and
    # guarantees the clipping below never writes into the caller's data (with
    # weeks == 0 no concatenation happens, so the input itself would be hit).
    padded = np.array(t_series)

    if weeks > 0 and len(padded) < 2 * 7:
        raise ValueError(
            "extrapolate needs at least 14 samples, got {}".format(len(padded))
        )

    for _ in range(weeks):
        pad_left = 2 * padded[:7] - padded[7:2 * 7]
        pad_right = 2 * padded[-7:] - padded[-2 * 7:-7]
        padded = np.concatenate((pad_left, padded, pad_right))

    padded[padded < 0] = 0
    pad_sz = weeks * 7
    return padded, pad_sz

def seven_day_ave(x):
    """Seven-sample moving average, evaluated window by window.

    Output element ``i`` is the mean of ``x[i:i+7]``. Only complete windows
    are evaluated, so the result has six fewer elements than ``x``.
    """
    window = 7
    n_windows = len(x) - (window - 1)
    averaged = np.zeros(n_windows)
    for first in range(n_windows):
        chunk = x[first:first + window]
        averaged[first] = np.sum(chunk) / window
    return averaged

Moving averages are frequently implemented such that they lag the data points they are operating on. The daily numbers are better tracked when the moving average is centered on the values being averaged. Also, by extrapolating past and future data points, the moving average can run to the ends of the time-frame.

In [None]:
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

# Pad both ends by extrapolation so a full 7-day window exists for every real
# day, then cut the result back so that element i is the average of the window
# centred on real day i (3 samples either side).
deaths_padded, pad_sz = extrapolate(daily_deaths)
m_ave = seven_day_ave(deaths_padded)[pad_sz - 3:3 - pad_sz]  # moving average

# Only days from index `start` onward are drawn.
start = 50
t = np.arange(start, len(daily_deaths))
dz = np.zeros(t.shape)  # zero baseline for the filled area

fig1, ax1 = plt.subplots(nrows=1, ncols=1, figsize=(8, 6), num=1)
ax1.fill_between(t, daily_deaths[start:], y2=dz, color="#000000", alpha=0.1, label="daily deaths")
# The same averages drawn 3 days later show where a lagging window puts them.
ax1.plot(t + 3, m_ave[start:], linestyle=(0, (2, 2)), label="moving average")
ax1.plot(t, m_ave[start:], label="centered moving average")
ax1.set_xlabel('time [days]')
ax1.set_ylabel('daily new deaths')
ax1.set_title("Aligning The Seven-Day Moving Average")
ax1.axis([t.min(), t.max(), 0, 1.1 * max(daily_deaths)])
ax1.legend(ncol=3, loc="upper center")
plt.show()

Moving averages smooth data because they attenuate higher frequency oscillations. However, higher frequency components partially remain, and this results in the jagged-looking plots we are used to seeing. Smoothing is improved by removing higher frequencies completely. Below is the frequency spectrum ``H``. It only includes oscillations with periods of 9 days and longer.

In [None]:
# Pass-band edges in cycles per day: keep everything from DC up to 1/9.
f_lo = 0
f_hi = 1/9

# FFT length. np.fft.fft(x, N) silently crops x when it is longer than N, so N
# grows to the next power of two once the padded series exceeds 1024 samples.
# For shorter series N stays at 1024.
N = max(1024, 1 << (len(deaths_padded) - 1).bit_length())

# Brick-wall mask over the N FFT bins (bin k <-> k/N cycles per day). The band
# is marked twice: once in the lower half of the bins and once mirrored in the
# upper half, so the mask is symmetric about N/2.
k = np.arange(N)
H = (
    ((N*(f_lo) <= k) & (k <= N*(f_hi)))
    | ((N*(1-f_hi) <= k) & (k <= N*(1-f_lo)))
).astype(np.float64)

Speaking conversely, this spectrum removes frequencies higher than ``1/9 per day``. Now let's apply the computed spectrum to the data. This is accomplished through multiplication in the frequency domain. Let's also remove the portion of the data that corresponds to extrapolated input data.

In [None]:
# np.fft.fft(x, N) crops x when it is longer than N. Stop with a clear message
# instead of silently dropping the most recent days (which would otherwise
# surface later as a length mismatch when plotting).
if len(deaths_padded) > N:
    raise ValueError(
        "FFT length N={} is shorter than the padded series ({} samples); "
        "increase N and rebuild H.".format(N, len(deaths_padded))
    )

# Filter in the frequency domain: transform (zero-padded to N), multiply by the
# mask H, transform back and keep the real part.
DP = np.fft.fft(deaths_padded, N)
dp_padded = np.real(np.fft.ifft(H*DP, N))

# Keep only the samples that line up with the real data: skip the pad_sz
# extrapolated samples in front and stop before the extrapolated tail (and the
# FFT zero padding behind it).
deaths_smoothed = dp_padded[pad_sz:len(deaths_padded)-pad_sz]

Plotted now, the data is much smoother.

In [None]:
    # Raw daily counts (shaded) with the 9-day cut-off FFT result on top.
    fig2, ax2 = plt.subplots(nrows=1, ncols=1, figsize=(8, 6), num=2)
    ax2.fill_between(t, daily_deaths[start:], y2=dz, color="#000000", alpha=0.1, label="daily deaths")
    ax2.plot(t, deaths_smoothed[start:], label="FFT 9-day cut-off")
    ax2.set_xlabel('time [days]')
    ax2.set_ylabel('daily new deaths')
    ax2.set_title("FFT Smoothing")
    ax2.axis([t.min(), t.max(), 0, 1.1 * max(daily_deaths)])
    ax2.legend(ncol=2, loc="upper center")
    plt.show()

This plot shows something unexpected: medium-term oscillations with periods longer than 9 days. It's also worth noting that there is a spurious data point that did not get smoothed over too well. Let's repeat the smoothing operation, but with the cut-off period set to 32 days.

In [None]:
# Pass-band edges in cycles per day: keep everything from DC up to 1/32.
f_lo = 0
f_hi = 1/32

# FFT length. np.fft.fft(x, N) silently crops x when it is longer than N, so N
# grows to the next power of two once the padded series exceeds 1024 samples.
# For shorter series N stays at 1024.
N = max(1024, 1 << (len(deaths_padded) - 1).bit_length())

# Brick-wall mask over the N FFT bins (bin k <-> k/N cycles per day), marked in
# the lower half of the bins and mirrored in the upper half.
k = np.arange(N)
H = (
    ((N*(f_lo) <= k) & (k <= N*(f_hi)))
    | ((N*(1-f_hi) <= k) & (k <= N*(1-f_lo)))
).astype(np.float64)

# Multiply the spectrum by the mask, transform back, and keep only the samples
# that line up with the real (non-extrapolated) data.
DP = np.fft.fft(deaths_padded, N)
dp_padded = np.real(np.fft.ifft(H*DP, N))
deaths_smoothed_32 = dp_padded[pad_sz:len(deaths_padded)-pad_sz]

In [None]:
# Raw daily counts (shaded) with both low-pass results: the 9-day cut-off is
# dashed for reference, the 32-day cut-off is drawn solid.
fig2, ax2 = plt.subplots(nrows=1, ncols=1, figsize=(8, 6), num=2)
ax2.fill_between(t, daily_deaths[start:], y2=dz, color="#000000", alpha=0.1, label="daily deaths")
ax2.plot(t, deaths_smoothed[start:], linestyle=(0, (2, 2)), label="FFT 9-day cut-off")
ax2.plot(t, deaths_smoothed_32[start:], label="FFT 32-day cut-off")
ax2.set_xlabel('time [days]')
ax2.set_ylabel('daily new deaths')
ax2.set_title("FFT Smoothing")
ax2.axis([t.min(), t.max(), 0, 1.1 * max(daily_deaths)])
ax2.legend(ncol=3, loc="upper center")
plt.show()

Both the short and the medium-term oscillations are now removed, and the spurious data point is pretty well smoothed over too.



Those medium-term oscillations look very odd. Let's examine them more closely. To do this, we'll define a band-pass spectrum with low and high cut-off frequencies of 1/32 and 1/9, respectively.

In [None]:
# Band-pass edges in cycles per day: keep periods between 32 and 9 days.
f_lo = 1/32
f_hi = 1/9

# FFT length. np.fft.fft(x, N) silently crops x when it is longer than N, so N
# grows to the next power of two once the padded series exceeds 1024 samples.
# For shorter series N stays at 1024.
N = max(1024, 1 << (len(deaths_padded) - 1).bit_length())

# Brick-wall mask over the N FFT bins (bin k <-> k/N cycles per day), marked in
# the lower half of the bins and mirrored in the upper half.
k = np.arange(N)
H = (
    ((N*(f_lo) <= k) & (k <= N*(f_hi)))
    | ((N*(1-f_hi) <= k) & (k <= N*(1-f_lo)))
).astype(np.float64)

# Multiply the spectrum by the mask, transform back, and keep only the samples
# that line up with the real (non-extrapolated) data.
DP = np.fft.fft(deaths_padded, N)
dp_padded = np.real(np.fft.ifft(H*DP, N))
deaths_smoothed_32_9 = dp_padded[pad_sz:len(deaths_padded)-pad_sz]

In [None]:
# Band-pass result on top of the raw daily counts. This is the third figure, so
# it gets its own figure number; reusing num=2 would hand back figure 2 on
# backends that keep figures open after plt.show().
fig3, ax3 = plt.subplots(1, 1, figsize=(8, 6), num=3)
ax3.fill_between(t, daily_deaths[start:], dz, alpha=0.1, color="#000000", label="daily deaths")
ax3.plot(t, deaths_smoothed_32_9[start:], label="FFT band-pass")
ax3.set(xlabel='time [days]', ylabel='daily new deaths', title="FFT Band-Pass")
# The lower y-limit is below zero so the negative half of the band-pass signal
# stays visible.
ax3.axis([min(t), max(t), -max(daily_deaths)*0.1, max(daily_deaths)*1.1])
ax3.legend(loc="upper center", ncol=3)
plt.show()

This shows clear medium-term fluctuations in the data. The spurious data point was caused by a backlog in the US state of New Jersey. As for the other fluctuations, I have no idea what's causing them.

This notebook is based on a research paper titled **"Spectral Processing of COVID-19 Time-Series Data"**. It is available on arXiv at https://arxiv.org/abs/2008.08039. Source code for the paper is available at https://github.com/abstract-theory/Spectral-Processing-COVID-19/.