This notebook performs Gaussian process (GP) inference in the presence of seasonal trends. To begin with, we assume there is a true underlying signal for season 0, and we shift it (for now by a constant, for simplicity) to obtain the true underlying signal for season 1.

In this setting we can still assume we are dealing with a GP; the only difference is that the observation vector is extended with an additional vector of observations from the second season.

In [None]:
import numpy as np
import scipy as sp
import matplotlib.pyplot as plt
import math
import scipy.stats
import time
from IPython import display
%matplotlib inline

## Defining base signal

In [None]:
# Seed the legacy global NumPy RNG so that "Restart Kernel -> Run All" reproduces
# the same signal, the same season offset and the same samples drawn in later cells.
SEED = 0
np.random.seed(SEED)

# Evaluation grid on [0, 1].
x = np.linspace(0, 1, 1000)

# Base signal for season 0: a random constant level plus the first five
# sine/cosine harmonics, each weighted by an independent standard-normal draw.
true_y = np.random.randn(1) * np.ones(len(x))
for i in range(1, 6):
    true_y += np.random.randn(1) * np.sin(i * math.pi * x)
    true_y += np.random.randn(1) * np.cos(i * math.pi * x)

# Constant shift that turns the season-0 signal into the season-1 signal.
season_offset = np.random.randn(1)

In [None]:
# Standard deviation of the Gaussian observation noise.
sigma_obs = 0.1
# Number of noisy observations drawn per season.
n_obs = 5

# Season 0: pick distinct grid positions, then observe the base signal with noise.
pick_x0 = np.random.choice(np.arange(len(x)), size=n_obs, replace=False)
sample_x0 = x[pick_x0]
noise_0 = np.random.randn(n_obs) * sigma_obs
sample_y0 = true_y[pick_x0] + noise_0

# Season 1: same procedure, but the observed signal is shifted by the season offset.
pick_x1 = np.random.choice(np.arange(len(x)), size=n_obs, replace=False)
sample_x1 = x[pick_x1]
noise_1 = np.random.randn(n_obs) * sigma_obs
sample_y1 = (season_offset + true_y[pick_x1]) + noise_1

In [None]:
# True underlying curves for both seasons (season 1 is season 0 plus the offset).
for curve, colour, tag in ((true_y, "b", "season_0"),
                           (true_y + season_offset, "r", "season_1")):
    plt.plot(x, curve, color=colour, label=tag)

# Noisy observations, drawn as dots in the colour of their season.
for obs_x, obs_y, colour, tag in ((sample_x0, sample_y0, "b", "sample_s0"),
                                  (sample_x1, sample_y1, "r", "sample_s1")):
    plt.plot(obs_x, obs_y, ".", color=colour, label=tag)

plt.legend()
plt.show()

## Doing inference with seasons

In [None]:
def k_seasonal(x1, x2,
               same_season_l=1, same_season_scale=1,
               cross_season_l=0.25, cross_season_scale=0.25):
    """Kernel (covariance) matrix between two sets of two-season inputs.

    Both ``x1`` and ``x2`` are two-element sequences ``[season_0_points,
    season_1_points]`` of 1-D coordinates. The result has the block layout::

        [K1, K3]
        [K2, K4]

    where K1 and K4 hold the within-season kernel values (season 0 and
    season 1 respectively) and K2, K3 hold the cross-season interactions.
    Rows are indexed by the points of ``x2`` (season 0 first, then season 1)
    and columns by the points of ``x1``, i.e. entry ``[i, j]`` is
    ``k(x2[i], x1[j])``.

    Parameters
    ----------
    x1, x2 : sequence of two 1-D array-likes
        Input locations for season 0 and season 1.
    same_season_l, same_season_scale : float, optional
        Length scale and multiplicative scale of the Gaussian kernel used
        for two points in the same season.
    cross_season_l, cross_season_scale : float, optional
        Length scale and multiplicative scale of the Gaussian kernel used
        for two points in different seasons.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(x2[0]) + len(x2[1]), len(x1[0]) + len(x1[1]))``.

    Notes
    -----
    NOTE(review): with a cross-season length scale shorter than the
    same-season one, the joint matrix does not look guaranteed to be
    positive semi-definite -- confirm this is acceptable for the model.
    """

    def _gaussian_block(col_points, row_points, length, scale):
        # Pairwise Gaussian kernel via broadcasting: rows follow `row_points`,
        # columns follow `col_points`.
        cols = np.asarray(col_points).reshape(1, -1)
        rows = np.asarray(row_points).reshape(-1, 1)
        return scale * np.exp(-(cols - rows) ** 2 / (2 * length * length))

    # Within-season blocks.
    k1_matrix = _gaussian_block(x1[0], x2[0], same_season_l, same_season_scale)
    k4_matrix = _gaussian_block(x1[1], x2[1], same_season_l, same_season_scale)
    # Cross-season blocks.
    k2_matrix = _gaussian_block(x1[0], x2[1], cross_season_l, cross_season_scale)
    k3_matrix = _gaussian_block(x1[1], x2[0], cross_season_l, cross_season_scale)

    return np.block([[k1_matrix, k3_matrix],
                     [k2_matrix, k4_matrix]])

In [None]:
def gp_posterior(sample_x, sample_y, x, sigma_obs):
    """Posterior mean and pointwise standard deviation of the seasonal GP.

    A zero-mean GP prior with covariance ``k_seasonal`` is conditioned on
    observations that carry additional independent Gaussian noise.

    Parameters
    ----------
    sample_x : sequence of two 1-D array-likes
        Observed input locations for season 0 and season 1.
    sample_y : sequence of two 1-D array-likes
        Observed values for season 0 and season 1, aligned with ``sample_x``.
    x : sequence of two 1-D array-likes
        Prediction locations for season 0 and season 1.
    sigma_obs : float
        Standard deviation of the observation noise.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        Posterior mean and posterior standard deviation, each a flat array
        with the season-0 predictions first, followed by season 1.
    """
    n_obs = len(sample_x[0]) + len(sample_x[1])
    y_obs = np.hstack([sample_y[0], sample_y[1]])

    # Rows: prediction points, columns: observed points.
    k_pred_obs = k_seasonal(sample_x, x)
    # Covariance of the noisy observations.
    k_obs = k_seasonal(sample_x, sample_x) + np.eye(n_obs) * sigma_obs ** 2

    # gain = k_pred_obs @ inv(k_obs), obtained from a linear solve instead of
    # an explicit inverse (gain @ k_obs = k_pred_obs  <=>  k_obs.T @ gain.T = k_pred_obs.T).
    gain = np.linalg.solve(k_obs.T, k_pred_obs.T).T

    mu = np.dot(gain, y_obs)

    # Only the diagonal of the posterior covariance is needed, so the
    # N x N product gain @ k_seasonal(x, sample_x) is never formed.
    prior_var = np.diag(k_seasonal(x, x))
    explained_var = np.sum(gain * k_seasonal(x, sample_x).T, axis=1)

    # Round-off (or a joint kernel that is not exactly positive semi-definite)
    # can push a variance slightly below zero; clip so sqrt never yields NaN.
    std_1d = np.sqrt(np.clip(prior_var - explained_var, 0.0, None))

    return mu.reshape(-1), std_1d.reshape(-1)

In [None]:
# Condition the seasonal GP on both sample sets and predict on the full grid
# for each season.
mu, std_1d = gp_posterior(
    [sample_x0, sample_x1],
    [sample_y0, sample_y1],
    [x, x],
    sigma_obs,
)

# The outputs are stacked season 0 first, then season 1: split at the grid length.
n_grid = len(x)
mu0, mu1 = mu[:n_grid], mu[n_grid:]
std_1d0, std_1d1 = std_1d[:n_grid], std_1d[n_grid:]

In [None]:
# True underlying curves for both seasons.
for curve, colour, tag in ((true_y, "b", "season_0"),
                           (true_y + season_offset, "r", "season_1")):
    plt.plot(x, curve, color=colour, label=tag)

# Noisy observations the GP was conditioned on.
for obs_x, obs_y, colour, tag in ((sample_x0, sample_y0, "b", "sample_s0"),
                                  (sample_x1, sample_y1, "r", "sample_s1")):
    plt.plot(obs_x, obs_y, ".", color=colour, label=tag)

# Shaded bands of posterior mean +/- 2 standard deviations per season.
# The posterior mean lines themselves are not drawn.
for mean, spread, colour in ((mu0, std_1d0, "b"), (mu1, std_1d1, "r")):
    half_width = 2 * spread
    plt.fill_between(x, mean - half_width, mean + half_width, color=colour, alpha=0.5)

plt.title("GP posterior for seasonal trends")

plt.legend()
plt.show()