# Structural Time Series with TF Probability
We will use the daily total female births dataset to explore structural time series (STS) models in TensorFlow Probability (TFP).

This example is modelled on the one from this <a href="https://medium.com/tensorflow/structural-time-series-modeling-in-tensorflow-probability-344edac24083">article</a> and its associated <a href="https://github.com/tensorflow/probability/blob/master/tensorflow_probability/examples/jupyter_notebooks/Structural_Time_Series_Modeling_Case_Studies_Atmospheric_CO2_and_Electricity_Demand.ipynb">code</a>.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow_probability as tfp
from tensorflow_probability import sts
import seaborn as sns

In [2]:
# Ask TensorFlow for its GPU device name once and report whether it is the
# expected '/device:GPU:0'.
gpu_name = tf.test.gpu_device_name()
if gpu_name == '/device:GPU:0':
    print('SUCCESS: Found GPU: {}'.format(gpu_name))
else:
    print('WARNING: GPU device not found.')

SUCCESS: Found GPU: /device:GPU:0


In [3]:
def build_model(observed_time_series):
    """Assemble the structural time series model used in this notebook.

    The model is the sum of a local linear trend component and a seasonal
    component with 12 seasons. `observed_time_series` is passed to each
    component and to the sum itself.

    Args:
        observed_time_series: the series the model is built for.

    Returns:
        The `sts.Sum` model combining the trend and seasonal components.
    """
    components = [
        sts.LocalLinearTrend(observed_time_series=observed_time_series),
        sts.Seasonal(num_seasons=12, observed_time_series=observed_time_series),
    ]
    return sts.Sum(components, observed_time_series=observed_time_series)

def plot_forecast(x, y,
                  forecast_mean, forecast_scale, forecast_samples,
                  title, x_locator=None, x_formatter=None):
    """Plot a forecast distribution against the 'true' time series.

    The forecast is drawn over the final `forecast_mean.shape[-1]` steps of
    `y`: it starts at `x[len(y) - forecast_mean.shape[-1]]` and advances one
    unit of `x` per forecast step.

    Args:
        x: indexable x-axis values for `y`. `x[i] + <int>` must be valid,
            because the forecast positions are built with `np.arange`.
        y: the ground-truth series, plotted in full.
        forecast_mean: forecast mean; its last axis length is taken as the
            number of forecast steps.
        forecast_scale: forecast spread; the shaded band is mean +/- 2 * scale.
        forecast_samples: individual forecast draws, plotted transposed
            (presumably shaped (num_samples, num_steps_forecast) -- TODO confirm).
        title: used as the axes title.
        x_locator: optional major tick locator for the x axis.
        x_formatter: optional major tick formatter for the x axis.

    Returns:
        The `(fig, ax)` pair that was drawn on.

    Raises:
        ValueError: if the forecast has more steps than `y`.
    """
    num_steps = len(y)
    num_steps_forecast = forecast_mean.shape[-1]
    # Validate before creating the figure: a negative training length would
    # silently index `x` from the end and misplace the forecast.
    if num_steps_forecast > num_steps:
        raise ValueError(
            "forecast has {} steps but the series has only {}".format(
                num_steps_forecast, num_steps))
    num_steps_train = num_steps - num_steps_forecast

    colors = sns.color_palette()
    c1, c2 = colors[0], colors[1]
    fig = plt.figure(figsize=(12, 6))
    ax = fig.add_subplot(1, 1, 1)

    ax.plot(x, y, lw=2, color=c1, label='ground truth')

    # x positions of the forecast, starting where the training part ends.
    forecast_start = x[num_steps_train]
    forecast_steps = np.arange(forecast_start,
                               forecast_start + num_steps_forecast)

    # Faint individual draws, then the mean and a +/- 2 scale band on top.
    ax.plot(forecast_steps, forecast_samples.T, lw=1, color=c2, alpha=0.1)
    ax.plot(forecast_steps, forecast_mean, lw=2, ls='--', color=c2,
            label='forecast')
    ax.fill_between(forecast_steps,
                    forecast_mean - 2 * forecast_scale,
                    forecast_mean + 2 * forecast_scale, color=c2, alpha=0.2)

    # Pad the y range by 10% on both sides of the combined data extent.
    ymin = min(np.min(forecast_samples), np.min(y))
    ymax = max(np.max(forecast_samples), np.max(y))
    yrange = ymax - ymin
    ax.set_ylim([ymin - yrange * 0.1, ymax + yrange * 0.1])
    ax.set_title("{}".format(title))
    ax.legend()

    # Locator and formatter are independent options: apply each only when it
    # was supplied, so passing one without the other neither hands None to
    # matplotlib nor gets silently ignored.
    if x_locator is not None:
        ax.xaxis.set_major_locator(x_locator)
    if x_formatter is not None:
        ax.xaxis.set_major_formatter(x_formatter)
    if x_locator is not None or x_formatter is not None:
        fig.autofmt_xdate()

    return fig, ax

In [4]:
# Local path of the daily total female births CSV.
filepath = "/media/greg/Storage/datasets/time_series/daily_total_female_births/daily-total-female-births.csv"
df = pd.read_csv(filepath_or_buffer=filepath)

# Show the first five rows.
print(df.head(n=5))

         Date  Births
0  1959-01-01      35
1  1959-01-02      32
2  1959-01-03      30
3  1959-01-04      31
4  1959-01-05      44


In [5]:
# Synthetic stand-in for the births data: a sine wave plus standard-normal
# noise. To model the real series instead, use `df.Births.astype(float)`
# (TFP requires us to work with floats) and `df.Date.values`.
num_points = 10000
t = np.linspace(0, 1000, num_points)
noise = np.random.normal(0, 1, num_points)
births_ts = np.sin(t) + noise
birth_dates = np.arange(num_points)

model = build_model(births_ts)

# Build the variational surrogate posteriors `qs`.
variational_posteriors = tfp.sts.build_factored_surrogate_posterior(model=model)

Instructions for updating:
SeedStream has moved to `tfp.util.SeedStream`.


In [None]:
#@title Minimize the variational loss.

# Allow external control of optimization to reduce test runtimes.
num_variational_steps = 200 # @param { isTemplate: true}
num_variational_steps = int(num_variational_steps)

optimizer = tf.optimizers.Adam(learning_rate=.1)


def train():
    """Optimize the surrogate posterior against the model's joint log-prob.

    Uses the module-level `model`, `births_ts`, `variational_posteriors`,
    `optimizer` and `num_variational_steps`.

    Returns:
        Whatever `tfp.vi.fit_surrogate_posterior` returns (plotted below as
        the loss curve).
    """
    target_log_prob_fn = model.joint_log_prob(observed_time_series=births_ts)
    return tfp.vi.fit_surrogate_posterior(
        target_log_prob_fn=target_log_prob_fn,
        surrogate_posterior=variational_posteriors,
        optimizer=optimizer,
        num_steps=num_variational_steps)


elbo_loss_curve = train()

plt.plot(elbo_loss_curve)
plt.show()

# Take 50 draws from the fitted variational posterior.
q_samples_births_ = variational_posteriors.sample(50)

In [None]:
# Summarize each model parameter by the mean and standard deviation of its
# posterior draws, taken along the first (sample) axis.
print("Inferred parameters:")
for param in model.parameters:
    draws = q_samples_births_[param.name]
    draws_mean = np.mean(draws, axis=0)
    draws_std = np.std(draws, axis=0)
    print("{}: {} +- {}".format(param.name, draws_mean, draws_std))

In [None]:
num_forecast_steps = 10

# Condition the forecast on everything except the final `num_forecast_steps`
# observations, so the predicted window coincides with the tail of the series
# that `plot_forecast` overlays on the ground truth. Conditioning on the whole
# series would forecast the steps *after* the last observation, which the plot
# would then draw on top of the last observed points.
# NOTE(review): the surrogate posterior above was fit on the full series, so
# the held-out points still influence the parameter samples; fit on the same
# truncated series for a strictly out-of-sample evaluation.
births_forecast_dist = tfp.sts.forecast(
    model,
    observed_time_series=births_ts[:-num_forecast_steps],
    parameter_samples=q_samples_births_,
    num_steps_forecast=num_forecast_steps)

In [None]:
num_samples = 10

# Pull the mean, standard deviation and `num_samples` draws out of the
# forecast distribution as NumPy arrays, keeping index 0 of the last axis.
births_forecast_mean = births_forecast_dist.mean().numpy()[..., 0]
births_forecast_scale = births_forecast_dist.stddev().numpy()[..., 0]
births_forecast_samples = births_forecast_dist.sample(num_samples).numpy()[..., 0]

In [None]:
# Integer step positions, one per entry of `birth_dates`, used as the x axis.
# `x_locator` / `x_formatter` are left at their defaults.
step_index = list(range(len(birth_dates)))

fig, ax = plot_forecast(
    step_index, births_ts,
    births_forecast_mean, births_forecast_scale, births_forecast_samples,
    title="Time series plot")

# Vertical marker at the first of the final `num_forecast_steps` positions.
ax.axvline(step_index[-num_forecast_steps], linestyle="--")
ax.legend(loc="upper left")
ax.set_ylabel("Time series values")
ax.set_xlabel("Year")
fig.autofmt_xdate()