<hr style="border:0.2px solid black">

<figure>
  <IMG SRC="img/ntnu_logo.png" WIDTH=250 ALIGN="right">
</figure>

**<ins>Course:</ins>** TVM4174 - Hydroinformatics for Smart Water Systems

# <ins>Example:</ins> Time Series Analysis
    
*Developed by David B. Steffelbauer*

<hr style="border:0.2px solid black">


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.dates as mdates
# Global figure styling: high-resolution figures and seaborn's dark grid theme.
plt.rc('figure', dpi=200)
sns.set_style(style='darkgrid')

### Python datetime format

In [None]:
from datetime import datetime
now = datetime.now()  # current local date and time (no timezone is passed)
now  # bare last expression -> shown as the cell output

In [None]:
# POSIX timestamp (seconds since the epoch, as a float) corresponding to `now`
now.timestamp()

In [None]:
# Format codes used: %A weekday name, %B month name, %y two-digit year,
# %W week number of the year (Monday as first day), %I:%M %p 12-hour clock.
# Note: %w (lower case) is the weekday as a number 0-6, not the calendar week.
now.strftime('%A, the %B %y, in the calendar week %W at %I:%M %p')

### Working with a Time Series

In [None]:
# Semicolon-separated CSV with decimal commas; the first column is parsed
# as dates and used as the index.
filename = 'data/Demands.csv'  # units are L/h
data = pd.read_csv(
    filename,
    sep=';',
    decimal=',',
    index_col=0,
    parse_dates=[0],
)
data.shape

In [None]:
# Row-wise total over all columns, converted from L/h to m^3/h.
flow = data.sum(axis='columns') / 1000

In [None]:
# Display the total-flow series (m^3/h)
flow

### Indexing and plotting a TS

In [None]:
# Plot the complete series; the trailing semicolon suppresses the Axes repr
flow.plot();

In [None]:
# Partial-string indexing: select all timestamps in January 2018
flow['2018-1'].plot()

In [None]:
# Open-ended slice: everything from 23 December 2018 to the end of the series
flow['2018-12-23':].plot()

### Resampling and Rolling Windows

In [None]:
# All samples recorded on 13 February 2018
q = flow['2018-2-13']

In [None]:
# Resampling reduces the series to one value per hour (here: the hourly maximum),
# whereas the rolling window keeps every timestamp and smooths over 12 samples.
res_q = q.resample('1H').max()
# A time-based window is an alternative: q.rolling('1H', center=True).mean()
rol_q = q.rolling(12, center=True).mean()

colors = sns.color_palette('viridis', 4)
fig, ax = plt.subplots()

q.plot(ax=ax, color=colors[0], label='original', alpha=0.6)
rol_q.plot(ax=ax, color=colors[1], label='rolling', marker='o', ms=4)
res_q.plot(ax=ax, color=colors[2], label='resample', marker='o', ms=10, markerfacecolor='None')
ax.set_xlabel('time', fontsize=14)
# `flow` was converted from L/h to m^3/h when it was created, so label it as such
ax.set_ylabel(r'$Q \quad (m^3/h)$', fontsize=14)
ax.legend();

### Peak Demand Analysis

In [None]:
# Daily peak demand: maximum of `flow` within each calendar day
peak_demand = flow.resample('1D').max()

In [None]:
# Daily peak demand together with its centred 30-day moving average.
fig, ax = plt.subplots()
peak_demand.plot(ax=ax)
peak_demand.rolling(30, center=True).mean().plot(ax=ax)

# Abbreviated month names on the time axis
ax.xaxis.set_major_formatter(mdates.DateFormatter('%b'))
ax.set_xlabel(r'$time$')
# `flow` is in m^3/h (converted from L/h when it was created)
ax.set_ylabel(r'$Q_{max}^{D} \quad (m^3/h)$');

In [None]:
# Timestamp at which the maximum of `flow` occurs, one entry per day
time_peak_demand = flow.groupby(pd.Grouper(freq='D')).idxmax()

t = time_peak_demand

# Keep only the time-of-day part of each peak timestamp
t.dt.time

### Selecting values within a daily time window

Typical application: leak detection via Minimum Night Flow (MNF) analysis.

In [None]:
# Keep only the samples whose time of day lies between 02:30 and 04:00
bt = flow.between_time('02:30', '04:00')

bt

In [None]:
# Night-window samples from the start of the series up to 3 January 2018
bt[:'2018-01-03'].plot(marker='o')

In [None]:
# Daily mean of the night-window flow
mnf = bt.resample('1D').mean()

In [None]:
# Daily night-flow means overlaid with their 10-day rolling average.
ax = mnf.plot()
mnf.rolling('10D').mean().plot(ax=ax)

## Seasonality

### How to detect periodic signals

In [None]:
from scipy import signal

In [None]:
# Hourly mean flow
ts = flow.resample('1H').mean()

# Estimate power spectral density using a periodogram.
# No sampling frequency is passed, so `f` is expressed in cycles per sample
# (one sample = one hour here).
f, Pxx = signal.periodogram(ts)

In [None]:
# Visualise the periodogram, zoomed in on the low-frequency range.
fig, ax = plt.subplots()
ax.plot(f, Pxx)
ax.set_xlim(0, 0.2)
ax.set_ylim(bottom=0)
ax.set_xlabel('Frequency', fontsize=14)
ax.set_ylabel('Power Spectral Density', fontsize=14)

In [None]:
# Number of dominant spectral peaks to report.
n = 4

# Indices of the n largest Pxx values, strongest first.
topn_freq_indices = np.argsort(Pxx)[::-1][:n]

# Frequencies and powers belonging to those n peaks.
freqs = f[topn_freq_indices]
power = Pxx[topn_freq_indices]

# The period is the reciprocal of the frequency.
periods = 1 / np.array(freqs)

# Collect period / frequency / power per peak, keyed by rank (1 = strongest).
top_n_periods = {}
for rank, (period, freq, pxx) in enumerate(zip(periods, freqs, power), start=1):
    top_n_periods[f'period{rank}'] = period
    top_n_periods[f'freq{rank}'] = freq
    top_n_periods[f'power{rank}'] = pxx

In [None]:
# Display the period / frequency / power of the strongest spectral peaks
top_n_periods

* https://www.dataquest.io/blog/tutorial-time-series-analysis-with-pandas/
* https://jakevdp.github.io/PythonDataScienceHandbook/03.11-working-with-time-series.html
* Autocorrelations and Periodogram: http://qingkaikong.blogspot.com/2017/01/signal-processing-finding-periodic.html

## Autocorrelations

In [None]:
import statsmodels.api as sm
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

In [None]:
# Hourly mean flow
ts = flow.resample('1H').mean()

# Autocorrelation function for lags 0..200 (in hourly samples)
plot_acf(ts, lags=200);

### Pivoting time series

In [None]:
# Wrap the flow series in a one-column frame named 'q'.
df = flow.to_frame(name='q')
df

In [None]:

# Split each timestamp of the index into its calendar date and its time of day.
df = df.assign(date=df.index.date, time=df.index.time)
df

In [None]:
# Reshape to a matrix: one row per date, one column per time of day
M = df.pivot_table(values='q', index='date', columns='time')
M

In [None]:
# Heat map of the date x time-of-day matrix
sns.heatmap(M);

In [None]:
# Add the weekday name of each timestamp as a column
df['weekday'] = df.index.day_name()
df

In [None]:
# Aggregate flow per time of day (rows) and weekday (columns);
# pivot_table uses the mean as its default aggregation.
M = df.pivot_table(values='q', index='time', columns='weekday')
# Show the columns in calendar order, Monday through Sunday
M[['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']]

In [None]:
# Daily flow profile per weekday, one line (and colour) per day.
days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
colors = sns.color_palette('viridis', len(days))

fig, ax = plt.subplots()
for color, day in zip(colors, days):
    M[day].plot(ax=ax, color=color, label=day)

ax.set_ylim(0, None)
# Span the x-axis exactly from the first to the last time of day
ax.set_xlim(M.index[0], M.index[-1])
ax.legend()

### White Noise

In [None]:
import numpy as np
import pandas as pd

import statsmodels.api as sm
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

# Seeded generator so that the white-noise sample (and the random walk built
# from it in the next section) is reproducible after a kernel restart.
rng = np.random.default_rng(42)
x = pd.Series(rng.standard_normal(5000))

# Time-series view of the noise
fig, ax1 = plt.subplots(1, 1)
x.plot(ax=ax1)
ax1.set_xlabel(r'$t$', fontsize=16)
ax1.set_ylabel(r'$y(t)$', fontsize=16)
ax1.set_xlim((x.index[0], x.index[-1]))

# Autocorrelation function of the noise
fig, ax1 = plt.subplots(1, 1)
plot_acf(x, ax=ax1)
ax1.set_xlabel(r'$h$', fontsize=16)
ax1.set_ylabel(r'$\rho(h)$', fontsize=16);

### Random Walk Model

$y_t = y_{t-1} + \epsilon_t$

In [None]:
# Cumulative sum of the white noise = Gaussian random walk.
x.cumsum().plot(title='Gaussian Random Walk');

## Simple TS Decomposition

In [None]:
# Plot the hourly series; the semicolon suppresses the Axes repr
# (a trailing comma would instead build and display a one-element tuple).
ts.plot();

In [None]:
import statsmodels.api as sm

# Multiplicative decomposition with a period of 24 * 7 = 168 samples.
result = sm.tsa.seasonal_decompose(ts, period=24 * 7, model='multiplicative')
result.plot();

In [None]:
# NOTE(review): this cell repeats the decomposition call of the preceding cell
# with identical arguments; `result` is simply recomputed.
result = sm.tsa.seasonal_decompose(ts, model='multiplicative', period=168)

result.plot();

## Residual Analysis

In [None]:
# Residual component of the decomposition stored in `result`.
resid = result.resid

fig, ax = plt.subplots(figsize=(8, 5))
resid.plot(ax=ax)
ax.set(xlabel='', title='Residual plot')
ax.set_xticks([], minor=True)  # remove the minor ticks on the x-axis
sns.despine()
plt.show()

In [None]:
# Let's cheat a little bit ;)
# Drop the nodes listed in `industrial` and repeat the weekly decomposition
# on the remaining demand.

industrial = ['n1', 'n25', 'n347', 'n4']

data_wo_ind = data.drop(industrial, axis=1)

# NOTE: this overwrites `flow` and `ts` with the reduced demand
flow = data_wo_ind.sum(axis=1)/1000

ts = flow.resample('1H').mean()

# model name spelled out correctly ('multiplicative'); period = one week of hourly samples
res = sm.tsa.seasonal_decompose(ts, model='multiplicative', period=24*7)


res.plot();


In [None]:
# Plot the residuals
res.resid.plot()


# Drop the NaN entries of the residual component and subtract 1
# (presumably to centre the multiplicative residuals around zero — confirm)
residuals = res.resid.dropna() - 1

In [None]:
# Shifted residuals on their own figure.
fig, ax = plt.subplots(figsize=(8, 5))
residuals.plot(ax=ax, color=colors[1])
ax.set(xlabel='', title='Residual plot')
ax.set_xticks([], minor=True)  # remove the minor ticks on the x-axis
sns.despine()
plt.show()

In [None]:
# Autocorrelation of the residuals for the first 40 lags.
fig, ax = plt.subplots(figsize=(8, 5))
plot_acf(residuals, lags=40, ax=ax)
sns.despine()
plt.show()

In [None]:
def hist(series):
    """Draw the distribution of ``series`` on a new 8x5 figure.

    Parameters
    ----------
    series : data handed unchanged to ``sns.distplot``.

    Returns
    -------
    tuple
        ``(fig, ax)`` of the newly created figure.

    Notes
    -----
    The bar colour is read from the notebook-level ``colors`` palette.
    NOTE(review): ``sns.distplot`` is deprecated in newer seaborn releases;
    check the installed version before reusing this helper.
    """
    fig, ax = plt.subplots(figsize=(8, 5))
    bar_style = {'alpha': 0.8, 'edgecolor': 'black', 'color': colors[1]}
    curve_style = {'color': 'black', 'alpha': 0.7}
    sns.distplot(series, ax=ax, hist_kws=bar_style, kde_kws=curve_style)
    sns.despine()
    return fig, ax


hist(residuals)
plt.show()

In [None]:
# Q-Q plot of the residuals with a reference line (line='s')
sm.qqplot(residuals, line='s');

# Facebook Prophet

In [None]:
from fbprophet import Prophet

# Reload the full demand data set and rebuild the total flow.
filename = 'data/Demands.csv'  # units are L/h
data = pd.read_csv(filename, sep=';', index_col=0, decimal=',', parse_dates=[0])

flow = data.sum(axis=1) / 1000

# Blank out June and August 2018 to create gaps in the series.
for month in ('2018-6', '2018-8'):
    flow[month] = np.nan

flow.plot()

In [None]:
# Hourly mean flow
ts = flow.resample('1H').mean()

# Move the datetime index into a regular column
df = ts.reset_index()

df


In [None]:
# Rename the timestamp column to 'ds' and the value column (label 0) to 'y'
# NOTE(review): assumes the index read from the CSV is named 'Timestamp' — confirm
df = df.rename(columns={'Timestamp': 'ds', 0:'y'})
df

In [None]:
# Prophet model with multiplicative seasonality, fitted on the ds/y frame
m = Prophet(seasonality_mode='multiplicative')
# Alternative configuration (kept for reference):
# m = Prophet(seasonality_mode='multiplicative', interval_width=0.95, mcmc_samples=300)
m.fit(df)

In [None]:
# The model was fitted on hourly data, so extend the frame at hourly
# resolution: 7 days x 24 hours. (Without `freq`, make_future_dataframe
# adds daily steps, i.e. only 7 future rows.)
future = m.make_future_dataframe(periods=7 * 24, freq='H')

forecast = m.predict(future)
# Point forecast and uncertainty bounds for the last rows
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail()

In [None]:
# Plot the forecast produced by the fitted model
fig = m.plot(forecast);

In [None]:
# Plot the individual components of the forecast
m.plot_components(forecast);