In [1]:
# Enable IPython's autoreload extension in mode 2, which re-imports modules
# before each cell executes so edits to local packages are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import datetime

import IPython
import IPython.display
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# import seaborn as sns
import tensorflow as tf

from sklearn.metrics import mean_squared_error, mean_absolute_error

from timeLab.timelab import *

# Notebook-wide matplotlib defaults: 8x6 figures, no grid on the axes.
mpl.rcParams.update({
    'figure.figsize': (8, 6),
    'axes.grid': False,
})

Init Plugin
Init Graph Optimizer
Init Kernel


In [3]:
# Fetch the Jena climate archive via Keras' download helper and ask it to
# extract the archive as well.
zip_path = tf.keras.utils.get_file(
    fname='jena_climate_2009_2016.csv.zip',
    origin='https://storage.googleapis.com/tensorflow/tf-keras-datasets/jena_climate_2009_2016.csv.zip',
    extract=True,
)

# Strip the final extension from the archive path; the remainder is used below
# as the path of the CSV file.
csv_path, _ = os.path.splitext(zip_path)

In [4]:
# Load the CSV at csv_path into a DataFrame.
df = pd.read_csv(csv_path)

In [5]:
# Use the 'Date Time' column as the row index (set_index returns a new frame).
df = df.set_index('Date Time')

In [6]:
# Parse the index with the dataset's explicit day.month.year layout (the same
# format this notebook uses when parsing the 'Date Time' column). Without a
# format pandas has to guess, and ambiguous strings such as '02.01.2009' can
# be read month-first.
df.index = pd.to_datetime(df.index, format='%d.%m.%Y %H:%M:%S')

In [7]:
# inspect(df)

In [8]:
# Reload the raw CSV and keep one row out of every six, beginning at
# position 5 (positional slice start:stop:step).
df = pd.read_csv(csv_path).iloc[5::6]

# Detach the timestamp strings from the frame and parse them using the
# day.month.year layout.
date_time = pd.to_datetime(
    df.pop('Date Time'),
    format='%d.%m.%Y %H:%M:%S',
)

In [9]:
# Preview the first rows of the subsampled frame.
df.head()

Unnamed: 0,p (mbar),T (degC),Tpot (K),Tdew (degC),rh (%),VPmax (mbar),VPact (mbar),VPdef (mbar),sh (g/kg),H2OC (mmol/mol),rho (g/m**3),wv (m/s),max. wv (m/s),wd (deg)
5,996.5,-8.05,265.38,-8.78,94.4,3.33,3.14,0.19,1.96,3.15,1307.86,0.21,0.63,192.7
11,996.62,-8.88,264.54,-9.77,93.2,3.12,2.9,0.21,1.81,2.91,1312.25,0.25,0.63,190.3
17,996.84,-8.81,264.59,-9.66,93.5,3.13,2.93,0.2,1.83,2.94,1312.18,0.18,0.63,167.2
23,996.99,-9.05,264.34,-10.02,92.6,3.07,2.85,0.23,1.78,2.85,1313.61,0.1,0.38,240.0
29,997.46,-9.63,263.72,-10.65,92.2,2.94,2.71,0.23,1.69,2.71,1317.19,0.4,0.88,157.0


In [10]:
# Replace the -9999.0 placeholder values in both wind-speed columns with 0.0.
# The write goes through df.loc so it is guaranteed to land in the DataFrame
# itself; assigning into a Series pulled out of the frame relies on view
# semantics and does not update `df` under pandas Copy-on-Write.
bad_wv = df['wv (m/s)'] == -9999.0
df.loc[bad_wv, 'wv (m/s)'] = 0.0

bad_max_wv = df['max. wv (m/s)'] == -9999.0
df.loc[bad_max_wv, 'max. wv (m/s)'] = 0.0

# Bind the column names after the write so they refer to the cleaned data.
wv = df['wv (m/s)']
max_wv = df['max. wv (m/s)']

# Sanity check: the minimum should no longer be the -9999.0 placeholder.
df['wv (m/s)'].min()

0.0

In [11]:
# Remove the speed and direction columns from the frame; they are replaced by
# the vector components written below.
wv = df.pop('wv (m/s)')
max_wv = df.pop('max. wv (m/s)')

# Wind direction converted from degrees to radians.
wd_rad = df.pop('wd (deg)')*np.pi / 180

# Evaluate the direction's cosine and sine once and reuse them for both speeds.
wd_cos = np.cos(wd_rad)
wd_sin = np.sin(wd_rad)

# x / y components of the wind speed.
df['Wx'] = wv * wd_cos
df['Wy'] = wv * wd_sin

# x / y components of the max wind speed.
df['max Wx'] = max_wv * wd_cos
df['max Wy'] = max_wv * wd_sin

In [12]:
# Convert every parsed timestamp to POSIX seconds.
timestamp_s = date_time.map(lambda stamp: stamp.timestamp())

In [13]:
# Period lengths in seconds.
day = 24*60*60
year = (365.2425)*day

# Phase angle of each timestamp within the daily and the yearly cycle.
day_phase = timestamp_s * (2 * np.pi / day)
year_phase = timestamp_s * (2 * np.pi / year)

# Encode each cycle as a sine/cosine pair.
df['Day sin'] = np.sin(day_phase)
df['Day cos'] = np.cos(day_phase)
df['Year sin'] = np.sin(year_phase)
df['Year cos'] = np.cos(year_phase)

In [14]:
# Map each column name to its position in the frame.
column_indices = dict(zip(df.columns, range(df.shape[1])))

# Contiguous 70% / 20% / 10% split in row order.
n = len(df)
train_end = int(n*0.7)
val_end = int(n*0.9)

train_df = df.iloc[:train_end]
val_df = df.iloc[train_end:val_end]
test_df = df.iloc[val_end:]

num_features = df.shape[1]

In [15]:
# Per-column statistics are taken from the training split only and then
# applied to all three splits.
train_mean = train_df.mean()
train_std = train_df.std()

train_df, val_df, test_df = (
    (part - train_mean) / train_std
    for part in (train_df, val_df, test_df)
)

In [16]:
# Data windowing

In [17]:
# Single step models

In [18]:
# Row counts of the train / validation / test splits, in that order.
print(*map(len, (train_df, val_df, test_df)))

49063 14018 7010


In [19]:
# Stitch the three normalised splits back together in their original order.
# pd.concat is used because DataFrame.append was deprecated in pandas 1.4 and
# removed in pandas 2.0; with default arguments the result is the same.
df = pd.concat([train_df, val_df, test_df])

In [20]:
# Write the re-joined, normalised frame to a CSV file; the path is relative,
# so it lands in the kernel's current working directory.
df.to_csv('tf_processed_data.csv')

In [25]:
# Display the re-joined frame (the notebook renders a truncated view).
df

Unnamed: 0,p (mbar),T (degC),Tpot (K),Tdew (degC),rh (%),VPmax (mbar),VPact (mbar),VPdef (mbar),sh (g/kg),H2OC (mmol/mol),rho (g/m**3),Wx,Wy,max Wx,max Wy,Day sin,Day cos,Year sin,Year cos
5,0.945308,-1.982473,-2.041888,-1.918973,1.117102,-1.302851,-1.477323,-0.790424,-1.480036,-1.482697,2.218524,0.193409,0.221161,0.111140,0.217928,0.366111,1.366069,-0.061052,1.428434
11,0.959770,-2.078372,-2.138166,-2.060964,1.044617,-1.330143,-1.534354,-0.786272,-1.536190,-1.539035,2.325708,0.172987,0.222101,0.109458,0.227798,0.707200,1.224794,-0.060029,1.428424
17,0.986284,-2.070284,-2.132435,-2.045187,1.062738,-1.328843,-1.527225,-0.788348,-1.528703,-1.531992,2.323998,0.207983,0.276266,0.111218,0.324078,1.000100,1.000059,-0.059006,1.428412
23,1.004362,-2.098014,-2.161090,-2.096820,1.008375,-1.336641,-1.546235,-0.782121,-1.547420,-1.553119,2.358913,0.270343,0.195267,0.246907,0.145176,1.224850,0.707179,-0.057983,1.428400
29,1.061006,-2.165028,-2.232152,-2.187178,0.984214,-1.353535,-1.579503,-0.782121,-1.581113,-1.585982,2.446320,0.112264,0.350818,0.048640,0.402053,1.366133,0.366112,-0.056960,1.428388
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
420521,1.629854,-1.165600,-1.281981,-1.428459,-0.235937,-0.996148,-1.251577,-0.499831,-1.262910,-1.264390,1.555401,-0.129648,0.148238,-0.119794,0.175300,-1.348713,0.425346,-0.065547,1.428472
420527,1.535849,-1.214127,-1.322097,-1.640728,-0.580840,-1.019541,-1.356133,-0.443788,-1.363986,-1.365328,1.583967,-0.060615,-0.214379,-0.123420,-0.251059,-1.192686,0.759926,-0.064524,1.428464
420533,1.510540,-1.370107,-1.475683,-1.649333,-0.181573,-1.087119,-1.358509,-0.549647,-1.367729,-1.370023,1.734122,-0.033443,0.415894,-0.157093,0.487113,-0.955374,1.042725,-0.063501,1.428456
420539,1.445460,-1.386283,-1.487144,-1.685190,-0.217815,-1.094917,-1.375143,-0.545496,-1.382704,-1.384107,1.734366,0.155773,0.116774,0.133325,0.115670,-0.652949,1.254471,-0.062478,1.428447


In [23]:
# Build a panel from the frame. Other from_data calls in this notebook pass
# (data, lookback, horizon) positionally, so this is presumably lookback=10,
# horizon=5 — TODO confirm against the timeLab signature.
panel = from_data(df, 10, 5)

In [33]:
# Replace panel.x with the result of applying `.sum()` through panel.x.apply.
# NOTE(review): panel.x is overwritten with its own transform, so this cell is
# not idempotent — rebuild the panel before re-running it.
panel.x = panel.x.apply(lambda x: x.sum())

100%|██████████| 70077/70077 [01:05<00:00, 1063.73it/s]


In [51]:
# Display the panel.
# NOTE(review): the saved output of this cell is
# "AttributeError: 'PanelSide' object has no attribute 'assets'" — possibly a
# consequence of panel.x having been reassigned; confirm and fix or remove.
panel

AttributeError: 'PanelSide' object has no attribute 'assets'

In [38]:
# NOTE(review): this rebinds `df`, replacing the climate frame built earlier in
# the notebook with the contents of 'processed.pkl'. Later cells index `df` by
# climate column names (e.g. 'T (degC)'), so on a top-to-bottom run they would
# receive this frame instead — consider loading into a separate name.
# Only unpickle files you trust: unpickling can execute arbitrary code.
df = pd.read_pickle('processed.pkl')

In [40]:
# Build a block from the current `df` (from_dataframe is not imported by name
# in this notebook; presumably it comes from the timeLab star import).
block = from_dataframe(df)

In [45]:
# Display the first element returned by block.split_assets().
block.split_assets()[0]

Unnamed: 0_level_0,LNC,LNC,LNC,LNC,LNC
Unnamed: 0_level_1,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2005-12-21,38.127885,39.324626,38.120589,39.207870,2190100.0
2005-12-22,39.185984,39.324630,38.879500,39.040039,1095500.0
2005-12-23,39.061931,39.397604,38.959771,39.324631,588800.0
2005-12-27,39.514374,39.514374,38.981676,39.237080,932700.0
2005-12-28,39.455973,39.696782,39.244353,39.310028,427200.0
...,...,...,...,...,...
2020-11-04,36.009998,36.900002,34.630001,35.029999,2770700.0
2020-11-05,36.000000,36.799999,35.099998,35.900002,3318600.0
2020-11-06,36.270000,36.689999,33.750000,34.070000,3075200.0
2020-11-09,38.349998,44.360001,38.310001,42.630001,6787600.0


In [None]:
# Display panel.x.
panel.x

In [36]:
# Rebuild the block from whatever `df` currently refers to.
block = from_dataframe(df)

0    p
1    T
2    r
3    V
4    s
5    H
6    W
7    m
8    D
9    Y
dtype: object

In [21]:
# Window sizes for this experiment.
lookback, horizon = 1, 1

# Build the panel from the frame, restricted to the 'T (degC)' channel.
panel = from_data(df, lookback, horizon, channels='T (degC)')

In [None]:
baseline = Baseline(panel)

# Flatten the test targets and the baseline predictions, then discard the
# first `panel.horizon` entries of each.
flat_series = (panel.test.y.flatten(), baseline.predict().flatten())
ytrue, ypred = (series[panel.horizon:] for series in flat_series)

In [None]:
# Report MAE first, then MSE, one value per line.
for metric in (mean_absolute_error, mean_squared_error):
    print(metric(ytrue, ypred))

In [None]:
# Build a LinearModel on the current panel and fit it for one epoch.
linear = LinearModel(panel)
linear.fit(epochs=1)

In [None]:
# Build a DenseModel on the current panel and fit it for one epoch.
dense = DenseModel(panel)
dense.fit(epochs=1)

In [None]:
# Window sizes for this experiment.
lookback, horizon = 3, 1

# Rebuild the panel with the new window sizes, restricted to 'T (degC)'.
panel = from_data(df, lookback, horizon, channels='T (degC)')

In [None]:
# Rebuild the model on the panel that is current for this section before
# fitting. With the constructor commented out, `fit` ran on the `linear`
# object left over from the earlier panel, which depends on hidden kernel state.
linear = LinearModel(panel)
linear.fit(epochs=1)

In [None]:
# Build a fresh DenseModel on the current panel and fit it for one epoch.
dense = DenseModel(panel)
dense.fit(epochs=1)

In [None]:
# ConvModel whose kernel size equals the panel's lookback; fit for one epoch.
conv = ConvModel(panel, kernel_size=panel.lookback)
conv.fit(epochs=1)

In [None]:
# Window sizes for this experiment.
lookback, horizon = 27, 1

# Rebuild the panel with the new window sizes, restricted to 'T (degC)'.
panel = from_data(df, lookback, horizon, channels='T (degC)')

In [None]:
# Kernel spans the whole lookback window. The size is read from the panel (as
# the earlier ConvModel cell does) instead of repeating the literal 27, so it
# cannot drift from the lookback the panel was built with.
conv = ConvModel(panel, kernel_size=panel.lookback)
conv.fit(epochs=5)

In [None]:
# Same panel, but with a kernel size of 3; fit for five epochs.
conv = ConvModel(panel, kernel_size=3)
conv.fit(epochs=5)

In [None]:
# SeparableConvModel with a kernel size of 3 on the same panel; five epochs.
sep = SeparableConvModel(panel, kernel_size=3)
sep.fit(epochs=5)

In [None]:
# Column groups, one list per sub-frame (df1 .. df5, in this order).
feature_groups = (
    ["Day sin", "Day cos", "Year sin", "Year cos"],
    ["Wx", "Wy", "max Wx", "max Wy"],
    ["T (degC)", "Tpot (K)", "Tdew (degC)"],
    ["VPmax (mbar)", "VPact (mbar)", "VPdef (mbar)"],
    ["rho (g/m**3)", "H2OC (mmol/mol)", "sh (g/kg)", "rh (%)", "p (mbar)"],
)

# Select each group of columns from the frame.
df1, df2, df3, df4, df5 = (df[group] for group in feature_groups)

In [None]:
# Combine the five column-group frames into a single block.
block = from_dataframes([df1, df2, df3, df4, df5])

In [None]:
# Window sizes for this experiment.
lookback, horizon = 100, 1

# Build the panel from the multi-frame block; only 'T (degC)' is requested as
# the y channel.
panel = from_data(block, lookback, horizon, y_channels='T (degC)')

In [None]:
baseline = Baseline(panel)

# Flatten the test targets and the baseline predictions, then discard the
# first `panel.horizon` entries of each.
flat_series = (panel.test.y.flatten(), baseline.predict().flatten())
ytrue, ypred = (series[panel.horizon:] for series in flat_series)

In [None]:
# Report MAE first, then MSE, one value per line.
for metric in (mean_absolute_error, mean_squared_error):
    print(metric(ytrue, ypred))

In [None]:
# Build a LinearModel on the block-based panel and fit it for one epoch.
linear = LinearModel(panel)
linear.fit(epochs=1)

In [None]:
# DenseModel configured with dense_units=100 and dense_layers=5; ten epochs.
dense = DenseModel(panel, dense_units=100, dense_layers=5)
dense.fit(epochs=10)

In [None]:
# ConvModel configured with conv_units=100 and conv_layers=5; ten epochs.
conv = ConvModel(panel, conv_units=100, conv_layers=5)
conv.fit(epochs=10)