Add multivariate support
tejaslodaya committed Dec 20, 2018
1 parent 45e1600 commit e7b57a6
Showing 3 changed files with 42 additions and 24 deletions.
34 changes: 24 additions & 10 deletions Timeseries_clustering.ipynb
@@ -109,7 +109,7 @@
"latent_length = 20\n",
"batch_size = 32\n",
"learning_rate = 0.0005\n",
"n_epochs = 40\n",
"n_epochs = 1\n",
"dropout_rate = 0.2\n",
"optimizer = 'Adam' # options: ADAM, SGD\n",
"cuda = True # options: True, False\n",
@@ -170,6 +170,24 @@
"sequence_length = X_train.shape[1]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Fetch `number_of_features` from dataset**\n",
"\n",
"This config corresponds to number of input features"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"number_of_features = X_train.shape[2]"
]
},
{
"cell_type": "markdown",
"metadata": {},
@@ -186,6 +204,7 @@
"outputs": [],
"source": [
"vrae = VRAE(sequence_length=sequence_length,\n",
" number_of_features = number_of_features,\n",
" hidden_size = hidden_size, \n",
" hidden_layer_depth = hidden_layer_depth,\n",
" latent_length = latent_length,\n",
@@ -213,7 +232,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"vrae.fit(train_dataset)\n",
@@ -278,13 +299,6 @@
"# If plotly to be used as rendering engine, uncomment below line\n",
"#plot_clustering(z_run, y_val, engine='plotly', download = False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
@@ -303,7 +317,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.3"
"version": "3.6.7"
}
},
"nbformat": 4,
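
With this change the notebook reads number_of_features off the third axis of the training array and passes it to the VRAE constructor. A minimal sketch of that configuration on random stand-in data, assuming the class is importable from vrae/vrae.py; the hyperparameter values mirror the notebook cells in this diff and are illustrative, not prescriptive:

    import numpy as np
    from vrae.vrae import VRAE

    # Stand-in multivariate data: 500 series, 140 timesteps, 3 channels.
    X_train = np.random.randn(500, 140, 3).astype(np.float32)

    sequence_length = X_train.shape[1]     # timesteps per series
    number_of_features = X_train.shape[2]  # input channels per timestep

    vrae = VRAE(sequence_length=sequence_length,
                number_of_features=number_of_features,
                hidden_size=90,
                hidden_layer_depth=2,
                latent_length=20,
                batch_size=32,
                learning_rate=0.0005,
                n_epochs=1,
                dropout_rate=0.2,
                optimizer='Adam',
                cuda=False)
    # Training then follows the notebook: vrae.fit(train_dataset), where
    # train_dataset is the torch dataset built from X_train earlier in the notebook.
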
5 changes: 3 additions & 2 deletions vrae/utils.py
@@ -112,10 +112,11 @@ def open_data(direc, ratio_train=0.8, dataset="ECG5000"):
data_train = np.loadtxt(datadir + '_TRAIN', delimiter=',')
data_test_val = np.loadtxt(datadir + '_TEST', delimiter=',')[:-1]
data = np.concatenate((data_train, data_test_val), axis=0)
data = np.expand_dims(data, -1)

N, D = data.shape
N, D, _ = data.shape

ind_cut = int(ratio_train * N)
ind = np.random.permutation(N)
return data[ind[:ind_cut], 1:], data[ind[ind_cut:], 1:], data[ind[:ind_cut], 0], data[ind[ind_cut:], 0]
return data[ind[:ind_cut], 1:, :], data[ind[ind_cut:], 1:, :], data[ind[:ind_cut], 0, :], data[ind[ind_cut:], 0, :]
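
The net effect in open_data is that the ECG5000 matrix gains an explicit feature axis via np.expand_dims before the train/validation split, so the univariate values come back in the (samples, timesteps, features) layout the rest of the code now expects. A rough sketch of the resulting shapes on random stand-in data (the sizes and the 0.8 split ratio are placeholders; column 0 plays the role of the class label, as in ECG5000):

    import numpy as np

    N, D = 5000, 141                       # series count; label column + 140 timesteps
    data = np.random.randn(N, D)           # stand-in for the concatenated TRAIN/TEST files
    data = np.expand_dims(data, -1)        # (N, D, 1): add the feature axis

    ind_cut = int(0.8 * N)
    ind = np.random.permutation(N)

    X_train = data[ind[:ind_cut], 1:, :]   # (4000, 140, 1) values, label column dropped
    X_val   = data[ind[ind_cut:], 1:, :]   # (1000, 140, 1)
    y_train = data[ind[:ind_cut], 0, :]    # (4000, 1)  labels keep the trailing axis
    y_val   = data[ind[ind_cut:], 0, :]    # (1000, 1)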

27 changes: 15 additions & 12 deletions vrae/vrae.py
@@ -12,31 +12,33 @@ class Encoder(nn.Module):
"""
Encoder network containing unrolled LSTM/GRU
:param number_of_features: number of input features
:param hidden_size: hidden size of the RNN
:param hidden_layer_depth: number of layers in RNN
:param latent_length: latent vector length
:param dropout: percentage of nodes to dropout
:param block: LSTM/GRU block
"""
def __init__(self, hidden_size, hidden_layer_depth, latent_length, dropout, block = 'LSTM'):
def __init__(self, number_of_features, hidden_size, hidden_layer_depth, latent_length, dropout, block = 'LSTM'):

super(Encoder, self).__init__()

self.number_of_features = number_of_features
self.hidden_size = hidden_size
self.hidden_layer_depth = hidden_layer_depth
self.latent_length = latent_length

if block == 'LSTM':
self.model = nn.LSTM(1, self.hidden_size, self.hidden_layer_depth, dropout = dropout)
self.model = nn.LSTM(self.number_of_features, self.hidden_size, self.hidden_layer_depth, dropout = dropout)
elif block == 'GRU':
self.model = nn.GRU(1, self.hidden_size, self.hidden_layer_depth, dropout = dropout)
self.model = nn.GRU(self.number_of_features, self.hidden_size, self.hidden_layer_depth, dropout = dropout)
else:
raise NotImplementedError

def forward(self, x):
"""Forward propagation of encoder. Given input, outputs the last hidden state of encoder
:param x: input to the encoder, of shape (sequence_length, batch_size, 1)
:param x: input to the encoder, of shape (sequence_length, batch_size, number_of_features)
:return: last hidden state of encoder, of shape (batch_size, hidden_size)
"""

@@ -150,6 +152,7 @@ class VRAE(BaseEstimator, nn.Module):
"""Variational recurrent auto-encoder. This module is used for dimensionality reduction of timeseries
:param sequence_length: length of the input sequence
:param number_of_features: number of input features
:param hidden_size: hidden size of the RNN
:param hidden_layer_depth: number of layers in RNN
:param latent_length: latent vector length
@@ -166,7 +169,7 @@
:param max_grad_norm: The grad-norm to be clipped
:param dload: Download directory where models are to be dumped
"""
def __init__(self, sequence_length, hidden_size=90, hidden_layer_depth=2, latent_length=20,
def __init__(self, sequence_length, number_of_features, hidden_size=90, hidden_layer_depth=2, latent_length=20,
batch_size=32, learning_rate=0.005, block='LSTM',
n_epochs=5, dropout_rate=0., optimizer='Adam', loss='MSELoss',
cuda=False, print_every=100, clip=True, max_grad_norm=5, dload='.'):
@@ -185,7 +188,7 @@ def __init__(self, sequence_length, hidden_size=90, hidden_layer_depth=2, latent
self.dtype = torch.cuda.FloatTensor


self.encoder = Encoder(hidden_size=hidden_size,
self.encoder = Encoder(number_of_features = number_of_features,
hidden_size=hidden_size,
hidden_layer_depth=hidden_layer_depth,
latent_length=latent_length,
dropout=dropout_rate,
@@ -199,7 +203,7 @@ def __init__(self, sequence_length, hidden_size=90, hidden_layer_depth=2, latent
hidden_size=hidden_size,
hidden_layer_depth=hidden_layer_depth,
latent_length=latent_length,
output_size=1,
output_size=number_of_features,
block=block,
dtype=self.dtype)

@@ -300,8 +304,8 @@ def _train(self, train_loader):
# Index first element of array to return tensor
X = X[0]

# required to swap axes, since dataloader gives output in (batch_size x seq_len)
X = X.permute(1,0).unsqueeze(-1)
# required to swap axes, since dataloader gives output in (batch_size x seq_len x num_of_features)
X = X.permute(1,0,2)

self.optimizer.zero_grad()
loss, recon_loss, kl_loss, _ = self.compute_loss(X)
@@ -395,13 +399,12 @@ def reconstruct(self, dataset, save = False):

for t, x in enumerate(test_loader):
x = x[0]
x = x.permute(1, 0).unsqueeze(-1)
x = x.permute(1, 0, 2)

x_decoded_each = self._batch_reconstruct(x)
x_decoded.append(x_decoded_each)

x_decoded = np.concatenate(x_decoded, axis=1)
x_decoded = x_decoded[:, :, 0].T

if save:
if os.path.exists(self.dload):
@@ -435,7 +438,7 @@ def transform(self, dataset, save = False):

for t, x in enumerate(test_loader):
x = x[0]
x = x.permute(1, 0).unsqueeze(-1)
x = x.permute(1, 0, 2)

z_run_each = self._batch_transform(x)
z_run.append(z_run_each)
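
The recurring edit in _train, reconstruct, and transform is the axis swap: the DataLoader yields batches shaped (batch_size, seq_len, number_of_features), while the recurrent layers here are built with the default batch_first=False and so expect (seq_len, batch_size, number_of_features). The univariate permute(1, 0).unsqueeze(-1) therefore becomes permute(1, 0, 2), and reconstruct correspondingly stops collapsing the decoded output to a single channel. A self-contained sketch of the shape transformation (sizes are illustrative):

    import torch

    batch_size, seq_len, number_of_features = 32, 140, 3
    X = torch.randn(batch_size, seq_len, number_of_features)   # as delivered by the DataLoader

    X = X.permute(1, 0, 2)                                      # swap batch and time, keep features
    assert X.shape == (seq_len, batch_size, number_of_features)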
