In [47]:
import os
import pandas as pd
path = "/Users/jorgerag/Documents/UCSD/courses/capstone/src/timeserires_transformer"
os.chdir(path)
pd.set_option('display.float_format', lambda x: '%.4f' % x)

Import data and parse it to work

In [48]:
df = pd.read_csv("../../data/photometry_analog/processed/model_data.csv")

In [49]:
unique_obj = df.groupby(["subject", "day"]).size().reset_index()
len(unique_obj)

33

In [132]:
train_df = df.groupby(["subject", "day"]).head(76).reset_index()
train_df['gcamp_lp_per_sec_zscore'] = train_df['gcamp_lp_per_sec_zscore'].round(0).astype('str')
test_df = df.groupby(["subject", "day"]).apply(lambda x : x.reset_index().iloc[76:86]).reset_index(drop=True)
train_df.to_csv('data/train_data.csv')
train_df['gcamp_lp_per_sec_zscore']

0        1.0
1        1.0
2        1.0
3        1.0
4        1.0
        ... 
2503    -1.0
2504    -1.0
2505    -1.0
2506    -1.0
2507    -0.0
Name: gcamp_lp_per_sec_zscore, Length: 2508, dtype: object

In [140]:
import utils
from torch.utils.data import DataLoader
import torch
import transformer_timeseries as tst
import numpy as np
import dataset as ds

# Hyperparams
test_size = 0.13157
batch_size = 76
target_col_name = "lp_met"
timestamp_col = "order"

## Params
dim_val = 512
n_heads = 8
n_decoder_layers = 4
n_encoder_layers = 4
dec_seq_len = 76 # length of input given to decoder
enc_seq_len = 76 # length of input given to encoder
output_sequence_length = 10 
window_size = enc_seq_len + output_sequence_length # used to slice data into sub-sequences
step_size = 1 # Step size, i.e. how many time steps does the moving window move at each step
in_features_encoder_linear_layer = 2048
in_features_decoder_linear_layer = 2048
max_seq_len = enc_seq_len
batch_first = False

# Define input variables 
exogenous_vars = [] # should contain strings. Each string must correspond to a column name
input_variables = [target_col_name] + exogenous_vars
target_idx = 0 # index position of target in batched trg_y

input_size = len(input_variables)

# Read data
data = utils.read_data(timestamp_col_name=timestamp_col)

# Remove test data from dataset
training_data = data[:-(round(len(data)*test_size))]

# Make list of (start_idx, end_idx) pairs that are used to slice the time series sequence into chunkc. 
# Should be training data indices only
training_indices = utils.get_indices_entire_sequence(
    data=training_data, 
    window_size=window_size, 
    step_size=step_size)

# Making instance of custom dataset class
training_data = ds.TransformerDataset(
    data=torch.tensor(training_data[input_variables].values).float(),
    indices=training_indices,
    enc_seq_len=enc_seq_len,
    dec_seq_len=dec_seq_len,
    target_seq_len=output_sequence_length
    )

# Making dataloader
training_data = DataLoader(training_data, batch_size)

i, batch = next(enumerate(training_data))

src, trg, trg_y = batch

# Permute from shape [batch size, seq len, num features] to [seq len, batch size, num features]
if batch_first == False:

    shape_before = src.shape
    src = src.permute(1, 0, 2)
    print("src shape changed from {} to {}".format(shape_before, src.shape))

    shape_before = trg.shape
    trg = trg.permute(1, 0, 2)
    print("src shape changed from {} to {}".format(shape_before, src.shape))

model = tst.TimeSeriesTransformer(
    input_size=len(input_variables),
    dec_seq_len=enc_seq_len,
    batch_first=batch_first,
    num_predicted_features=1
    )

# Make src mask for decoder with size:
# [batch_size*n_heads, output_sequence_length, enc_seq_len]
src_mask = utils.generate_square_subsequent_mask(
    dim1=output_sequence_length,
    dim2=enc_seq_len
    )

# Make tgt mask for decoder with size:
# [batch_size*n_heads, output_sequence_length, output_sequence_length]
tgt_mask = utils.generate_square_subsequent_mask( 
    dim1=output_sequence_length,
    dim2=output_sequence_length
    )

output = model(
    src=src,
    tgt=trg,
    src_mask=src_mask,
    tgt_mask=tgt_mask
    )

Reading file in data/train_data.csv
From get_src_trg: data size = torch.Size([2178, 1])
src shape changed from torch.Size([76, 76, 1]) to torch.Size([76, 76, 1])
src shape changed from torch.Size([76, 10, 1]) to torch.Size([76, 76, 1])


In [141]:
output.size()

torch.Size([10, 76, 1])

In [143]:
output

tensor([[[ 2.1080e-02],
         [ 1.8705e-01],
         [ 3.3952e-01],
         [ 3.8734e-01],
         [ 6.3265e-01],
         [-1.0303e-01],
         [ 5.8597e-01],
         [ 1.4115e-01],
         [ 2.9399e-01],
         [ 2.7081e-01],
         [ 1.3498e-01],
         [ 1.6099e-01],
         [-1.8264e-01],
         [ 4.1754e-01],
         [-4.6360e-01],
         [ 3.5915e-01],
         [ 5.2128e-01],
         [ 6.5262e-02],
         [ 1.3747e-01],
         [-5.0641e-03],
         [-2.6860e-01],
         [ 1.2587e-01],
         [-2.8364e-01],
         [ 7.0830e-01],
         [ 4.9614e-01],
         [-2.5660e-02],
         [-2.2140e-01],
         [ 8.6783e-02],
         [ 2.8065e-02],
         [ 1.0644e-01],
         [-2.8138e-01],
         [ 7.7847e-02],
         [ 2.3418e-01],
         [ 4.6180e-01],
         [ 7.2911e-01],
         [ 2.6973e-01],
         [ 6.3991e-01],
         [ 2.0764e-01],
         [ 7.7093e-01],
         [ 4.8172e-01],
         [ 9.2681e-01],
         [-1.838