In [1]:
import os
import sys
# Move the working directory one level up and make that directory importable,
# so the relative paths and package imports used by the following cells resolve.
# The sentinel keeps the cell idempotent: re-running it in the same kernel must
# not climb one more directory each time or keep appending to sys.path.
if "PROJECT_ROOT" not in globals():
    os.chdir('..')
    PROJECT_ROOT = os.getcwd()
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

In [3]:
import torch
from utils.preprocess import *

from Models.pretrain import *
from utils.utils import (
    cohen_kappa,
    get_samples_outputs,
    get_pred_from_outputs,
)

import numpy as np
import pandas as pd
import pickle
import argparse
from torch.autograd import Variable
import copy


# Weather datasets


## CONFIGURATION:


In [9]:
# TODO
# Run configuration for the Weather pre-training section.
dataset = 'Weather'                   # folder name under datasets/ and Performance/Pretrain/<model_type>/
dataset_name = "weather_pretraining"  # CSV file stem read from datasets/<dataset>/
batch_size = 128                      # rows of df_task sliced per optimisation step
hidden_size = 250                     # forwarded to the model constructor; also part of the checkpoint file name
seq_len = 11                          # forwarded to Preprocess; chunks with fewer rows are skipped
iterations = 10                       # number of independently initialised models to pre-train
output_size = 2                       # forwarded to the model constructor
num_layers = 1                        # forwarded to the model constructor

# NOTE(review): the settings below are not read by the cells visible in this
# section - presumably consumed elsewhere; confirm before removing.
loss_on_seq = False
freeze_inputs_weights = False
pretraining_samples = 0
pretraining_epochs = 0
write_weights = False
combination = False

model_type = 'GRU'  # or LSTM
if model_type == "GRU":
    model_class = GRU_Model
elif model_type == "LSTM":
    model_class = LSTM_Model
else:
    # Fail fast: without this branch model_class would be undefined, or worse,
    # silently keep the value left over from a previous run of this cell.
    raise ValueError(
        f"Unsupported model_type {model_type!r}; expected 'GRU' or 'LSTM'."
    )




## Executable code:


In [None]:
# Read the CSV selected by the configuration cell, then keep every column
# except 'task' as the frame the training loop iterates over.
df = pd.read_csv(f"datasets/{dataset}/{dataset_name}.csv")
df_task = df.drop(columns=["task"])
# Trailing bare expression so the notebook renders the resulting frame.
df_task

In [None]:
# Pre-train `iterations` freshly constructed models on df_task and pickle each
# model's state_dict twice: once before any update ("Before") and once after
# the last epoch ("After").
n_epochs = 10  # full passes over df_task; also embedded in the checkpoint file names
checkpoint_root = f"Performance/Pretrain/{model_type}/{dataset}"

# open(..., "wb") does not create missing folders, so make sure both exist.
for stage in ("Before", "After"):
    os.makedirs(f"{checkpoint_root}/{stage}", exist_ok=True)

for itter in range(1, iterations + 1):
    # NOTE(review): input_size is hard-coded to 4 - confirm it matches what
    # Preprocess._load_batch yields for this dataset.
    Model = model_class(
        input_size=4,
        device=torch.device("cpu"),
        num_layers=num_layers,
        hidden_size=hidden_size,
        output_size=output_size,
        batch_size=batch_size,
    )
    Preprocess_object = Preprocess(seq_len=seq_len)
    checkpoint_name = (
        f"{dataset}-{model_type}-pretrain-hidden{hidden_size}"
        f"-epoch{n_epochs}_itter{itter}.pickle"
    )

    # Snapshot of the weights before any optimisation step.
    with open(f"{checkpoint_root}/Before/{checkpoint_name}", "wb") as fp:
        pickle.dump(Model.state_dict(), fp)

    optimizer = torch.optim.Adam(Model.parameters(), lr=0.01)
    loss_fn = torch.nn.CrossEntropyLoss(reduction="mean")

    for epoch in range(n_epochs):
        for start in range(0, len(df_task), batch_size):
            chunk = df_task.iloc[start : start + batch_size]
            # Chunks with fewer than seq_len rows are skipped.
            if len(chunk) < seq_len:
                continue

            # Every column but the last is a feature; the last one is the label.
            x = chunk.iloc[:, 0:-1].values.astype(np.float32)
            y = list(chunk.iloc[:, -1])
            x, y, _ = Preprocess_object._load_batch(x, y)

            y_pred = get_samples_outputs(Model(x))
            loss = loss_fn(y_pred, y)

            # Backward pass and weight update.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    # Snapshot of the weights after the final epoch.
    with open(f"{checkpoint_root}/After/{checkpoint_name}", "wb") as fp:
        pickle.dump(Model.state_dict(), fp)
    print(f'itter {itter} finished')

# SINE datasets


## CONFIGURATION:


In [16]:
# TODO
# Run configuration for the SINE pre-training section.
dataset = 'SINE'                                    # folder name under datasets/ and Performance/Pretrain/<model_type>/
dataset_name = "sine_rw10_mode5_extended_6-6_1234"  # CSV file stem read from datasets/<dataset>/
Task_Number = 1                                     # only rows whose 'task' column equals this value are kept
batch_size = 128                                    # rows of df_task sliced per optimisation step
hidden_size = 250                                   # forwarded to the model constructor; also part of the checkpoint file name
seq_len = 10                                        # forwarded to Preprocess; chunks with fewer rows are skipped
iterations = 10                                     # number of independently initialised models to pre-train
output_size = 2                                     # forwarded to the model constructor
num_layers = 1                                      # forwarded to the model constructor

# NOTE(review): the settings below are not read by the cells visible in this
# section - presumably consumed elsewhere; confirm before removing.
loss_on_seq = False
freeze_inputs_weights = False
pretraining_samples = 0
pretraining_epochs = 0
write_weights = False
combination = False

model_type = 'GRU'  # or LSTM
if model_type == "GRU":
    model_class = GRU_Model
elif model_type == "LSTM":
    model_class = LSTM_Model
else:
    # Fail fast: without this branch model_class would be undefined, or worse,
    # silently keep the value left over from a previous run of this cell.
    raise ValueError(
        f"Unsupported model_type {model_type!r}; expected 'GRU' or 'LSTM'."
    )


## Executable code:


In [None]:
# Read the CSV selected by the configuration cell, keep only the rows of the
# configured task, and drop the 'task' column from the result.
df = pd.read_csv(f"datasets/{dataset}/{dataset_name}.csv")
df_task = df.loc[df["task"] == Task_Number].drop(columns=["task"])
# Trailing bare expression so the notebook renders the resulting frame.
df_task

In [None]:
# Pre-train `iterations` freshly constructed models on df_task and pickle each
# model's state_dict twice: once before any update ("Before") and once after
# the last epoch ("After").
n_epochs = 10  # full passes over df_task; also embedded in the checkpoint file names
checkpoint_root = f"Performance/Pretrain/{model_type}/{dataset}"

# open(..., "wb") does not create missing folders, so make sure both exist.
for stage in ("Before", "After"):
    os.makedirs(f"{checkpoint_root}/{stage}", exist_ok=True)

for itter in range(1, iterations + 1):
    # NOTE(review): input_size is hard-coded to 4 - confirm it matches what
    # Preprocess._load_batch yields for this dataset.
    Model = model_class(
        input_size=4,
        device=torch.device("cpu"),
        num_layers=num_layers,
        hidden_size=hidden_size,
        output_size=output_size,
        batch_size=batch_size,
    )
    Preprocess_object = Preprocess(seq_len=seq_len)
    checkpoint_suffix = (
        f"{model_type}-pretrain-hidden{hidden_size}"
        f"-epoch{n_epochs}_iter{itter}.pickle"
    )

    # Snapshot of the weights before any optimisation step.
    with open(f"{checkpoint_root}/Before/{dataset}-{checkpoint_suffix}", "wb") as fp:
        pickle.dump(Model.state_dict(), fp)

    optimizer = torch.optim.Adam(Model.parameters(), lr=0.01)
    loss_fn = torch.nn.CrossEntropyLoss(reduction="mean")

    for epoch in range(n_epochs):
        for start in range(0, len(df_task), batch_size):
            chunk = df_task.iloc[start : start + batch_size]
            # Chunks with fewer than seq_len rows are skipped.
            if len(chunk) < seq_len:
                continue

            # Every column but the last is a feature; the last one is the label.
            x = chunk.iloc[:, 0:-1].values.astype(np.float32)
            y = list(chunk.iloc[:, -1])
            x, y, _ = Preprocess_object._load_batch(x, y)

            y_pred = get_samples_outputs(Model(x))
            loss = loss_fn(y_pred, y)

            # Backward pass and weight update.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    # Snapshot of the weights after the final epoch.
    # NOTE(review): this file is prefixed with dataset_name while the "Before"
    # file is prefixed with dataset. Kept as-is so existing readers of these
    # files keep working - confirm whether the mismatch is intended.
    with open(f"{checkpoint_root}/After/{dataset_name}-{checkpoint_suffix}", "wb") as fp:
        pickle.dump(Model.state_dict(), fp)
    print(f'itter {itter} finished')