In [4]:
import os
import sys
# Move the working directory one level up and make that directory importable.
# Later cells rely on this: they import `utils` / `Models` and use the relative
# paths "datasets/..." and "Performance/...".
# NOTE: the chdir is relative, so re-running this cell in the same kernel moves
# up one more level each time; run it exactly once per kernel session.
os.chdir('..')
sys.path.append(os.getcwd())

In [5]:
import torch
from utils.preprocess import *

from Models.pretrain import *
from utils.utils import (
    cohen_kappa,
    get_samples_outputs,
    get_pred_from_outputs,
)

import numpy as np
import pandas as pd
import pickle
import argparse
from torch.autograd import Variable
import copy


# Weather datasets


## CONFIGURATION:


In [15]:
# Experiment settings for the Weather pretraining run.
dataset = 'Weather'                   # sub-folder of datasets/ and of Performance/Pretrain/<model_type>/
dataset_name = "weather_pretraining"  # CSV file name without extension
batch_size = 128
hidden_size = 300
seq_len = 11        # batches with fewer rows than this are skipped by the training loop
iterations = 10     # number of independently initialised models to pretrain
output_size = 2
input_size = 4
num_layers = 1
# NOTE(review): the following settings are not read by any cell visible in this
# notebook; presumably kept for parity with other experiment scripts - confirm.
loss_on_seq = False
freeze_inputs_weights = False
pretraining_samples = 0
pretraining_epochs = 0
write_weights = False
combination = False
model_type = 'GRU'  # or LSTM
if model_type == "GRU":
    model_class = GRU_Model
elif model_type == "LSTM":
    model_class = LSTM_Model
else:
    # Fail loudly instead of leaving `model_class` undefined (or silently
    # reusing a stale value from an earlier run of a configuration cell).
    raise ValueError(
        f"Unsupported model_type {model_type!r}; expected 'GRU' or 'LSTM'"
    )




## Executable code:


In [16]:
# Read the Weather CSV selected in the configuration cell and discard the
# 'task' column; every remaining row is used for pretraining.
csv_path = f"datasets/{dataset}/{dataset_name}.csv"
df = pd.read_csv(csv_path)
df_task = df.drop(columns='task')
df_task

Unnamed: 0,RH,T_d,w_s,w_d,target
0,-0.878049,0.180556,0.142857,-1.333083,1
1,-0.878049,0.138889,0.571429,0.652632,1
2,-0.926829,0.111111,0.357143,-1.298496,1
3,-0.975610,0.125000,0.428571,-0.875188,1
4,-0.926829,0.138889,0.142857,-1.347368,1
...,...,...,...,...,...
24100,-0.707317,1.277778,0.428571,-1.346617,1
24101,0.560976,2.138889,0.714286,-0.121053,1
24102,-0.073171,2.041667,-0.142857,-0.309023,1
24103,-0.390244,1.722222,0.214286,-0.128571,1


In [17]:
# Pretrain `iterations` independently initialised models on the Weather data.
# For every run the state_dict is pickled twice: right after construction
# ("Before") and after training ("After").
n_epochs = 10  # passes over df_task per run; also embedded in the file names
for itter in range(1, iterations + 1):
    Model = model_class(
        input_size=input_size,
        device=torch.device("cpu"),
        num_layers=num_layers,
        hidden_size=hidden_size,
        output_size=output_size,
        batch_size=batch_size,
    )
    Preprocess_object = Preprocess(seq_len=seq_len)

    checkpoint_name = (
        f"{dataset}-{model_type}-pretrain-hidden{hidden_size}"
        f"-epoch{n_epochs}_itter{itter}.pickle"
    )
    before_path = f"Performance/Pretrain/{model_type}/{dataset}/Before/{checkpoint_name}"
    after_path = f"Performance/Pretrain/{model_type}/{dataset}/After/{checkpoint_name}"
    # Create both output folders up front so a missing directory is reported
    # before any training time is spent.
    for checkpoint_path in (before_path, after_path):
        os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

    with open(before_path, "wb") as fp:
        pickle.dump(Model.state_dict(), fp)

    optimizer = torch.optim.Adam(Model.parameters(), lr=0.01)
    loss_fn = torch.nn.CrossEntropyLoss(reduction="mean")
    for _epoch in range(n_epochs):
        for start in range(0, len(df_task), batch_size):
            batch = df_task.iloc[start : start + batch_size]
            # The last column is the label; all preceding columns are features.
            x = batch.iloc[:, 0:-1].values.astype(np.float32)
            y = list(batch.iloc[:, -1])
            if len(y) < seq_len:
                # Too few rows for a single sequence (only possible for the
                # trailing batch), so skip it.
                continue
            # presumably turns the raw batch into model-ready sequences and
            # targets - confirm in utils.preprocess
            x, y, _ = Preprocess_object._load_batch(x, y)
            y_pred, _ = Model(x)
            # presumably reduces the model output to one prediction per sample
            # - confirm in utils.utils
            y_pred = get_samples_outputs(y_pred)
            loss = loss_fn(y_pred, y)
            # backward pass
            optimizer.zero_grad()
            loss.backward()
            # update weights
            optimizer.step()

    with open(after_path, "wb") as fp:
        pickle.dump(Model.state_dict(), fp)
    print(f'itter {itter} finished')

itter 1 finished
itter 2 finished
itter 3 finished
itter 4 finished
itter 5 finished
itter 6 finished
itter 7 finished
itter 8 finished
itter 9 finished
itter 10 finished


# SINE datasets


## CONFIGURATION:


In [55]:
# Experiment settings for the SINE pretraining run.
dataset = 'SINE'                                    # sub-folder of datasets/ and of Performance/Pretrain/<model_type>/
dataset_name = "sine_rw10_mode5_extended_6-6_1234"  # CSV file name without extension
Task_Number = 4     # only rows whose 'task' column equals this value are used
batch_size = 128
hidden_size = 300
seq_len = 10        # batches with fewer rows than this are skipped by the training loop
iterations = 10     # number of independently initialised models to pretrain
output_size = 2
input_size = 2
num_layers = 1
# NOTE(review): the following settings are not read by any cell visible in this
# notebook; presumably kept for parity with other experiment scripts - confirm.
loss_on_seq = False
freeze_inputs_weights = False
pretraining_samples = 0
pretraining_epochs = 0
write_weights = False
combination = False
model_type = 'GRU'  # or LSTM
if model_type == "GRU":
    model_class = GRU_Model
elif model_type == "LSTM":
    model_class = LSTM_Model
else:
    # Fail loudly instead of leaving `model_class` undefined (or silently
    # reusing a stale value from an earlier run of a configuration cell).
    raise ValueError(
        f"Unsupported model_type {model_type!r}; expected 'GRU' or 'LSTM'"
    )


## Executable code:


In [56]:
# Read the SINE CSV selected in the configuration cell, keep only the rows
# belonging to Task_Number, then discard the 'task' column itself.
csv_path = f"datasets/{dataset}/{dataset_name}.csv"
df = pd.read_csv(csv_path)
df_task = df.loc[df["task"] == Task_Number].drop(columns='task')
df_task

Unnamed: 0,x1,x2,target
150000,0.913390,0.380142,1
150001,0.944662,0.337085,1
150002,0.897754,0.339643,1
150003,0.919343,0.291981,1
150004,0.951719,0.315031,1
...,...,...,...
199995,0.574179,0.117611,0
199996,0.570672,0.159196,0
199997,0.608093,0.129590,0
199998,0.605501,0.143455,0


In [57]:
# Pretrain `iterations` independently initialised models on the selected SINE
# task. For every run the state_dict is pickled twice: right after
# construction ("Before") and after training ("After").
n_epochs = 10  # passes over df_task per run; also embedded in the file names
for itter in range(1, iterations + 1):
    Model = model_class(
        input_size=input_size,
        device=torch.device("cpu"),
        num_layers=num_layers,
        hidden_size=hidden_size,
        output_size=output_size,
        batch_size=batch_size,
    )
    Preprocess_object = Preprocess(seq_len=seq_len)

    checkpoint_name = (
        f"{dataset}-Task_{Task_Number}-{model_type}-pretrain-hidden{hidden_size}"
        f"-epoch{n_epochs}_iter{itter}.pickle"
    )
    before_path = f"Performance/Pretrain/{model_type}/{dataset}/Before/{checkpoint_name}"
    after_path = f"Performance/Pretrain/{model_type}/{dataset}/After/{checkpoint_name}"
    # Create both output folders up front so a missing directory is reported
    # before any training time is spent.
    for checkpoint_path in (before_path, after_path):
        os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

    with open(before_path, "wb") as fp:
        pickle.dump(Model.state_dict(), fp)

    optimizer = torch.optim.Adam(Model.parameters(), lr=0.01)
    loss_fn = torch.nn.CrossEntropyLoss(reduction="mean")
    for _epoch in range(n_epochs):
        for start in range(0, len(df_task), batch_size):
            batch = df_task.iloc[start : start + batch_size]
            # The last column is the label; all preceding columns are features.
            x = batch.iloc[:, 0:-1].values.astype(np.float32)
            y = list(batch.iloc[:, -1])
            if len(y) < seq_len:
                # Too few rows for a single sequence (only possible for the
                # trailing batch), so skip it.
                continue
            # presumably turns the raw batch into model-ready sequences and
            # targets - confirm in utils.preprocess
            x, y, _ = Preprocess_object._load_batch(x, y)
            y_pred, _ = Model(x)
            # presumably reduces the model output to one prediction per sample
            # - confirm in utils.utils
            y_pred = get_samples_outputs(y_pred)
            loss = loss_fn(y_pred, y)
            # backward pass
            optimizer.zero_grad()
            loss.backward()
            # update weights
            optimizer.step()

    with open(after_path, "wb") as fp:
        pickle.dump(Model.state_dict(), fp)
    print(f'itter {itter} finished')

itter 1 finished
itter 2 finished
itter 3 finished
itter 4 finished
itter 5 finished
itter 6 finished
itter 7 finished
itter 8 finished
itter 9 finished
itter 10 finished
