In [1]:
import os
import sys

# Remember the directory the kernel started in the first time this cell runs.
# A plain `os.chdir('..')` climbs one level further on every re-run of the
# cell; anchoring on the saved start directory makes the cell idempotent
# within a kernel session.
try:
    _NOTEBOOK_DIR
except NameError:
    _NOTEBOOK_DIR = os.getcwd()

# Work from the parent of the notebook directory: the relative paths used
# further down ("datasets/...", "performance/...") are resolved from there.
os.chdir(os.path.join(_NOTEBOOK_DIR, os.pardir))
PROJECT_ROOT = os.getcwd()

# Make the project packages importable without piling up duplicate entries
# when the cell is executed again.
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

In [2]:
import torch
from Models.pretrain import *
from Models.cPB_SML import *
from Models.cPB import *
from river import stream
from evaluation.cl_evaluation import EvaluateContinualLearning
from evaluation.learner_config import LearnerConfig
from evaluation.prequential_evaluation import EvaluatePrequential, make_dir
import traceback
from evaluation.test_utils import *

import torch.nn.functional as F
import numpy as np
import pandas as pd
import pickle
import argparse
from torch.autograd import Variable
import pprint
import copy
import warnings
from Models.cgru import cGRULinear
warnings.filterwarnings("ignore", category=FutureWarning, module='torch.storage')
warnings.filterwarnings("ignore", category=UserWarning, module='torch.storage')
warnings.filterwarnings("ignore", category=DeprecationWarning, module='torch.storage')




In [3]:
class ModelLoader:
    """Loads pickled pre-trained weights for the base learner.

    The checkpoint file name is ``<path><iteration>.pickle`` where ``path`` is
    the prefix given to :meth:`set_path` and ``iteration`` is a counter that
    starts at 1.

    Attributes are plain public fields:
      * ``iteration``  - counter appended to the file-name prefix.
      * ``path``       - file-name prefix; ``None`` until ``set_path`` is called.
      * ``base_model`` - name used as a directory component by
        ``return_base_learner``.
      * ``dataset``    - name used as a directory component by
        ``return_base_learner``.
    """

    def __init__(self, base_model, dataset):
        self.iteration = 1
        self.path = None
        self.base_model = base_model
        self.dataset = dataset

    def _checkpoint_stem(self):
        """Return the checkpoint file name (without extension) for the current iteration.

        Raises:
            TypeError: if no prefix has been set via ``set_path``.
        """
        if self.path is None:
            raise TypeError(
                "ModelLoader.path is not set; call set_path() before loading a model"
            )
        return f"{self.path}{self.iteration}"

    def return_base_learner_pretrained(self):
        """Unpickle and return the object stored for the current iteration.

        The directory is hard-coded to ``pretrain/GRU/weather/before`` and
        ignores ``base_model``/``dataset``.
        NOTE(review): this differs from the directory used by
        ``return_base_learner`` - confirm which one is intended.

        Raises:
            TypeError: if no prefix has been set via ``set_path``.
        """
        stem = self._checkpoint_stem()
        # SECURITY: pickle.load executes arbitrary code; only load trusted files.
        with open(f"pretrain/GRU/weather/before/{stem}.pickle", "rb") as f:
            model = pickle.load(f)
        return model

    def return_base_learner(self, input_size, output_size, hidden_size, batch_size, device, many_to_one):
        """Create a ``cGRULinear`` and initialise it from the current checkpoint.

        All arguments are forwarded unchanged to the ``cGRULinear``
        constructor. The unpickled object is passed to ``load_state_dict``.

        Side effect: ``iteration`` is incremented after a successful load, so
        the next call reads the next-numbered checkpoint file.

        Returns:
            The ``cGRULinear`` instance with the loaded weights.

        Raises:
            TypeError: if no prefix has been set via ``set_path``.
        """
        stem = self._checkpoint_stem()
        checkpoint_file = f"performance/pretrain/{self.base_model}/{self.dataset}/before/{stem}.pickle"
        # SECURITY: pickle.load executes arbitrary code; only load trusted files.
        with open(checkpoint_file, "rb") as f:
            pretrained_model = pickle.load(f)

        model = cGRULinear(
            input_size=input_size,
            output_size=output_size,
            hidden_size=hidden_size,
            batch_size=batch_size,
            device=device,
            many_to_one=many_to_one,
        )

        # Load the weights from the pretrained model into the cGRULinear instance
        model.load_state_dict(pretrained_model)
        self.iteration += 1

        return model

    def next_iteration(self):
        """Advance the checkpoint counter by one."""
        self.iteration += 1

    def set_path(self, path):
        """Set the checkpoint file-name prefix (the part before the iteration number)."""
        self.path = path

# Weather datasets


In [4]:
# ---------------------------------------------------------------------------
# Experiment configuration: Weather stream
# ---------------------------------------------------------------------------

# Data stream
dataset = 'Weather'
dataset_name = 'weather_st124_3conf'
PATHS = [f"datasets/Weather/{dataset_name}"]  # data stream paths, without the extension
MAX_SAMPLES = None   # forwarded as max_data_points to the prequential evaluation
TRAIN_TEST = False   # when True, "<path>_train" / "<path>_test" files are used

# Sequence and batch geometry
SEQ_LEN = 11                   # length of the sequence, 11 for Weather, 10 for Sine
NUM_OLD_LABELS = SEQ_LEN - 1
BATCH_SIZE = 128               # the batch size of periodic learners and classifiers.
ITERATIONS = 1                 # number of experiments per each attempt.
NUM_FEATURES = 4
NUM_CLASSES = 2

# Base learner and its pre-trained checkpoint
base_model = 'GRU'
hidden_size = 250
pretrain_model_path = f'weather-{base_model}-pretrain-hidden{hidden_size}-epoch10_itter'

# Settings that are defined here but not read by the cells shown below
mask_weights = []
epoch_size = 10
lr = 0.01
number_of_tasks = 4
mask_selection_NofBatch = 50
mask_init = 'uniform'
pretrain_model_addr = ''
many_to_one = True

# Evaluation switches
METRICS = ["accuracy", "kappa"]
WRITE_CHECKPOINTS = False
DO_CL = False
ANYTIME_SCENARIO = True
PERIODIC_SCENARIO = True

# Output handling
PATH_PERFORMANCE = f"Results/cGRU/hidden{hidden_size}"  # path to write the outputs of the evaluation
MODE = "local"       # 'local' or 'aws'; with 'aws' the messages go to a txt file in the outputs dir
OUTPUT_FILE = None   # name of that txt file; None means "use the name of the data stream"
CALLBACK_FUNC = None  # function to call after each iteration
suffix = ""          # the suffix to add the files containing the evaluation results.

In [5]:



# Loader that supplies the pre-trained columns; its checkpoint prefix is set
# with MODEL_LOADER.set_path() before the evaluation starts.
MODEL_LOADER = ModelLoader(dataset=dataset, base_model=base_model)


def create_acpnn_cgru():
    """Build the cPNN whose columns are created by ``MODEL_LOADER.return_base_learner``.

    The sizes are read from the configuration globals at call time instead of
    being repeated as literals: the checkpoint file name embeds
    ``hidden_size``, so the network must be built with that same value, and
    the output size follows ``NUM_CLASSES``.

    Returns:
        A new ``cPNN`` instance configured for CPU execution.
    """
    return cPNN(
        column_class=MODEL_LOADER.return_base_learner,
        device="cpu",
        seq_len=SEQ_LEN,
        train_verbose=False,
        acpnn=True,
        batch_size=BATCH_SIZE,
        input_size=NUM_FEATURES,
        output_size=NUM_CLASSES,
        hidden_size=hidden_size,
    )

# Anytime learners: models able to train and predict on single data points.
# They must implement learn_one(x, y) and predict_one(x).
# Use anytime_learners_sml to add all the SML models, [] to test none,
# or a custom list of LearnerConfig.
anytime_learners = []

# Periodic (batch) learners: models that train only on mini-batches.
# They must implement learn_many(x, y), predict_many(x) and predict_one(x).
# Use batch_learners_acpnn_qcpnn for the acpnn/qcpnn experiment,
# batch_learners_fcpnn for the single-model fcpnn experiment,
# or a custom list of LearnerConfig such as the one below.
# `model` receives the factory function itself; it is not called here.
batch_learners = [
    LearnerConfig(
        name="cGRU",
        model=create_acpnn_cgru,
        cpnn=True,
        batch_learner=True,
        numeric=True,
        drift=False,
    ),
]


# __________________
# CODE
# __________________

# When no output file name was configured, fall back to the last path
# component of the first data stream.
OUTPUT_FILE = PATHS[0].rsplit("/", 1)[-1] if OUTPUT_FILE is None else OUTPUT_FILE

# Hand the configured sizes to the shared test utilities.
initialize(NUM_OLD_LABELS, SEQ_LEN, NUM_FEATURES, BATCH_SIZE, ITERATIONS)

# Continual-learning evaluator; stays None unless it is created in the loop below.
eval_cl = None


def create_iter_csv():
    """Open the CSV of the current stream as a river iterator.

    Reads the globals ``PATH`` (stream path without extension) and
    ``converters`` at call time; the column named "target" is used as the label.
    """
    csv_file = f"{PATH}.csv"
    return stream.iter_csv(csv_file, converters=converters, target="target")

def next_iteration_callback(**_callback_info):
    """Advance the model loader's checkpoint counter.

    Any keyword arguments supplied by the caller are accepted and ignored.
    """
    MODEL_LOADER.next_iteration()


# Register the function above as the callback handed to the evaluation.
CALLBACK_FUNC = next_iteration_callback


# Path of the stream currently being evaluated; assigned inside the loop below.
PATH = ""

# Result locations that do not start with "/" are rooted under "performance".
# NOTE(review): re-running this cell without re-running the configuration cell
# prepends "performance" a second time.
PATH_PERFORMANCE = (
    PATH_PERFORMANCE
    if PATH_PERFORMANCE.startswith("/")
    else os.path.join("performance", PATH_PERFORMANCE)
)

# In "aws" mode everything printed from here on goes to
# outputs/<OUTPUT_FILE>.txt; the original stdout is kept so it can be restored.
f = None
orig_stdout = sys.stdout
if MODE == "aws":
    make_dir("outputs")
    f = open(f"outputs/{OUTPUT_FILE}.txt", "w", buffering=1)
    sys.stdout = f

# Run the prequential evaluation on every configured stream.
# The outer try/finally guarantees that, in "aws" mode, stdout is restored and
# the log file is closed even when the run is interrupted by something that is
# not an `Exception` (e.g. KeyboardInterrupt from stopping the kernel).
try:
    try:
        for path in PATHS:
            PATH = path
            MODEL_LOADER.set_path(pretrain_model_path)
            # Results go to <PATH_PERFORMANCE>/<stream name>.
            current_path_performance = os.path.join(PATH_PERFORMANCE, PATH.split("/")[-1])
            make_dir(current_path_performance)

            if TRAIN_TEST:
                PATH_CL = PATH + "_test"
                PATH = PATH + "_train"
            else:
                PATH_CL = PATH

            # Read a single row to discover the feature columns and the first task.
            df = pd.read_csv(f"{PATH}.csv", nrows=1)
            columns = list(df.columns)
            initial_task = df.iloc[0]["task"]
            columns.remove("target")
            columns.remove("task")
            # Features are parsed as float, target and task as int.
            converters = {c: float for c in columns}
            converters["target"] = int
            converters["task"] = int
            # Overrides the configured value with what the file actually contains.
            NUM_FEATURES = len(columns)
            # The factory (not an opened stream) is handed to the evaluator.
            data_stream = create_iter_csv

            initialize(NUM_OLD_LABELS, SEQ_LEN, NUM_FEATURES, BATCH_SIZE, ITERATIONS)
            print(PATH)
            print("BATCH SIZE, SEQ LEN:", BATCH_SIZE, SEQ_LEN)
            print("NUM OLD LABELS:", NUM_OLD_LABELS)
            print("TRAIN TEST:", TRAIN_TEST)
            print("ANYTIME LEARNERS:", [m.name for m in anytime_learners])
            print("BATCH LEARNERS:", [(m.name, m.drift) for m in batch_learners])
            print("SUFFIX:", suffix)
            print()

            eval_preq = EvaluatePrequential(
                max_data_points=MAX_SAMPLES,
                batch_size=BATCH_SIZE,
                metrics=METRICS,
                anytime_learners=anytime_learners,
                batch_learners=batch_learners,
                data_stream=data_stream,
                path_write=current_path_performance,
                train_test=TRAIN_TEST,
                suffix=suffix,
                write_checkpoints=WRITE_CHECKPOINTS,
                iterations=ITERATIONS,
                dataset_name=PATH.split("/")[-1],
                mode=MODE,
                anytime_scenario=ANYTIME_SCENARIO,
                periodic_scenario=PERIODIC_SCENARIO,
            )

            if PATH_CL is not None and DO_CL:
                eval_cl = EvaluateContinualLearning(
                    path=PATH_CL,
                    checkpoint=eval_preq.checkpoint,
                    anytime_learners=anytime_learners,
                    batch_learners=batch_learners,
                    batch_size=BATCH_SIZE,
                    path_write=current_path_performance,
                    train_test=TRAIN_TEST,
                    suffix=suffix,
                )

            initialize_callback(eval_cl, eval_preq)

            eval_preq.evaluate(callback=CALLBACK_FUNC, initial_task=initial_task)
            print()
    except Exception:
        # Deliberate best-effort: report the failure and still reach "END."
        error_report = traceback.format_exc()
        print(error_report)
        if MODE == "aws":
            # The report above went to the log file; repeat it on the console.
            sys.stdout = orig_stdout
            print(error_report)
    print("\n\nEND.")
finally:
    if MODE == "aws":
        sys.stdout = orig_stdout
        if f is not None:
            f.close()

datasets/Weather/weather_st124_3conf
BATCH SIZE, SEQ LEN: 128 11
NUM OLD LABELS: 10
TRAIN TEST: False
ANYTIME LEARNERS: []
BATCH LEARNERS: [('cGRU', False)]
SUFFIX: 

weather_st124_3conf 1/1 24100
DETECTED DRIFT: 24105
weather_st124_3conf 1/1 48200
DETECTED DRIFT: 48210
weather_st124_3conf 1/1 72300
DETECTED DRIFT: 72315
weather_st124_3conf 1/1 96400




END.


# SINE datasets


In [6]:
# ---------------------------------------------------------------------------
# Experiment configuration: SINE stream
# ---------------------------------------------------------------------------

# Data stream
dataset = 'SINE'
dataset_name = "sine_rw10_mode5_extended_16-16_1234"
PATHS = [f"datasets/SINE/{dataset_name}"]  # data stream paths, without the extension
MAX_SAMPLES = None   # forwarded as max_data_points to the prequential evaluation
TRAIN_TEST = False   # when True, "<path>_train" / "<path>_test" files are used

# Sequence and batch geometry
SEQ_LEN = 10                   # length of the sequence, 11 for Weather, 10 for Sine
NUM_OLD_LABELS = SEQ_LEN - 1
BATCH_SIZE = 128               # the batch size of periodic learners and classifiers.
ITERATIONS = 1                 # number of experiments per each attempt.
NUM_FEATURES = 2
NUM_CLASSES = 2

# Base learner and its pre-trained checkpoint
base_model = 'GRU'
hidden_size = 250
Pretrain_task = '1'
pretrain_model_path = f'sine-6_6-1234-t{Pretrain_task}-{base_model}-pretrain-hidden{hidden_size}-epoch10_itter'

# Settings that are defined here but not read by the cells shown below
mask_weights = []
epoch_size = 10
lr = 0.01
number_of_tasks = 4
mask_selection_NofBatch = 50
mask_init = 'uniform'
pretrain_model_addr = ''
many_to_one = True

# Evaluation switches
METRICS = ["accuracy", "kappa"]
WRITE_CHECKPOINTS = False
DO_CL = False
ANYTIME_SCENARIO = True
PERIODIC_SCENARIO = True

# Output handling
PATH_PERFORMANCE = f"Results/cGRU/hidden{hidden_size}"  # path to write the outputs of the evaluation
MODE = "local"       # 'local' or 'aws'; with 'aws' the messages go to a txt file in the outputs dir
OUTPUT_FILE = None   # name of that txt file; None means "use the name of the data stream"
CALLBACK_FUNC = None  # function to call after each iteration
suffix = ""          # the suffix to add the files containing the evaluation results.


In [7]:



# Loader that supplies the pre-trained columns; its checkpoint prefix is set
# with MODEL_LOADER.set_path() before the evaluation starts.
MODEL_LOADER = ModelLoader(dataset=dataset, base_model=base_model)


def create_acpnn_cgru():
    """Build the cPNN whose columns are created by ``MODEL_LOADER.return_base_learner``.

    The sizes are read from the configuration globals at call time instead of
    being repeated as literals: the checkpoint file name embeds
    ``hidden_size``, so the network must be built with that same value, and
    the output size follows ``NUM_CLASSES``.

    Returns:
        A new ``cPNN`` instance configured for CPU execution.
    """
    return cPNN(
        column_class=MODEL_LOADER.return_base_learner,
        device="cpu",
        seq_len=SEQ_LEN,
        train_verbose=False,
        acpnn=True,
        batch_size=BATCH_SIZE,
        input_size=NUM_FEATURES,
        output_size=NUM_CLASSES,
        hidden_size=hidden_size,
    )

# Anytime learners: models able to train and predict on single data points.
# They must implement learn_one(x, y) and predict_one(x).
# Use anytime_learners_sml to add all the SML models, [] to test none,
# or a custom list of LearnerConfig.
anytime_learners = []

# Periodic (batch) learners: models that train only on mini-batches.
# They must implement learn_many(x, y), predict_many(x) and predict_one(x).
# Use batch_learners_acpnn_qcpnn for the acpnn/qcpnn experiment,
# batch_learners_fcpnn for the single-model fcpnn experiment,
# or a custom list of LearnerConfig such as the one below.
# `model` receives the factory function itself; it is not called here.
batch_learners = [
    LearnerConfig(
        name="cGRU",
        model=create_acpnn_cgru,
        cpnn=True,
        batch_learner=True,
        numeric=True,
        drift=False,
    ),
]


# __________________
# CODE
# __________________

# When no output file name was configured, fall back to the last path
# component of the first data stream.
OUTPUT_FILE = PATHS[0].rsplit("/", 1)[-1] if OUTPUT_FILE is None else OUTPUT_FILE

# Hand the configured sizes to the shared test utilities.
initialize(NUM_OLD_LABELS, SEQ_LEN, NUM_FEATURES, BATCH_SIZE, ITERATIONS)

# Continual-learning evaluator; stays None unless it is created in the loop below.
eval_cl = None


def create_iter_csv():
    """Open the CSV of the current stream as a river iterator.

    Reads the globals ``PATH`` (stream path without extension) and
    ``converters`` at call time; the column named "target" is used as the label.
    """
    csv_file = f"{PATH}.csv"
    return stream.iter_csv(csv_file, converters=converters, target="target")

def next_iteration_callback(**_callback_info):
    """Advance the model loader's checkpoint counter.

    Any keyword arguments supplied by the caller are accepted and ignored.
    """
    MODEL_LOADER.next_iteration()


# Register the function above as the callback handed to the evaluation.
CALLBACK_FUNC = next_iteration_callback


# Path of the stream currently being evaluated; assigned inside the loop below.
PATH = ""

# Result locations that do not start with "/" are rooted under "performance".
# NOTE(review): re-running this cell without re-running the configuration cell
# prepends "performance" a second time.
PATH_PERFORMANCE = (
    PATH_PERFORMANCE
    if PATH_PERFORMANCE.startswith("/")
    else os.path.join("performance", PATH_PERFORMANCE)
)

# In "aws" mode everything printed from here on goes to
# outputs/<OUTPUT_FILE>.txt; the original stdout is kept so it can be restored.
f = None
orig_stdout = sys.stdout
if MODE == "aws":
    make_dir("outputs")
    f = open(f"outputs/{OUTPUT_FILE}.txt", "w", buffering=1)
    sys.stdout = f

# Run the prequential evaluation on every configured stream.
# The outer try/finally guarantees that, in "aws" mode, stdout is restored and
# the log file is closed even when the run is interrupted by something that is
# not an `Exception` (e.g. KeyboardInterrupt from stopping the kernel).
try:
    try:
        for path in PATHS:
            PATH = path
            MODEL_LOADER.set_path(pretrain_model_path)
            # Results go to <PATH_PERFORMANCE>/<stream name>.
            current_path_performance = os.path.join(PATH_PERFORMANCE, PATH.split("/")[-1])
            make_dir(current_path_performance)

            if TRAIN_TEST:
                PATH_CL = PATH + "_test"
                PATH = PATH + "_train"
            else:
                PATH_CL = PATH

            # Read a single row to discover the feature columns and the first task.
            df = pd.read_csv(f"{PATH}.csv", nrows=1)
            columns = list(df.columns)
            initial_task = df.iloc[0]["task"]
            columns.remove("target")
            columns.remove("task")
            # Features are parsed as float, target and task as int.
            converters = {c: float for c in columns}
            converters["target"] = int
            converters["task"] = int
            # Overrides the configured value with what the file actually contains.
            NUM_FEATURES = len(columns)
            # The factory (not an opened stream) is handed to the evaluator.
            data_stream = create_iter_csv

            initialize(NUM_OLD_LABELS, SEQ_LEN, NUM_FEATURES, BATCH_SIZE, ITERATIONS)
            print(PATH)
            print("BATCH SIZE, SEQ LEN:", BATCH_SIZE, SEQ_LEN)
            print("NUM OLD LABELS:", NUM_OLD_LABELS)
            print("TRAIN TEST:", TRAIN_TEST)
            print("ANYTIME LEARNERS:", [m.name for m in anytime_learners])
            print("BATCH LEARNERS:", [(m.name, m.drift) for m in batch_learners])
            print("SUFFIX:", suffix)
            print()

            eval_preq = EvaluatePrequential(
                max_data_points=MAX_SAMPLES,
                batch_size=BATCH_SIZE,
                metrics=METRICS,
                anytime_learners=anytime_learners,
                batch_learners=batch_learners,
                data_stream=data_stream,
                path_write=current_path_performance,
                train_test=TRAIN_TEST,
                suffix=suffix,
                write_checkpoints=WRITE_CHECKPOINTS,
                iterations=ITERATIONS,
                dataset_name=PATH.split("/")[-1],
                mode=MODE,
                anytime_scenario=ANYTIME_SCENARIO,
                periodic_scenario=PERIODIC_SCENARIO,
            )

            if PATH_CL is not None and DO_CL:
                eval_cl = EvaluateContinualLearning(
                    path=PATH_CL,
                    checkpoint=eval_preq.checkpoint,
                    anytime_learners=anytime_learners,
                    batch_learners=batch_learners,
                    batch_size=BATCH_SIZE,
                    path_write=current_path_performance,
                    train_test=TRAIN_TEST,
                    suffix=suffix,
                )

            initialize_callback(eval_cl, eval_preq)

            eval_preq.evaluate(callback=CALLBACK_FUNC, initial_task=initial_task)
            print()
    except Exception:
        # Deliberate best-effort: report the failure and still reach "END."
        error_report = traceback.format_exc()
        print(error_report)
        if MODE == "aws":
            # The report above went to the log file; repeat it on the console.
            sys.stdout = orig_stdout
            print(error_report)
    print("\n\nEND.")
finally:
    if MODE == "aws":
        sys.stdout = orig_stdout
        if f is not None:
            f.close()

datasets/SINE/sine_rw10_mode5_extended_16-16_1234
BATCH SIZE, SEQ LEN: 128 10
NUM OLD LABELS: 9
TRAIN TEST: False
ANYTIME LEARNERS: []
BATCH LEARNERS: [('cGRU', False)]
SUFFIX: 

sine_rw10_mode5_extended_16-16_1234 1/1 50000
DETECTED DRIFT: 50000
sine_rw10_mode5_extended_16-16_1234 1/1 100000
DETECTED DRIFT: 100000
sine_rw10_mode5_extended_16-16_1234 1/1 150000
DETECTED DRIFT: 150000
sine_rw10_mode5_extended_16-16_1234 1/1 200000




END.
