In [1]:
import random
import numpy as np
import torch

def set_seed(seed: int):
    """Seed every random number generator this notebook relies on.

    The same ``seed`` is handed, in order, to Python's ``random`` module,
    NumPy's global generator, ``torch.manual_seed`` and
    ``torch.cuda.manual_seed_all``.

    Args:
        seed: Integer seed applied to all of the generators above.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

# Seed all RNGs once, up front, so everything executed after this point starts
# from a fixed random state.
set_seed(42)

# Data

In [2]:
import data_utils
# Locations of the train / dev / test split files.
train_path = "../data/absa/id/william/train.txt"
val_path = "../data/absa/id/william/dev.txt"
test_path = "../data/absa/id/william/test.txt"

# Read the three splits in the same order as their paths above.
train, val, test = map(
    data_utils.read_data,
    (train_path, val_path, test_path),
)

In [3]:
# Training mixes two paradigms, each over the same two partial element orders:
#   * "extraction" tasks carry only `se_order`;
#   * "imputation" tasks carry a `reduced_se_order` and always use the full
#     "oas" `se_order`.
train_tasks = [
    {"paradigm": "extraction", "se_order": partial_order, "method": "lego_absa"}
    for partial_order in ("oa", "as")
] + [
    {
        "paradigm": "imputation",
        "reduced_se_order": partial_order,
        "se_order": "oas",
        "method": "lego_absa",
    }
    for partial_order in ("oa", "as")
]

# Validation and test each use a single extraction task over the full "oas" order.
val_tasks = [dict(paradigm="extraction", se_order="oas", method="lego_absa")]

test_tasks = [dict(paradigm="extraction", se_order="oas", method="lego_absa")]

In [4]:
# Turn each raw split into a dataset via `data_utils.data_gen`, using that
# split's task list. Training uses n_fold=4; validation and test use n_fold=1.
# Each generated item is a dict with 'input', 'output' and 'se_order' keys
# (see the `train_ds[0]` preview).
train_ds = data_utils.data_gen(data=train, nt_se_order="aos", tasks=train_tasks, n_fold=4, algo="round_robin")
val_ds = data_utils.data_gen(data=val, nt_se_order="aos", tasks=val_tasks, n_fold=1, algo="round_robin")
# The result is bound to `test_ds`, matching `train_ds` / `val_ds`. Assigning it
# to `test_tasks` would overwrite the task configuration with the generated
# dataset, so re-running this cell would pass a dataset as `tasks=`.
# Code that needs the generated test split should read `test_ds`.
test_ds = data_utils.data_gen(data=test, nt_se_order="aos", tasks=test_tasks, n_fold=1, algo="round_robin")

100%|██████████| 12000/12000 [00:00<00:00, 13552.43it/s]
100%|██████████| 1000/1000 [00:00<00:00, 20212.54it/s]
100%|██████████| 1000/1000 [00:00<00:00, 15953.63it/s]


In [5]:
# Display the first generated training example as a sanity check.
train_ds[0]

{'input': 'kamar saya ada kendala di ac tidak berfungsi optimal . dan juga wifi koneksi kurang stabil .| opinion : <extra_id_0> ,aspect : <extra_id_1>',
 'output': '<extra_id_0> tidak berfungsi optimal <extra_id_1> ac ; <extra_id_2> kurang stabil <extra_id_3> wifi koneksi',
 'se_order': 'oa'}

# Train