In [1]:
# Move up one directory before importing; the default data paths used later
# ("./data/...") are relative — presumably to the repository root (verify).
%cd ../

/home/tk/repos/explicit-memory


In [6]:
import gym
from gym import spaces

from memory import Memory, EpisodicMemory, SemanticMemory
from memory.environments import MemorySpace, EpisodicMemorySpace, SemanticMemorySpace, OQAGenerator


In [None]:
class MemoryEnv(gym.Env):
    """Gym environment in which each action selects one stored memory to forget.

    An action is an index into the current memory entries. `step` forgets that
    entry, obtains a reward from `answer_latest` for one question, and then adds
    the next observation to the memory.

    NOTE(review): this class looks mid-refactor and cannot run as written.
    `__init__` builds an `OQAGenerator` (`self.oqag`), but `reset`, `step`, and
    `render` read `self.M_dummy`, `self.data`, `self.split`, and
    `self.questions`, none of which are assigned anywhere in this class, and
    `step` calls `select_a_question`, which is not imported in this notebook.
    """

    # Only plain-text rendering via `print` is supported (see `render`).
    metadata = {"render.modes": ["console"]}

    def __init__(
        self,
        capacity: int,
        memory_type: str,
        max_history: int = 1024,
        semantic_knowledge_path: str = "./data/semantic-knowledge.json",
        names_path: str = "./data/top-human-names",
        weighting_mode: str = "highest",
        commonsense_prob: float = 0.5,
    ) -> None:
        """Create the observation/question generator and the gym spaces.

        Args
        ----
        capacity: memory capacity. The action space has `capacity + 1` discrete
            actions.
        memory_type: either "episodic" or "semantic" (asserted).
        max_history: maximum history of observations.
        semantic_knowledge_path: path to the semantic knowledge generated from
            `collect_data.py`
        names_path: The path to the top 20 human name list.
        weighting_mode: "highest" chooses the one with the highest weight, "weighted"
            chooses all of them by weight, and null chooses every single one of them
            without weighting.
        commonsense_prob: the probability of an observation being covered by a
            commonsense

        Raises
        ------
        AssertionError: if `memory_type` is not "episodic" or "semantic".

        """
        super().__init__()
        assert memory_type in ["episodic", "semantic"]
        self.memory_type = memory_type
        self.capacity = capacity
        # The generator and the observation space are configured with the same
        # arguments.
        self.oqag = OQAGenerator(
            max_history,
            semantic_knowledge_path,
            names_path,
            weighting_mode,
            commonsense_prob,
        )
        # One action per entry that can be forgotten; `reset` expects the memory
        # to hold capacity + 1 entries when the agent acts.
        n_actions = self.capacity + 1
        self.action_space = spaces.Discrete(n_actions)
        self.observation_space = MemorySpace(
            capacity,
            memory_type,
            max_history,
            semantic_knowledge_path,
            names_path,
            weighting_mode,
            commonsense_prob,
        )

    def reset(self):
        """Reset the generator and return the initial memory entries.

        NOTE(review): this method cannot complete as written. The `while True`
        loop has no `break` or `return`, and it references `M_e`, which is not
        defined in this class (a later notebook cell assigns a global `M_e`).
        The `for` loop after it is therefore unreachable, and it relies on
        `self.data`, `self.split`, and `self.M_dummy`, which `__init__` never
        sets. The two loops appear to be a new (generator-based) and an old
        (dataset-based) implementation left side by side — confirm which one is
        intended.

        Returns
        -------
        `self.M_dummy.entries` after the initial fill.

        """
        self.oqag.reset()
        # NOTE(review): no exit condition, and `M_e` is not an attribute of self.
        while True:
            ob, question_answer = self.oqag.generate()
            mem_epi = M_e.ob2epi(ob)


        # Feed observations into the memory until it reports `is_kinda_full`,
        # remembering the index of the last observation consumed.
        for idx, ob in enumerate(self.data[self.split]):
            self.idx = idx
            mem_epi = self.M_dummy.ob2epi(ob)
            self.M_dummy.add(mem_epi)

            if self.M_dummy.is_kinda_full:
                break

        # `idx` is zero-based, so this expects exactly capacity + 1 observations
        # to have been added when the loop stops.
        assert idx == self.capacity

        return self.M_dummy.entries

    def step(self, action):
        """Forget one entry, answer one question, and add the next observation.

        Args
        ----
        action: index into `self.M_dummy.entries` of the entry to forget.

        Returns
        -------
        A `(entries, reward, done, info)` tuple: the current memory entries, the
        first value returned by `answer_latest`, whether all observations in the
        split have been consumed, and an empty info dict.

        NOTE(review): `select_a_question`, `self.data`, `self.questions`, and
        `self.split` are not defined in this notebook/class.

        """
        mem_to_forget = self.M_dummy.entries[action]

        self.M_dummy.forget(mem_to_forget)

        question = select_a_question(self.idx, self.data, self.questions, self.split)

        reward, _, _ = self.M_dummy.answer_latest(question)

        self.idx += 1
        # The episode ends once the index runs past the last observation.
        if self.idx == len(self.data[self.split]):
            done = True
        else:
            done = False

            # Refill the slot freed by the forget above with the next observation.
            ob = self.data[self.split][self.idx]
            mem_epi = self.M_dummy.ob2epi(ob)
            self.M_dummy.add(mem_epi)

            assert self.M_dummy.is_kinda_full

        info = {}

        return self.M_dummy.entries, reward, done, info

    def render(self, mode="console"):
        """Print the current memory entries.

        Raises
        ------
        NotImplementedError: if `mode` is anything other than "console".

        """
        if mode != "console":
            raise NotImplementedError()
        else:
            print(self.M_dummy.entries)

    def close(self):
        """No-op; this method does nothing."""
        pass


In [None]:
from stable_baselines.common.env_checker import check_env

In [None]:
# MemoryEnv's signature is (capacity, memory_type, ...) and it has no
# dataset-split parameter, so the arguments are passed by keyword.
env = MemoryEnv(capacity=4, memory_type='episodic')
# If the environment doesn't follow the gym interface, an error will be thrown
check_env(env, warn=True)

In [None]:
# MemoryEnv's signature is (capacity, memory_type, ...) and it has no
# dataset-split parameter, so the arguments are passed by keyword.
env = MemoryEnv(capacity=4, memory_type='episodic')

obs = env.reset()
env.render()

print(env.observation_space)
print(env.action_space)
print(env.action_space.sample())

# Upper bound on the number of steps; the loop stops early once `done` is True.
n_steps = 1200

for step in range(n_steps):
    print("Step {}".format(step + 1))
    # Fixed policy: always forget the entry at index 0.
    obs, reward, done, info = env.step(0)
    print('obs=', obs, 'reward=', reward, 'done=', done)
    env.render()
    if done:
        print("Goal reached!", "reward=", reward)
        break

In [None]:
# Display the `info` dict returned by the most recent `env.step` call.
info

In [None]:
# Display the `done` flag returned by the most recent `env.step` call.
done

In [None]:
from transformers import RobertaTokenizer, RobertaForSequenceClassification
import torch

# Load the pretrained 'roberta-base' tokenizer and sequence-classification model.
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
model = RobertaForSequenceClassification.from_pretrained('roberta-base')

# Encode a single sentence as PyTorch tensors and pair it with one label.
inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
labels = torch.tensor([[1]])  # shape (1, 1): batch size 1

# Passing labels makes the model output carry a loss alongside the logits.
outputs = model(labels=labels, **inputs)
loss, logits = outputs.loss, outputs.logits

In [None]:
import logging

from memory import EpisodicMemory

# Candidate capacity pairs, doubling from 2 to 1024 with the second element
# fixed at 0 (presumably (episodic, semantic) capacities — TODO confirm).
capacities = [
    (2, 0),
    (4, 0),
    (8, 0),
    (16, 0),
    (32, 0),
    (64, 0),
    (128, 0),
    (256, 0),
    (512, 0),
    (1024, 0),
]

# NOTE(review): `capacities` is not used below. The loop instead reads
# `capacity["episodic"]`, `data`, `policy`, `questions`, and
# `select_a_question`, none of which are defined in this notebook; they must be
# provided before this cell can run.

# Per-split totals collected by the loop below.
results = {"train": {}, "val": {}, "test": {}}

for split in ["train", "val", "test"]:

    # A fresh episodic memory and reward counter for every split.
    M_e = EpisodicMemory(capacity["episodic"])
    rewards = 0

    for step, ob in enumerate(data[split]):
        mem_epi = M_e.ob2epi(ob)
        M_e.add(mem_epi)
        # Once the memory reports `is_kinda_full`, drop one entry according to
        # the configured forgetting policy.
        if M_e.is_kinda_full:
            if policy["forget"].lower() == "oldest":
                M_e.forget_oldest()
            elif policy["forget"].lower() == "random":
                M_e.forget_random()
            else:
                raise NotImplementedError

        question = select_a_question(step, data, questions, split)

        # Answer the question with the configured answering policy.
        if policy["answer"].lower() == "latest":
            reward, _, _ = M_e.answer_latest(question)
        elif policy["answer"].lower() == "random":
            reward, _, _ = M_e.answer_random(question)
        else:
            raise NotImplementedError

        rewards += reward

    results[split]["rewards"] = rewards
    results[split]["num_samples"] = len(data[split])
    results[split]["episodic_memories"] = M_e.entries

    logging.info(f"results so far: {results}")

logging.info("episodic only training done!")

In [None]:
# episodic only

# Display the current value of `capacity`.
capacity