In [2]:
# Move up one directory so the relative "./data/..." default paths used by the
# environments below resolve (presumably this lands in the repository root --
# confirm against the path printed underneath).
%cd ../

/home/tk/repos/explicit-memory


In [11]:
import gym
from gym import spaces
from memory.environments import OQAGenerator, MemorySpace
from memory import Memory, EpisodicMemory, SemanticMemory


class EpisodicMemoryManage(gym.Env):
    """Gym-compatible environment for managing an episodic memory system.

    Each step the agent's action selects an entry of the episodic memory to
    forget (only applied when the memory reports `is_kinda_full`). A question
    is then generated and answered from the remaining memories, which yields
    the reward, and finally a new observation is stored.
    """

    metadata = {"render.modes": ["console"]}

    def __init__(
        self,
        capacity: dict,
        max_history: int = 1024,
        semantic_knowledge_path: str = "./data/semantic-knowledge.json",
        names_path: str = "./data/top-human-names",
        weighting_mode: str = "highest",
        commonsense_prob: float = 0.5,
    ) -> None:
        """Build the observation/question generator, the memory and the spaces.

        Args
        ----
        capacity: memory capacity
            e.g., {'episodic': 42, 'semantic': 0}. The semantic capacity must
            be 0 (asserted below).
        max_history: maximum history of observations.
        semantic_knowledge_path: path to the semantic knowledge generated from
            `collect_data.py`
        names_path: The path to the top 20 human name list.
        weighting_mode: "highest" chooses the one with the highest weight, "weighted"
            chooses all of them by weight, and null chooses every single one of them
            without weighting.
        commonsense_prob: the probability of an observation being covered by a
            commonsense

        """
        super().__init__()
        assert capacity["semantic"] == 0
        self.capacity = capacity

        # The generator and the observation space take the same trailing
        # positional arguments, so they are collected once.
        generator_args = (
            max_history,
            semantic_knowledge_path,
            names_path,
            weighting_mode,
            commonsense_prob,
        )
        self.oqag = OQAGenerator(*generator_args)

        # One action per memory slot, plus one extra.
        self.n_actions = self.capacity["episodic"] + 1
        self.action_space = spaces.Discrete(self.n_actions)

        self.M_e = EpisodicMemory(self.capacity["episodic"])
        self.me_max = self.M_e.capacity + 1

        self.observation_space = MemorySpace(
            capacity, "episodic_memory_manage", *generator_args
        )

    def _store_new_observation(self):
        """Generate one observation (no question) and add it to the memory."""
        observation, _ = self.oqag.generate(generate_qa=False)
        self.M_e.add(self.M_e.ob2epi(observation))

    def _encoded_memory(self):
        """Return the current episodic memory converted by the observation space."""
        return self.observation_space.episodic_memory_system_to_numbers(
            self.M_e, self.me_max
        )

    def reset(self):
        """Reset the generator, empty the memory, store one observation.

        Returns the encoded memory produced by the observation space.
        """
        self.oqag.reset()
        self.M_e.forget_all()
        self._store_new_observation()
        return self._encoded_memory()

    def step(self, action):
        """Forget the chosen entry, answer a question, then observe again.

        Args
        ----
        action: index into `self.M_e.entries`; only used when the memory
            reports `is_kinda_full`.

        Returns
        -------
        (next_state, reward, done, info) where `reward` is the first value
        returned by `answer_latest` and `info` is an empty dict.

        """
        if self.M_e.is_kinda_full:
            self.M_e.forget(self.M_e.entries[action])

        question_answer = self.oqag.generate_question_answer()
        reward, _, _ = self.M_e.answer_latest(question_answer)

        # The generator's fullness is read *before* the next observation is
        # generated, exactly as the episode-termination check requires.
        done = True if self.oqag.is_full else False
        info = {}

        self._store_new_observation()

        return self._encoded_memory(), reward, done, info

    def render(self, mode="console"):
        """Print the raw memory entries; only the "console" mode is supported."""
        if mode == "console":
            print(self.M_e.entries)
        else:
            raise NotImplementedError()

    def close(self):
        """Nothing to release."""
        pass


In [12]:
# Build the environment with 10 episodic slots; the class asserts that the
# semantic capacity is 0.
foo = EpisodicMemoryManage(capacity={'episodic':10, 'semantic': 0})

2021-11-23 22:31:34.868 INFO environments - load_semantic_knowledge: semantic knowledge successfully loaded from ./data/semantic-knowledge.json!
2021-11-23 22:31:34.869 INFO environments - read_names: Reading ./data/top-human-names complete! There are 20 names in total
2021-11-23 22:31:34.870 INFO environments - __init__: An Observation-Question-Answer generator object is generated!
2021-11-23 22:31:34.871 INFO environments - load_semantic_knowledge: semantic knowledge successfully loaded from ./data/semantic-knowledge.json!
2021-11-23 22:31:34.872 INFO environments - read_names: Reading ./data/top-human-names complete! There are 20 names in total
2021-11-23 22:31:34.872 INFO environments - __init__: An Observation-Question-Answer generator object is generated!


In [17]:
# Start a new episode: resets the generator, empties the memory, stores one
# observation and returns the encoded memory (the array displayed below).
foo.reset()


2021-11-23 22:32:05.918 INFO environments - reset: Reseting the history is done!
2021-11-23 22:32:05.920 INFO environments - generate_observation: A new observation generated: ["James's mouse", 'AtLocation', "James's closet", 1637703125.9201865]
2021-11-23 22:32:05.920 INFO environments - add_observation_to_history: observation ["James's mouse", 'AtLocation', "James's closet", 1637703125.9201865] is added to history!
2021-11-23 22:32:05.920 INFO environments - generate: The new observation is added to the history.
2021-11-23 22:32:05.921 INFO memory - ob2epi: Observation ["James's mouse", 'AtLocation', "James's closet", 1637703125.9201865] is now a episodic memory ["James's mouse", 'AtLocation', "James's closet", 1637703125.9201865]
2021-11-23 22:32:05.921 INFO memory - add: memory entry ["James's mouse", 'AtLocation', "James's closet", 1637703125.9201865] added. Now there are in total of 1 memories!
2021-11-23 22:32:05.921 INFO environments - episodic_memory_system_to_numbers: The epi

array([[1.0400000e+02, 1.0300000e+03, 1.0000000e+01, 1.0400000e+02,
        1.0032000e+04, 1.6377032e+09],
       [0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00],
       [0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00],
       [0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00],
       [0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00],
       [0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00],
       [0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00],
       [0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00],
       [0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00],
       [0.0000000e+00, 0.0000000e+00,

In [65]:
# Take one step with action 0 (index of the memory entry to forget when the
# memory is full) and show the reward obtained for the generated question.
next_state, reward, done, info = foo.step(0)
print(reward)

2021-11-23 22:34:14.375 INFO memory - forget: ["Jessica's bear", 'AtLocation', "Jessica's countryside", 1637703211.2613502] forgotten!
2021-11-23 22:34:14.377 INFO environments - generate_question_answer: Generated question and answer is ["William's toothbrush", 'AtLocation', "William's suitcase"]
2021-11-23 22:34:14.377 INFO memory - is_question_valid: ["William's toothbrush", 'AtLocation', "William's suitcase"] is a valid question.
2021-11-23 22:34:14.378 INFO memory - answer_latest: no relevant memories found.
2021-11-23 22:34:14.378 INFO memory - answer_latest: pred: None, correct answer: suitcase. Reward: 0
2021-11-23 22:34:14.379 INFO environments - generate_observation: A new observation generated: ["Karen's elephant", 'AtLocation', "Karen's tree", 1637703254.3793077]
2021-11-23 22:34:14.379 INFO environments - add_observation_to_history: observation ["Karen's elephant", 'AtLocation', "Karen's tree", 1637703254.3793077] is added to history!
2021-11-23 22:34:14.380 INFO environme

0


In [66]:
# Convert the numeric state returned by step() back into episodic memory
# entries for inspection.
foo.observation_space.numbers_to_episodic_memories(next_state)

2021-11-23 22:34:15.094 INFO environments - numbers_to_episodic_memories: The numpy array has been converted to episodic memories!


[["Richard's toothbrush", 'AtLocation', "Richard's suitcase", 1637703200.0],
 ["James's truck", 'AtLocation', "James's garage", 1637703200.0],
 ["James's cat", 'AtLocation', "James's harbor", 1637703200.0],
 ["Jessica's airplane", 'AtLocation', "Jessica's home", 1637703200.0],
 ["William's microwave", 'AtLocation', "William's kitchen", 1637703200.0],
 ["Jennifer's bird", 'AtLocation', "Jennifer's sky", 1637703300.0],
 ["James's keyboard", 'AtLocation', "James's stove", 1637703300.0],
 ["Jennifer's mouse", 'AtLocation', "Jennifer's laboratory", 1637703300.0],
 ["Thomas's apple", 'AtLocation', "Thomas's fridge", 1637703300.0],
 ["Michael's umbrella", 'AtLocation', "Michael's closet", 1637703300.0],
 ["Karen's elephant", 'AtLocation', "Karen's tree", 1637703300.0]]

In [None]:
class MemoryEnv(gym.Env):
    """Custom memory environment compatible with the gym interface.

    Observations and questions come from an `OQAGenerator`. `reset` fills the
    memory until it reports `is_kinda_full`; every `step` forgets the entry
    selected by the action, answers a generated question (the reward) and, if
    the generator still has room, stores one more observation.

    Only `memory_type="episodic"` is implemented: the memory operations used
    here (`ob2epi`, `answer_latest`) are episodic ones. A "semantic"
    environment can be constructed, but `reset`, `step` and `render` raise
    `NotImplementedError` for it.

    NOTE(review): `MemorySpace` is handed an int capacity and the memory type
    here -- confirm it accepts these.
    """

    metadata = {"render.modes": ["console"]}

    def __init__(
        self,
        capacity: int,
        memory_type: str,
        max_history: int = 1024,
        semantic_knowledge_path: str = "./data/semantic-knowledge.json",
        names_path: str = "./data/top-human-names",
        weighting_mode: str = "highest",
        commonsense_prob: float = 0.5,
    ) -> None:
        """

        Args
        ----
        capacity: memory capacity
        memory_type: either episodic or semantic.
        max_history: maximum history of observations.
        semantic_knowledge_path: path to the semantic knowledge generated from
            `collect_data.py`
        names_path: The path to the top 20 human name list.
        weighting_mode: "highest" chooses the one with the highest weight, "weighted"
            chooses all of them by weight, and null chooses every single one of them
            without weighting.
        commonsense_prob: the probability of an observation being covered by a
            commonsense

        """
        super().__init__()
        assert memory_type in ["episodic", "semantic"]
        self.memory_type = memory_type
        self.capacity = capacity
        self.oqag = OQAGenerator(
            max_history,
            semantic_knowledge_path,
            names_path,
            weighting_mode,
            commonsense_prob,
        )
        n_actions = self.capacity + 1
        self.action_space = spaces.Discrete(n_actions)
        self.observation_space = MemorySpace(
            capacity,
            memory_type,
            max_history,
            semantic_knowledge_path,
            names_path,
            weighting_mode,
            commonsense_prob,
        )
        # The memory used by reset/step/render was never created before.
        # Semantic memory has no implementation here, so it is left unset and
        # rejected explicitly when the environment is actually used.
        if memory_type == "episodic":
            self.M_dummy = EpisodicMemory(self.capacity)
        else:
            self.M_dummy = None

    def _memory(self):
        """Return the backing memory, or raise if this memory type is unsupported."""
        if self.M_dummy is None:
            raise NotImplementedError(
                "MemoryEnv currently only supports memory_type='episodic'."
            )
        return self.M_dummy

    def _observe(self):
        """Generate one observation (no question) and add it to the memory."""
        memory = self._memory()
        ob, _ = self.oqag.generate(generate_qa=False)
        memory.add(memory.ob2epi(ob))

    def reset(self):
        """Start a new episode and return the initial memory entries.

        The generator and the memory are cleared, then observations are added
        until the memory reports `is_kinda_full`. The loop is bounded by
        `capacity + 1` additions so it always terminates.

        Raises
        ------
        NotImplementedError: if the environment was built with a memory type
            other than "episodic".

        """
        memory = self._memory()
        self.oqag.reset()
        memory.forget_all()

        for _ in range(self.capacity + 1):
            self._observe()
            if memory.is_kinda_full:
                break

        return memory.entries

    def step(self, action):
        """Forget one entry, answer a question and observe again.

        Args
        ----
        action: index into the memory entries of the entry to forget.

        Returns
        -------
        (entries, reward, done, info): the memory entries after the step, the
        first value returned by `answer_latest`, whether the generator is
        full, and an empty info dict.

        Raises
        ------
        NotImplementedError: if the environment was built with a memory type
            other than "episodic".

        """
        memory = self._memory()

        mem_to_forget = memory.entries[action]
        memory.forget(mem_to_forget)

        question_answer = self.oqag.generate_question_answer()
        reward, _, _ = memory.answer_latest(question_answer)

        done = bool(self.oqag.is_full)
        if not done:
            # Refill the slot that was just freed so the next action again
            # chooses among a full set of entries.
            self._observe()
            assert memory.is_kinda_full

        info = {}

        return memory.entries, reward, done, info

    def render(self, mode="console"):
        """Print the raw memory entries; only the "console" mode is supported."""
        if mode != "console":
            raise NotImplementedError()
        else:
            print(self._memory().entries)

    def close(self):
        """Nothing to release."""
        pass


In [None]:
from stable_baselines.common.env_checker import check_env

In [None]:
# MemoryEnv's signature is (capacity, memory_type, max_history, ...). Passing
# ('episodic', 4, 'train') positionally bound the memory type to `capacity`
# and 4 to `memory_type`, which trips the constructor's assertion. There is no
# data-split parameter, so the former 'train' argument is dropped.
env = MemoryEnv(capacity=4, memory_type="episodic")
# If the environment doesn't follow the interface, an error will be thrown
check_env(env, warn=True)

In [None]:
# MemoryEnv's signature is (capacity, memory_type, max_history, ...); use
# keywords so the values land on the intended parameters. There is no
# data-split parameter, so the former 'val' argument is dropped.
env = MemoryEnv(capacity=4, memory_type="episodic")

obs = env.reset()
env.render()

print(env.observation_space)
print(env.action_space)
print(env.action_space.sample())

# Upper bound on the rollout length; the loop stops earlier once `done` is set.
n_steps = 1200

# Fixed policy: always forget the entry at index 0.
for step in range(n_steps):
    print("Step {}".format(step + 1))
    obs, reward, done, info = env.step(0)
    print('obs=', obs, 'reward=', reward, 'done=', done)
    env.render()
    if done:
        print("Goal reached!", "reward=", reward)
        break

In [None]:
# Display the info dict returned by the most recent env.step() call.
info

In [None]:
# Display the done flag returned by the most recent env.step() call.
done

In [None]:
from transformers import RobertaTokenizer, RobertaForSequenceClassification
import torch

# Load the pretrained "roberta-base" tokenizer and sequence-classification model.
tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
model = RobertaForSequenceClassification.from_pretrained("roberta-base")

# One example sentence with label 1; the label tensor has shape (1, 1),
# i.e. batch size 1.
labels = torch.tensor([[1]])
inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")

# Forward pass with labels so the output carries both the loss and the logits.
outputs = model(labels=labels, **inputs)
loss, logits = outputs.loss, outputs.logits

In [None]:
from memory import EpisodicMemory

# Capacity settings to sweep, doubling from 2 to 1024. Each pair is presumably
# (episodic capacity, semantic capacity) with the semantic part fixed at 0 --
# confirm against the code that consumes this list.
# The first two entries were garbled ("2," / "4, 0),"), which made the list a
# syntax error; they are restored to match the rest of the sequence.
capacities = [
    (2, 0),
    (4, 0),
    (8, 0),
    (16, 0),
    (32, 0),
    (64, 0),
    (128, 0),
    (256, 0),
    (512, 0),
    (1024, 0),
]

# Hand-written baseline: run a fixed forget/answer policy over every data split
# and accumulate the rewards per split.
#
# NOTE(review): this cell uses `capacity`, `data`, `policy`, `questions`,
# `select_a_question` and `logging`, none of which are defined or imported in
# the visible part of this notebook -- it looks pasted from a script and will
# fail on a fresh kernel. `capacities` defined above is not used here.
results = {"train": {}, "val": {}, "test": {}}

for split in ["train", "val", "test"]:

    # Fresh episodic memory and reward counter for each split.
    M_e = EpisodicMemory(capacity["episodic"])
    rewards = 0

    for step, ob in enumerate(data[split]):
        # Store the observation first, then make room if the memory reports
        # is_kinda_full, using the configured forgetting policy.
        mem_epi = M_e.ob2epi(ob)
        M_e.add(mem_epi)
        if M_e.is_kinda_full:
            if policy["forget"].lower() == "oldest":
                M_e.forget_oldest()
            elif policy["forget"].lower() == "random":
                M_e.forget_random()
            else:
                raise NotImplementedError

        question = select_a_question(step, data, questions, split)

        # Answer with the configured answering policy; only the first returned
        # value (the reward) is kept.
        if policy["answer"].lower() == "latest":
            reward, _, _ = M_e.answer_latest(question)
        elif policy["answer"].lower() == "random":
            reward, _, _ = M_e.answer_random(question)
        else:
            raise NotImplementedError

        rewards += reward

    # Per-split summary: total reward, number of samples, final memory content.
    results[split]["rewards"] = rewards
    results[split]["num_samples"] = len(data[split])
    results[split]["episodic_memories"] = M_e.entries

    logging.info(f"results so far: {results}")

logging.info("episodic only training done!")

In [None]:
# episodic only

capacity