In [None]:
!pip install pomdp-py

Collecting pomdp-py
  Downloading pomdp_py-1.3.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.3/5.3 MB[0m [31m12.8 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: pomdp-py
Successfully installed pomdp-py-1.3.3


In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## General setup

In [None]:
import pomdp_py
import numpy as np
import random

In [None]:
from collections import defaultdict

def _initial_entry():
    """Return the entry used for an action that has not been seen yet."""
    return (0.0, 0)


def tuple_default():
    """Build an empty action table whose entries are ``(value, visits)`` pairs.

    Returns:
        A ``defaultdict`` in which a missing key yields ``(0.0, 0)``: a float
        value estimate and an integer visit count, both starting at zero.
    """
    # defaultdict requires a zero-argument callable as its factory; the former
    # ``(float, int)`` tuple is not callable and raised TypeError on every call.
    # A named module-level factory (rather than a lambda) keeps the table picklable.
    return defaultdict(_initial_entry)

In [None]:
import random

class Q_Learning_Table:
  """Tabular Q-value estimates keyed by ``(goal, observation)`` and action.

  ``q_tab[(goal, observation)][action]`` holds a ``(value, visits)`` pair:
  the running estimate and the number of updates folded into it.
  """

  def __init__(self, gamma, n, n_actions) -> None:
    """Create an empty table.

    Args:
      gamma: Factor multiplying the best next-step value in the update target.
      n: Stored as ``self.n``; not read by this class.
      n_actions: Stored as ``self.n_actions``; not read by this class.
    """
    # Rows are plain dicts: update() writes every entry explicitly, so no
    # per-action default factory is needed.
    self.q_tab = defaultdict(dict)
    self.gamma = gamma
    self.n = n
    self.n_actions = n_actions

  def update(self, goal, o_t, a_t, r_t, new_o):
    """Fold one observed transition into the estimate for ``(goal, o_t), a_t``.

    The target is ``r_t + gamma * max_a Q((goal, new_o), a)``, where the
    maximum is taken over the actions already recorded for ``(goal, new_o)``
    and is ``0.0`` when none are recorded. The first update of an entry
    stores the target with a visit count of 1; later updates increment the
    count ``N`` and blend with step size ``1 / N``.

    Args:
      goal: Goal identifier; part of the table key.
      o_t: Observation in which the action was taken.
      a_t: Action taken.
      r_t: Reward received.
      new_o: Observation reached after the action.
    """
    #TODO: read about policy flattening
    # .get() avoids inserting an empty row for new_o as a lookup side effect.
    next_row = self.q_tab.get((goal, new_o), {})
    # default=0.0 keeps the target finite when the next state is unseen
    # (starting from -inf would propagate -inf into the table).
    max_v = max((v for v, _ in next_row.values()), default=0.0)

    new_value = r_t + self.gamma * max_v

    row = self.q_tab[(goal, o_t)]
    if a_t not in row:
      # Always store a (value, visits) pair so later reads can unpack it.
      row[a_t] = (new_value, 1)
    else:
      v, N = row[a_t]
      N += 1
      alpha = 1 / N
      row[a_t] = ((1 - alpha) * v + alpha * new_value, N)


class ExperienceMemory:
  """Ordered log of transitions that also forwards each one to a Q-table.

  Each experience is stored as the tuple ``(goal, o_t, a_t, r_t, new_o)``.
  Equality and hashing are based on the recorded experiences only.
  """

  def __init__(self, state_value_tab : "Q_Learning_Table") -> None:
    """Start with an empty log.

    Args:
      state_value_tab: Object whose ``update(goal, o_t, a_t, r_t, new_o)``
        is called for every experience added.
    """
    self.state_value_tab = state_value_tab
    self.experiences = []

  def add_experience(self, goal, o_t, a_t, r_t, new_o):
    """Append the transition to the log and pass it to the Q-table."""
    self.experiences.append((goal, o_t, a_t, r_t, new_o))
    self.state_value_tab.update(goal, o_t, a_t, r_t, new_o)

  def __hash__(self):
    # Lists are unhashable, so hash an immutable snapshot of the log.
    # The hash changes whenever an experience is added, and every recorded
    # field must itself be hashable.
    return hash(tuple(self.experiences))

  def __eq__(self, other):
    # Compare against the same type; only ExperienceMemory has ``experiences``.
    if isinstance(other, ExperienceMemory):
      return self.experiences == other.experiences
    else:
      return False

  def __str__(self):
    return f"""Experience memory: {self.experiences}\n"""

  def __repr__(self):
    return f"""Experience memory: {self.experiences}\n"""


In [None]:
class State(pomdp_py.State):
    """POMDP state wrapping an experience memory.

    Hashing and equality are delegated to the wrapped ``exp_mem``.
    """

    def __init__(self, exp_mem : ExperienceMemory):
        self.exp_mem = exp_mem

    def __hash__(self):
        return hash(self.exp_mem)

    def __eq__(self, other):
        # Anything that is not a State is never equal.
        return isinstance(other, State) and self.exp_mem == other.exp_mem

    def __str__(self):
        return f"Experience memory: {self.exp_mem}\n"

    def __repr__(self):
        return f"Experience memory: {self.exp_mem}\n"

In [None]:
class Action(pomdp_py.Action):
    """Simple named action.

    Two actions are equal when their names match; an action also compares
    equal to a string holding the same name. The hash is the hash of the name.
    """
    def __init__(self, name):
        self.name = name
    def __hash__(self):
        return hash(self.name)
    def __eq__(self, other):
        if isinstance(other, Action):
            return self.name == other.name
        if isinstance(other, str):
            return self.name == other
        # Unsupported type: answer False explicitly rather than falling off
        # the end of the method and returning None.
        return False
    def __str__(self):
        return self.name
    def __repr__(self):
        return "Action(%s)" % self.name

In [None]:
class Observation(pomdp_py.Observation):
    """Observation holding the textual description of a screen."""

    def __init__(self, screen: str):
        self.screen = screen

    def __hash__(self):
        return hash(self.screen)

    def __eq__(self, other):
        # Guard the attribute access: comparing with an object that has no
        # ``screen`` (e.g. None) used to raise AttributeError.
        if isinstance(other, Observation):
            return self.screen == other.screen
        return False

    def __str__(self):
        return f"""Screen: {self.screen}\n"""

    def __repr__(self):
        return f"""Screen: {self.screen}\n"""

In [None]:
class ObservationModel(pomdp_py.ObservationModel):
  """Observation model that treats the observation as noise-free."""

  def probability(self, observation, next_state, action):
    """Return 1.0 for every (observation, next_state, action) triple."""
    # We assume the observation (here, the screen description) is faithful,
    # so it is not a source of uncertainty.
    return 1.0

  def sample(self, next_state: State, action):
    """Return element 1 of the most recent experience in ``next_state``."""
    # NOTE(review): experiences are appended as (goal, o_t, a_t, r_t, new_o),
    # so index 1 is o_t, the observation *before* the action. Confirm that
    # new_o (index 4) was not the intended value here.
    latest = next_state.exp_mem.experiences[-1]
    return latest[1]

In [None]:
class TransitionModel(pomdp_py.TransitionModel):
  """Transition model stub: constant probability, sampling not implemented yet."""

  def probability(self, next_state, state, action):
    """Return the same value, just below 1, for every transition."""
    epsilon = 1e-9
    return 1.0 - epsilon

  def sample(self, state, action):
    """Placeholder that currently returns ``None``."""
    #TODO: the env will be needed here to execute the action and obtain the new state
    return None


## WebShop Case Study

### Installation and setup (random policy)

In [None]:
#Util files
!git clone https://github.com/monilouise/IN1087.git

Cloning into 'IN1087'...
remote: Enumerating objects: 9, done.[K
remote: Counting objects: 100% (9/9), done.[K
remote: Compressing objects: 100% (7/7), done.[K
remote: Total 9 (delta 0), reused 9 (delta 0), pack-reused 0[K
Receiving objects: 100% (9/9), 26.89 KiB | 4.48 MiB/s, done.


In [None]:
!git clone https://github.com/princeton-nlp/webshop.git webshop

Cloning into 'webshop'...
remote: Enumerating objects: 395, done.[K
remote: Counting objects:   1% (1/57)[Kremote: Counting objects:   3% (2/57)[Kremote: Counting objects:   5% (3/57)[Kremote: Counting objects:   7% (4/57)[Kremote: Counting objects:   8% (5/57)[Kremote: Counting objects:  10% (6/57)[Kremote: Counting objects:  12% (7/57)[Kremote: Counting objects:  14% (8/57)[Kremote: Counting objects:  15% (9/57)[Kremote: Counting objects:  17% (10/57)[Kremote: Counting objects:  19% (11/57)[Kremote: Counting objects:  21% (12/57)[Kremote: Counting objects:  22% (13/57)[Kremote: Counting objects:  24% (14/57)[Kremote: Counting objects:  26% (15/57)[Kremote: Counting objects:  28% (16/57)[Kremote: Counting objects:  29% (17/57)[Kremote: Counting objects:  31% (18/57)[Kremote: Counting objects:  33% (19/57)[Kremote: Counting objects:  35% (20/57)[Kremote: Counting objects:  36% (21/57)[Kremote: Counting objects:  38% (22/57)[Kremote: Counting

In [None]:
!cp -f IN1087/webshop/requirements.txt webshop

In [None]:
!pip install -q condacolab
import condacolab
condacolab.install()

⏬ Downloading https://github.com/conda-forge/miniforge/releases/download/23.1.0-1/Mambaforge-23.1.0-1-Linux-x86_64.sh...
📦 Installing...
📌 Adjusting configuration...
🩹 Patching environment...
⏲ Done in 0:00:19
🔁 Restarting kernel...


In [None]:
%cd webshop
!./setup.sh -d small

/content/webshop
Collecting numpy==1.22.4
  Using cached numpy-1.22.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.8 MB)
Installing collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 1.26.2
    Uninstalling numpy-1.26.2:
      Successfully uninstalled numpy-1.26.2
Successfully installed numpy-1.22.4
\ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / done
Solving environment: \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ |

In [None]:
!conda install mkl=2021

| / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / 

In [None]:
import gym
from web_agent_site.envs import WebAgentTextEnv

env = gym.make('WebAgentTextEnv-v0', observation_mode='text', num_products=1000)

100%|██████████| 1000/1000 [00:00<00:00, 37036.45it/s]


Loaded 6910 goals.


You can set `disable_env_checker=True` to disable this check.[0m
  logger.warn(


In [None]:
import gym
from rich import print
from rich.markup import escape

from web_agent_site.envs import WebAgentTextEnv
from web_agent_site.models import RandomPolicy
from web_agent_site.utils import DEBUG_PROD_SIZE

# Roll out one episode with RandomPolicy, printing every observation, the
# available actions and the reward of each step. Names are left unchanged
# because later cells read `env` and `observation` from the notebook namespace.
env = gym.make('WebAgentTextEnv-v0', observation_mode='text', num_products=DEBUG_PROD_SIZE)
env.reset()

try:
    policy = RandomPolicy()
    observation = env.observation

    done = False
    # Keep acting until the environment reports that the episode is over.
    while not done:
        print(observation)
        available_actions = env.get_available_actions()
        print('Available actions:', available_actions)
        action = policy.forward(observation, available_actions)
        observation, reward, done, info = env.step(action)
        print(f'Taking action "{escape(action)}" -> Reward = {reward}')
finally:
    # Close the environment even if a step raises.
    env.close()

In [None]:
env.reset()
observation = env.observation
observation

  and should_run_async(code)


"WebShop [SEP] Instruction: [SEP] Find me slim fit, machine wash women's jumpsuits, rompers & overalls with short sleeve, high waist, polyester spandex for daily wear with color: green stripe, and size: large, and price lower than 50.00 dollars [SEP] Search"

In [None]:
env.get_available_actions()

  and should_run_async(code)


{'has_search_bar': True, 'clickables': ['search']}

In [None]:
observation

"Instruction: [SEP] Find me slim fit, machine wash women's jumpsuits, rompers & overalls with short sleeve, high waist, polyester spandex for daily wear with color: green stripe, and size: large, and price lower than 50.00 dollars [SEP] Back to Search [SEP] Page 1 (Total results: 0) [SEP] Next >"

In [None]:
observation, reward, done, info = env.step('click[back to search]')

In [None]:
observation

"WebShop [SEP] Instruction: [SEP] Find me slim fit, machine wash women's jumpsuits, rompers & overalls with short sleeve, high waist, polyester spandex for daily wear with color: green stripe, and size: large, and price lower than 50.00 dollars [SEP] Search"

In [None]:
observation, reward, done, info = env.step('search[bags]')

In [None]:
observation

"Instruction: [SEP] Find me slim fit, machine wash women's jumpsuits, rompers & overalls with short sleeve, high waist, polyester spandex for daily wear with color: green stripe, and size: large, and price lower than 50.00 dollars [SEP] Back to Search [SEP] Page 1 (Total results: 0) [SEP] Next >"

In [None]:
%cd /content/webshop

/content/webshop


In [None]:
!pip install numpy --upgrade

Collecting numpy
  Using cached numpy-1.26.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.2 MB)
Installing collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 1.22.4
    Uninstalling numpy-1.22.4:
      Successfully uninstalled numpy-1.22.4
Successfully installed numpy-1.26.2
[0m

In [None]:
!./run_web_agent_text_env.sh

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Taking action [32m"click[0m[32m[[0m[32mnext >[0m[32m][0m[32m"[0m -> Reward = [1;36m0.0[0m
Instruction: [1m[[0mSEP[1m][0m Find me slim fit, machine wash women's jumpsuits, rompers & overalls with short 
sleeve, high waist, polyester spandex for daily wear with color: green stripe, and size: large, and 
price lower than [1;36m50.00[0m dollars [1m[[0mSEP[1m][0m Back to Search [1m[[0mSEP[1m][0m Page [1;36m2[0m [1m([0mTotal results: [1;36m0[0m[1m)[0m [1m[[0mSEP[1m][0m [1m<[0m[39m Prev [0m
[1;39m[[0m[39mSEP[0m[1;39m][0m[39m Next [0m[1m>[0m
Available actions:
[1m{[0m[32m'has_search_bar'[0m: [3;91mFalse[0m, [32m'clickables'[0m: [1m[[0m[32m'back to search'[0m, [32m'[0m[32m<[0m[32m prev'[0m[39m, [0m[32m'next [0m[32m>[0m[32m'[0m[1m][0m[1m}[0m
Taking action [32m"click[0m[32m[[0m[32mback to search[0m[32m][0m[32m"[0m -> Reward = [1;36m0.0[0m
W

### WebShop Baseline Models

In [None]:
import os
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!ls /content/drive/MyDrive/IN1087/Projeto/webshop/webshop_ckpts

choice_il_epoch9.pth  search_il_checkpoint_800.zip


In [None]:
!mkdir -p ckpts/web_click/epoch_9/
!mkdir -p ckpts/web_search/

In [None]:
!cp /content/drive/MyDrive/IN1087/Projeto/webshop/webshop_ckpts/choice_il_epoch9.pth model.pth
!mv model.pth ckpts/web_click/epoch_9/
!cp /content/drive/MyDrive/IN1087/Projeto/webshop/webshop_ckpts/search_il_checkpoint_800.zip ckpts/web_search/
!unzip ckpts/web_search/search_il_checkpoint_800.zip

Archive:  ckpts/web_search/search_il_checkpoint_800.zip
   creating: checkpoint-800/
  inflating: __MACOSX/._checkpoint-800  
  inflating: checkpoint-800/rng_state.pth  
  inflating: checkpoint-800/optimizer.pt  
  inflating: checkpoint-800/config.json  
  inflating: checkpoint-800/scheduler.pt  
  inflating: checkpoint-800/training_args.bin  
  inflating: checkpoint-800/pytorch_model.bin  
  inflating: checkpoint-800/trainer_state.json  


In [None]:
%cd /content/webshop/baseline_models

/content/webshop/baseline_models


In [None]:
!pip install datasets

  and should_run_async(code)


Collecting datasets
  Downloading datasets-2.15.0-py3-none-any.whl (521 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m521.2/521.2 kB[0m [31m10.4 MB/s[0m eta [36m0:00:00[0m
Collecting dill<0.3.8,>=0.3.0
  Downloading dill-0.3.7-py3-none-any.whl (115 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.3/115.3 kB[0m [31m11.4 MB/s[0m eta [36m0:00:00[0m
Collecting fsspec[http]<=2023.10.0,>=2023.1.0
  Downloading fsspec-2023.10.0-py3-none-any.whl (166 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m166.4/166.4 kB[0m [31m14.8 MB/s[0m eta [36m0:00:00[0m
Collecting aiohttp
  Downloading aiohttp-3.9.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m37.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting xxhash
  Downloading xxhash-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)
[2K     [90m━━━━

In [None]:
!python train_search_il.py

Downloading: 100% 878k/878k [00:00<00:00, 15.2MB/s]
Downloading: 100% 446k/446k [00:00<00:00, 10.4MB/s]
Downloading: 100% 26.0/26.0 [00:00<00:00, 69.5kB/s]
Downloading: 100% 1.59k/1.59k [00:00<00:00, 1.18MB/s]
train size: 1479
validation size: 68
test size: 704
all size: 12251
{'input_ids': tensor([    0, 11802,  1836,  1104,  1437, 20585,   112,    73,   306, 23595,
         3524,  6399,   251, 24150,  7714,  2999,  2137,    19, 23637, 11692,
         1755, 14187,     2,     1,     1,     1,     1,     1,     1,     1,
            1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
            1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
            1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
            1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
            1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
            1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
      

In [None]:
!pip install wandb

  and should_run_async(code)


Collecting wandb
  Downloading wandb-0.16.1-py3-none-any.whl (2.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m25.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting setproctitle
  Downloading setproctitle-1.3.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (30 kB)
Collecting GitPython!=3.1.29,>=1.0.0
  Downloading GitPython-3.1.40-py3-none-any.whl (190 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m190.6/190.6 kB[0m [31m18.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting sentry-sdk>=1.0.0
  Downloading sentry_sdk-1.39.1-py2.py3-none-any.whl (254 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m254.1/254.1 kB[0m [31m22.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting docker-pycreds>=0.4.0
  Downloading docker_pycreds-0.4.0-py2.py3-none-any.whl (9.0 kB)
Collecting appdirs>=1.4.3
  Downloading appdirs-1.4.4-py2.py3-none-any.whl (9.6 kB)
Collecting gitdb<

In [None]:
!python test.py --model_path /content/webshop/ckpts/web_click/epoch_9/ --bart_path /content/webshop/checkpoint-800/

  if LooseVersion(numpy.__version__) >= "1.19":
  other = LooseVersion(other)
Products loaded.
Keys cleaned.
Attributes loaded.
100% 1000/1000 [00:00<00:00, 74452.90it/s]
[1;36m0[0m skipped
Loaded 13 goals.
Traceback (most recent call last):
  File "/content/webshop/baseline_models/test.py", line 9, in <module>
    env = WebEnv(args, split='test')
  File "/content/webshop/baseline_models/env.py", line 21, in __init__
    self.env = WebAgentTextEnv(observation_mode=args.state_format, server=server,
  File "/content/webshop/baseline_models/../web_agent_site/envs/web_agent_text_env.py", line 75, in __init__
    self.feats = torch.load(FEAT_CONV)
  File "/usr/local/lib/python3.10/site-packages/torch/serialization.py", line 699, in load
    with _open_file_like(f, 'rb') as opened_file:
  File "/usr/local/lib/python3.10/site-packages/torch/serialization.py", line 231, in _open_file_like
    return _open_file(name_or_buffer, mode)
  File "/usr/local/lib/python3.10/site-packages/torch/serial