# **Walker SAC**

### **Imports**

In [22]:
from rlkit.util import MultiVersionCheckpointer, Checkpointer, Logger, SimpleMetricModule
from config import *

import pandas as pd
import matplotlib.pyplot as plt
from env import create_env

# Import Torch
import torch
from torch import nn

# Models and Loss
from rlkit.models import MLP
from tensordict.nn import TensorDictModule
from torchrl.modules import ProbabilisticActor, TanhNormal
from tensordict.nn.distributions import NormalParamExtractor

# Util
from torchrl.objectives import SACLoss

In [6]:
def create_policy(model_config):
    model_config = model_config.copy()
    model_config["out_features"] *= 2
    model = MLP(**model_config)

    model = nn.Sequential(
        model,
        NormalParamExtractor()
    )
    model = TensorDictModule(model, in_keys=["observation"], out_keys=["loc", "scale"])
    
    policy = ProbabilisticActor(
        module=model,  
        distribution_class=TanhNormal,

        in_keys=["loc", "scale"],
        out_keys=["action"],

        return_log_prob=True,
        log_prob_key="log_prob",
        cache_dist=True,
    )

    return policy

def create_qvalue(model_config):
    # Remove out_features from config
    model_config = model_config.copy()
    model_config["in_features"] = model_config["in_features"] + model_config["out_features"]
    model_config["out_features"] = 1

    model = MLP(**model_config)
    qvalue = TensorDictModule(model, in_keys=["observation", "action"], out_keys=["state_action_value"])
    return qvalue

In [13]:
policy = create_policy(MODEL_CONFIG)
qvalue = create_qvalue(MODEL_CONFIG)

In [20]:
loss_module = SACLoss(
    actor_network=policy, qvalue_network=qvalue, value_network=None,
    num_qvalue_nets=2,
    alpha_init=0.1, fixed_alpha=True, 
    delay_actor=False, delay_qvalue=True
)

In [23]:
try: env.close()
except: pass
env = create_env(graphics=False, time_scale=10)

with torch.no_grad():
    data = env.rollout(100, policy=policy, break_when_any_done=False)

[UnityMemory] Configuration Parameters - Can be set up in boot.config
    "memorysetup-bucket-allocator-granularity=16"
    "memorysetup-bucket-allocator-bucket-count=8"
    "memorysetup-bucket-allocator-block-size=4194304"
    "memorysetup-bucket-allocator-block-count=1"
    "memorysetup-main-allocator-block-size=16777216"
    "memorysetup-thread-allocator-block-size=16777216"
    "memorysetup-gfx-main-allocator-block-size=16777216"
    "memorysetup-gfx-thread-allocator-block-size=16777216"
    "memorysetup-cache-allocator-block-size=4194304"
    "memorysetup-typetree-allocator-block-size=2097152"
    "memorysetup-profiler-bucket-allocator-granularity=16"
    "memorysetup-profiler-bucket-allocator-bucket-count=8"
    "memorysetup-profiler-bucket-allocator-block-size=4194304"
    "memorysetup-profiler-bucket-allocator-block-count=1"
    "memorysetup-profiler-allocator-block-size=16777216"
    "memorysetup-profiler-editor-allocator-block-size=1048576"
    "memorysetup-temp-allocator-siz

  source[group_name][agent_name]["truncated"] = torch.tensor(


In [25]:
data
loss_module(data)



TypeError: MLP.forward() takes 2 positional arguments but 3 were given

: 