Passage de paramètres via un dictionnaire et `**kwargs`

In [1]:
def f1(a, b, c=-1, d=-2):
    """Print the four arguments on one line, separated by single spaces.

    c and d are optional and default to -1 and -2 respectively.
    """
    values = (a, b, c, d)
    print(*values)

def f2(a, b, f1_args):
    """Call f1 with a and b, unpacking f1_args as extra keyword arguments.

    f1_args is a mapping whose keys name the parameters of f1 to override.
    """
    positional = (a, b)
    f1(*positional, **f1_args)

# Demo: call f1 directly with its defaults, then go through f2 while
# overriding first one, then both, of the optional parameters.
f1(1, 2)
f2(1, 2, dict(c=3))
f2(1, 2, dict(c=3, d=4))

1 2 -1 -2
1 2 3 -2
1 2 3 4


In [7]:
class C1:
    """Plain holder for four values a, b, c and d, all of them optional."""

    def __init__(self, a=1, b=2, c=-1, d=-2):
        self.a, self.b, self.c, self.d = a, b, c, d

    def __repr__(self):
        # Render every attribute as "name=value", in declaration order.
        rendered = ', '.join(
            f'{name}={getattr(self, name)}' for name in ('a', 'b', 'c', 'd')
        )
        return f'C1({rendered})'
    
class C2:
    """Wrapper owning a C1 instance built from a dictionary of arguments."""

    def __init__(self, c1_args: dict):
        # Each key of c1_args is passed to C1 as a keyword argument.
        inner = C1(**c1_args)
        self.c1 = inner

    def __repr__(self):
        return 'C2(c1={})'.format(self.c1)

# Demo: a C1 with all defaults, and a C2 whose inner C1 overrides a, b and c.
c1 = C1()
c2 = C2(dict(a=10, b=20, c=30))
print(c1)
print(c2)

C1(a=1, b=2, c=-1, d=-2)
C2(c1=C1(a=10, b=20, c=30, d=-2))


In [None]:
import numpy as np

def weighted_sample_without_replacement_numpy(seq, weights, k, rng=None):
    """
    Draw k distinct elements of seq at random, honouring the given weights.

    Args:
        seq: Candidates to draw from; converted with np.array.
        weights: One finite, non-negative weight per element of seq. They
            are normalized internally, so they do not need to sum to 1.
        k: Number of elements to draw, with 0 <= k <= len(seq).
        rng: Optional object exposing a NumPy-style ``choice`` method, for
            example ``np.random.default_rng(seed)``. When omitted, the global
            ``np.random`` state is used.

    Returns:
        A NumPy array holding the k selected elements of seq.

    Raises:
        ValueError: if k is out of range, if weights does not hold exactly
            one value per element, if a weight is negative or non-finite, or
            if the weights sum to zero. NumPy also raises ValueError when
            fewer than k weights are non-zero.
    """
    seq = np.array(seq)
    weights = np.array(weights, dtype=float)
    n = len(seq)
    if k > n:
        raise ValueError("k ne peut pas être supérieur à la taille de la séquence")
    if k < 0:
        raise ValueError("k ne peut pas être négatif")
    if weights.shape != (n,):
        raise ValueError("weights doit contenir exactement un poids par élément de seq")
    # Reject negative weights explicitly: normalizing an all-negative vector
    # would flip the signs and silently yield valid-looking probabilities.
    if not np.isfinite(weights).all() or (weights < 0).any():
        raise ValueError("les poids doivent être finis et positifs ou nuls")
    total = weights.sum()
    if total <= 0:
        # Dividing by zero would produce NaN probabilities.
        raise ValueError("la somme des poids doit être strictement positive")
    sampler = np.random if rng is None else rng
    indices = sampler.choice(n, size=k, replace=False, p=weights / total)
    return seq[indices]

In [None]:
import numpy as np
import torch

class PrioritizedReplayBuffer:
    """Fixed-capacity ring buffer of transitions sampled by priority.

    Transitions (state, action, reward, next_state, done) are stored in
    pre-allocated NumPy arrays; once the buffer is full the oldest entries
    are overwritten. Each slot keeps a priority already raised to the power
    alpha, and sample() draws slots with probability proportional to it.

    Args:
        capacity: Maximum number of transitions kept; must be positive.
        alpha: Exponent applied to priorities when they are stored.
        epsilon: Small constant added to priorities before exponentiation so
            that a stored priority is never exactly zero.
        device: Torch device on which sample() places the returned tensors.

    Raises:
        ValueError: if capacity is not positive.
    """

    def __init__(self, capacity, alpha=0.6, epsilon=1e-6, device="cpu"):
        if capacity <= 0:
            raise ValueError("capacity must be a positive integer")
        self.capacity = capacity
        self.alpha = alpha
        self.epsilon = epsilon
        self.device = device
        # States are kept as Python objects so that any shape can be stored.
        self.states = np.zeros((capacity,), dtype=object)
        self.actions = np.zeros((capacity,), dtype=int)
        self.rewards = np.zeros((capacity,), dtype=float)
        self.next_states = np.zeros((capacity,), dtype=object)
        self.dones = np.zeros((capacity,), dtype=bool)
        self.priorities = np.zeros((capacity,), dtype=float)
        self.pos = 0  # slot that the next add() will write to
        self.size = 0  # number of slots currently filled

    def __len__(self):
        """Return the number of transitions currently stored."""
        return self.size

    def add(self, state, action, reward, next_state, done, priority=1.0):
        """Store one transition, overwriting the oldest one when full."""
        idx = self.pos
        self.states[idx] = state
        self.actions[idx] = action
        self.rewards[idx] = reward
        self.next_states[idx] = next_state
        self.dones[idx] = done
        # NOTE(review): max(1, ...) floors every incoming priority at 1 before
        # exponentiation, whereas update_priorities applies no such floor --
        # confirm this asymmetry is intended.
        self.priorities[idx] = max(1, priority + self.epsilon) ** self.alpha
        self.pos = (self.pos + 1) % self.capacity
        self.size = min(self.size + 1, self.capacity)

    def sample(self, batch_size, beta=0.4):
        """Draw batch_size transitions (with replacement) by priority.

        Args:
            batch_size: Number of transitions to draw.
            beta: Exponent of the importance-sampling correction.

        Returns:
            Tuple (states, actions, rewards, next_states, dones, indices,
            weights) of tensors on self.device. states, rewards, next_states,
            dones and weights are float32; actions and indices are int64.
            weights holds (size * P(i)) ** -beta scaled so its maximum is 1.

        Raises:
            ValueError: if the buffer is empty, or if the sampled states
                cannot be stacked into a single array (mismatched shapes).
        """
        if self.size == 0:
            raise ValueError("Buffer is empty")
        priorities = self.priorities[:self.size]
        probs = priorities / priorities.sum()

        indices = np.random.choice(self.size, batch_size, p=probs)

        weights = (self.size * probs[indices]) ** (-beta)
        # Normalize by the largest weight "for stability reasons", page 5 in
        # T. Schaul, J. Quan, I. Antonoglou, and D. Silver, 'Prioritized
        # Experience Replay', Feb. 25, 2016, arXiv:1511.05952.
        weights /= weights.max()

        states = self._stack_objects(self.states[indices])
        next_states = self._stack_objects(self.next_states[indices])
        actions = torch.as_tensor(
            self.actions[indices], dtype=torch.long, device=self.device
        )
        rewards = torch.as_tensor(
            self.rewards[indices], dtype=torch.float32, device=self.device
        )
        dones = torch.as_tensor(
            self.dones[indices], dtype=torch.float32, device=self.device
        )
        weights = torch.as_tensor(
            weights, dtype=torch.float32, device=self.device
        )
        indices = torch.as_tensor(
            indices, dtype=torch.long, device=self.device
        )

        return (states, actions, rewards, next_states, dones, indices, weights)

    def update_priorities(self, indices, priorities):
        """Overwrite the stored priority of the given slots.

        Args:
            indices: Slot indices (sequence, NumPy array or torch tensor),
                typically the indices returned by sample().
            priorities: New raw priorities, one per index. Their absolute
                value plus epsilon is raised to the power alpha.
        """
        slots = self._to_numpy(indices).astype(np.int64).reshape(-1)
        raw = self._to_numpy(priorities).astype(float).reshape(-1)
        # epsilon keeps every stored priority strictly positive, so a zero
        # priority can neither starve a slot nor make the sum vanish.
        stored = (np.abs(raw) + self.epsilon) ** self.alpha
        # Assign one by one so that, for repeated indices, the last one wins.
        for slot, value in zip(slots, stored):
            self.priorities[slot] = value

    def _stack_objects(self, items):
        """Stack entries of an object array into one float32 tensor."""
        # torch cannot convert dtype=object arrays directly, so build a
        # regular float32 array first.
        batch = np.stack([np.asarray(item, dtype=np.float32) for item in items])
        return torch.as_tensor(batch, device=self.device)

    @staticmethod
    def _to_numpy(values):
        """Return values as a NumPy array, moving torch tensors to the CPU."""
        if torch.is_tensor(values):
            return values.detach().cpu().numpy()
        return np.asarray(values)