# Experimentation - CardTransactionFraudDetection

In [1]:
from env import FraudTransactionEnv
from models.model import Actor, Critic
import torch.optim as optim
import torch
import numpy as np

In [3]:
# Build the fraud-detection environment from the credit-card CSV.
CSV_PATH = "data/creditcard.csv"
env = FraudTransactionEnv(csv_path=CSV_PATH)

# Start an episode and check the size of the observation vector.
state = env.reset()
print("State shape:", state.shape)

# Smoke test: take a single step with action 1 and look at the reward.
step_result = env.step(1)
next_state, reward, done, info = step_result
print(f"Reward: {reward}")

State shape: (30,)
Reward: -5.0


In [4]:
# Network sizes: the number of discrete actions handed to the Actor, and the
# observation length taken from the state returned by env.reset().
action_dim = 2
state_dim = len(state)

In [5]:
# Display the observation returned by env.reset() to eyeball the feature values.
# NOTE(review): the rendered values span very different magnitudes (first entry
# ~1.26e5, most others O(1)), which suggests the features are not scaled --
# confirm whether the environment is expected to normalise them.
state

array([ 1.25735000e+05,  2.14309589e+00, -3.86467292e-01, -1.71357875e+00,
       -4.28998965e-01,  2.44773347e-01, -5.18825899e-01, -1.62612459e-02,
       -1.40988812e-01,  9.53468894e-01, -5.70106552e-03, -1.69817784e+00,
       -6.53525832e-01, -1.33757513e+00,  4.38061390e-01,  1.02266152e-01,
        9.89071796e-02, -3.06497718e-01, -6.00358401e-01,  7.41126336e-01,
       -2.19463718e-01, -3.46699183e-01, -9.56167615e-01,  2.72665760e-01,
        4.56717020e-02, -2.87558579e-01,  1.02474867e+00, -1.46339558e-01,
       -7.72226850e-02,  1.00000000e+01])

In [6]:
# Policy (actor) and state-value (critic) networks.
actor = Actor(state_dim, action_dim)
critic = Critic(state_dim)

# A single Adam optimizer updates the parameters of both networks together.
trainable_params = [*actor.parameters(), *critic.parameters()]
optimizer = optim.Adam(trainable_params, lr=0.001)

In [None]:
# --- PPO-style actor-critic training -----------------------------------------
# Per episode:
#   1. roll out the current policy for a bounded number of steps,
#   2. compute discounted returns and advantages,
#   3. apply one clipped-surrogate update per collected transition.

NUM_EPISODES = 1000
MAX_STEPS_PER_EPISODE = 16        # rollouts are truncated after this many steps
GAMMA = 0.99                      # discount factor
CLIP_LOW, CLIP_HIGH = 0.8, 1.2    # clipping range for the probability ratio
VALUE_COEF = 0.5                  # weight of the critic loss
ENTROPY_COEF = 0.01               # weight of the entropy bonus

# Anomaly detection slows every backward pass; switch it on only while
# debugging NaN/inf gradients.
DETECT_ANOMALY = False
torch.autograd.set_detect_anomaly(DETECT_ANOMALY)

for episode in range(NUM_EPISODES):
    state = env.reset()
    done = False
    rewards, log_probs, values, states, actions = [], [], [], [], []

    # ---- 1. Rollout ----------------------------------------------------------
    while not done and len(rewards) < MAX_STEPS_PER_EPISODE:
        state_tensor = torch.tensor(state, dtype=torch.float32)

        # The rollout quantities are only used as constants in the update
        # below, so no autograd graph is needed here.
        with torch.no_grad():
            probs = actor(state_tensor)
            dist = torch.distributions.Categorical(probs)
            action = dist.sample()
            log_prob = dist.log_prob(action)
            value = critic(state_tensor)

        next_state, reward, done, _ = env.step(action.item())

        rewards.append(reward)
        log_probs.append(log_prob)
        values.append(value)
        states.append(state_tensor)
        actions.append(action)

        state = next_state

    # ---- 2. Discounted returns and advantages --------------------------------
    # Accumulate from the last step backwards, then flip once.
    returns = []
    G = 0.0
    for r in reversed(rewards):
        G = r + GAMMA * G
        returns.append(G)
    returns.reverse()

    # Keep returns and values both 1-D with one entry per step: subtracting a
    # (T,) tensor from a (T, 1) tensor would broadcast to (T, T) and make each
    # advantage depend on the value estimates of every other step.
    returns = torch.tensor(returns, dtype=torch.float)
    values = torch.stack(values).reshape(-1)  # assumes the critic emits one value per state
    advantages = returns - values

    # ---- 3. Policy / value update, one transition at a time ------------------
    for t in range(len(rewards)):
        state_tensor = states[t]
        dist = torch.distributions.Categorical(actor(state_tensor))

        # Re-evaluate the action that was actually taken during collection.
        new_log_prob = dist.log_prob(actions[t])
        ratio = torch.exp(new_log_prob - log_probs[t])
        clipped_ratio = torch.clamp(ratio, CLIP_LOW, CLIP_HIGH)

        advantage = advantages[t]
        surrogate = torch.min(ratio * advantage, clipped_ratio * advantage)
        policy_loss = -surrogate.mean()

        value = critic(state_tensor).squeeze()
        value_loss = (value - returns[t]).pow(2).mean()

        entropy = dist.entropy().mean()

        loss = policy_loss + VALUE_COEF * value_loss - ENTROPY_COEF * entropy

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f"Episode {episode + 1}, Récompense: {sum(rewards)}")


Episode 1, Récompense: 16.0
Episode 2, Récompense: 16.0
Episode 3, Récompense: 16.0
Episode 4, Récompense: 5.0
Episode 5, Récompense: 16.0
Episode 6, Récompense: 16.0
Episode 7, Récompense: 16.0
Episode 8, Récompense: 16.0
Episode 9, Récompense: 16.0
Episode 10, Récompense: 16.0
Episode 11, Récompense: 16.0
Episode 12, Récompense: 16.0
Episode 13, Récompense: 16.0
Episode 14, Récompense: 16.0
Episode 15, Récompense: 16.0
Episode 16, Récompense: 16.0
Episode 17, Récompense: 16.0
Episode 18, Récompense: 16.0
Episode 19, Récompense: 16.0
Episode 20, Récompense: 16.0
Episode 21, Récompense: 16.0
Episode 22, Récompense: 16.0
Episode 23, Récompense: 16.0
Episode 24, Récompense: 16.0
Episode 25, Récompense: 16.0
Episode 26, Récompense: 16.0
Episode 27, Récompense: 16.0
Episode 28, Récompense: 16.0
Episode 29, Récompense: 16.0
Episode 30, Récompense: 16.0
Episode 31, Récompense: 16.0
Episode 32, Récompense: 16.0
Episode 33, Récompense: 16.0
Episode 34, Récompense: 16.0
Episode 35, Récompense: 

In [None]:
### Test: greedy rollout with the trained actor, then query the critic

state = env.reset()
done = False
total_reward = 0

# NOTE(review): this loop runs until the environment itself reports `done`;
# there is no step cap here -- confirm the environment terminates episodes.
while not done:
    # Feed the raw observation as a float32 tensor, the same conversion the
    # training loop uses.
    state_tensor = torch.tensor(state, dtype=torch.float32)

    with torch.no_grad():
        probs = actor(state_tensor)

    action = torch.argmax(probs).item()  # greedy choice (exploitation)

    # env.step returns a 4-tuple: (next_state, reward, done, info).
    state, reward, done, _ = env.step(action)
    total_reward += reward

print("Reward obtenue :", total_reward)

# Critic's value estimate for the final state reached by the rollout.
state_tensor = torch.tensor(state, dtype=torch.float32)

with torch.no_grad():
    value = critic(state_tensor)

print("Valeur estimée de l’état :", value.item())


In [None]:
# Persist the learned parameters of both networks under models/.
for network, checkpoint_path in (
    (actor, "models/actor.pth"),
    (critic, "models/critic.pth"),
):
    torch.save(network.state_dict(), checkpoint_path)


In [None]:
# Restore the weights from the checkpoint files written to models/ by the
# save cell, then switch both networks to evaluation mode.
actor.load_state_dict(torch.load("models/actor.pth"))
critic.load_state_dict(torch.load("models/critic.pth"))
actor.eval()
critic.eval()