# Experimentation - CardTransactionFraudDetection

In [23]:
import numpy as np
import torch
import torch.optim as optim

from env import FraudTransactionEnv
from models.model import Actor, Critic

In [2]:
# Build the environment from the transactions CSV.
env = FraudTransactionEnv(csv_path='data/creditcard.csv')

# Start an episode and check the layout of the first observation.
state = env.reset()
print("State shape:", state.shape)

# Take a single step with action 1 and look at the reward it yields.
step_result = env.step(1)
next_state, reward, done, info = step_result
print(f"Reward: {reward}")


State shape: (30,)
Reward: -10.0


In [19]:
# Network sizes: the input width is read off the observation; the policy
# chooses between two discrete actions.
state_dim, action_dim = state.shape[0], 2

In [29]:
# Display the current observation vector to inspect its raw feature values.
state

array([ 6.87960000e+04, -8.63977001e-01,  6.12548956e-02,  2.42251402e+00,
        8.40051561e-01, -3.58034902e-01,  1.70425697e-01, -5.22626485e-01,
        3.86960039e-01,  5.33070833e-01, -3.07751261e-01, -8.87931490e-01,
       -8.13670903e-01, -1.79707784e+00, -6.45665182e-02,  1.03029453e+00,
       -5.52996507e-01,  5.96961462e-01, -1.61882715e-01,  7.80894899e-01,
       -1.14072535e-02,  3.53205862e-02,  1.33946274e-01, -3.16094301e-02,
        6.84972645e-02, -4.13154798e-01,  4.61489283e-01, -4.17356412e-02,
        1.99105091e-01,  6.43000000e+00])

In [20]:
# Policy network (state -> action probabilities) and value network (state -> value).
actor = Actor(state_dim, action_dim)
critic = Critic(state_dim)

# A single Adam optimizer updates the parameters of both networks together.
optimizer = optim.Adam(
    [*actor.parameters(), *critic.parameters()],
    lr=1e-3,
)

In [21]:
def one_hot(state, dim=None):
    """Encode a discrete state index as a one-hot row vector.

    This helper only makes sense for environments whose state is an integer
    index. The observation printed earlier in this notebook is an array of
    floats, which NumPy refuses to use as an index, so such observations must
    be converted to a tensor directly instead of going through this function.

    Args:
        state: Index of the active position, or a tuple whose first element
            is that index (the remaining tuple elements are ignored).
        dim: Length of the encoding. When omitted, the notebook-level
            ``state_dim`` is used, which is the original behaviour.

    Returns:
        A ``torch.float64`` tensor of shape ``(1, dim)`` holding 1 at the
        given index and 0 everywhere else.

    Raises:
        IndexError: If NumPy cannot use ``state`` as an index into a vector
            of length ``dim`` (out of range, or a non-integer array).
    """
    if dim is None:
        dim = state_dim
    X = np.zeros(dim)
    if isinstance(state, tuple):
        # Only the first tuple element is the index; cast it so that values
        # such as 3.0 are accepted.
        index = state[0]
        X[int(index)] = 1
    else:
        X[state] = 1
    # unsqueeze(0) adds the leading batch axis the networks are called with.
    return torch.tensor(X, dtype = torch.float64).unsqueeze(0)

In [None]:
# Anomaly detection is a debugging aid that slows every backward pass, so it
# is switched off explicitly (this also resets it if an earlier run in the
# same kernel turned it on). Set it to True only while hunting NaN/inf grads.
torch.autograd.set_detect_anomaly(False)

# ---- Training hyperparameters ----
NUM_EPISODES = 1000
GAMMA = 0.99                    # discount applied to future rewards
CLIP_LOW, CLIP_HIGH = 0.8, 1.2  # bounds for the clipped probability ratio
VALUE_COEF = 0.5                # weight of the critic loss
ENTROPY_COEF = 0.01             # weight of the entropy bonus


def _as_batch(observation):
    """Convert one raw observation into a float32 tensor with a leading batch axis.

    The observations shown earlier in this notebook are float feature
    vectors; they cannot be used as an index, so they are fed to the
    networks as-is rather than one-hot encoded.
    """
    return torch.as_tensor(observation, dtype=torch.float32).unsqueeze(0)


for episode in range(NUM_EPISODES):
    state = env.reset()
    done = False
    rewards = []
    old_log_probs = []
    values = []
    states = []
    actions = []

    # ---- Rollout: collect one full episode with the current policy ----
    # Nothing collected here is differentiated through later (the update
    # phase recomputes every forward pass), so no graph needs to be kept.
    with torch.no_grad():
        while not done:
            state_tensor = _as_batch(state)
            probs = actor(state_tensor)
            dist = torch.distributions.Categorical(probs)
            action = dist.sample()

            # The first cell of this notebook unpacks four values from
            # env.step while this loop used to unpack five; star-unpacking
            # accepts either and always reads `done` from the third slot.
            next_state, reward, done, *_ = env.step(action.item())

            rewards.append(reward)
            old_log_probs.append(dist.log_prob(action))
            values.append(critic(state_tensor))
            states.append(state_tensor)
            actions.append(action)

            state = next_state

    # ---- Discounted returns, accumulated from the last step backwards ----
    returns = []
    G = 0
    for r in reversed(rewards):
        G = r + GAMMA * G
        returns.append(G)
    returns.reverse()  # append + reverse avoids the quadratic insert(0, ...)
    returns = torch.tensor(returns, dtype=torch.float).unsqueeze(1)
    values = torch.cat(values)
    advantages = returns - values

    # ---- Update: one optimizer step per collected transition ----
    for t in range(len(rewards)):
        state_tensor = states[t]
        probs = actor(state_tensor)
        dist = torch.distributions.Categorical(probs)

        # Score the action that was actually taken during the rollout.
        new_log_prob = dist.log_prob(actions[t])
        ratio = torch.exp(new_log_prob - old_log_probs[t])
        clipped_ratio = torch.clamp(ratio, CLIP_LOW, CLIP_HIGH)

        advantage = advantages[t]

        # Clipped surrogate objective: take the more pessimistic estimate.
        surrogate1 = ratio * advantage
        surrogate2 = clipped_ratio * advantage
        policy_loss = -torch.min(surrogate1, surrogate2)

        # Critic regresses towards the observed discounted return.
        value = critic(state_tensor)
        return_t = returns[t]
        value_loss = (value - return_t).pow(2)

        entropy = dist.entropy().mean()
        # Every term holds a single element, so backward() needs no
        # explicit gradient argument.
        loss = policy_loss + VALUE_COEF * value_loss - ENTROPY_COEF * entropy

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f"Episode {episode + 1}, Récompense: {sum(rewards)}")


In [None]:
### Test: greedy evaluation of the trained policy over one episode

state = env.reset()
done = False
total_reward = 0

while not done:
    # The observations shown earlier in this notebook are float feature
    # vectors, so they are passed to the network directly (with a leading
    # batch axis) instead of being one-hot encoded.
    state_tensor = torch.as_tensor(state, dtype=torch.float32).unsqueeze(0)

    with torch.no_grad():
        probs = actor(state_tensor)

    action = torch.argmax(probs).item()  # greedy choice (exploitation)

    # The first cell of this notebook unpacks four values from env.step
    # while this loop used to unpack five; star-unpacking accepts either.
    state, reward, done, *_ = env.step(action)
    total_reward += reward

print("Reward obtenue :", total_reward)

# `state` is now whatever the final env.step call returned; the critic is
# queried on it as a quick sanity check of the value head.
state_tensor = torch.as_tensor(state, dtype=torch.float32).unsqueeze(0)

with torch.no_grad():
    value = critic(state_tensor)

print("Valeur estimée de l’état :", value.item())


In [24]:
# Persist the learned weights of both networks (state_dict only).
for net, checkpoint_path in ((actor, "models/actor.pth"), (critic, "models/critic.pth")):
    torch.save(net.state_dict(), checkpoint_path)


In [None]:
# Reload the weights from the files written by the save cell of this
# notebook. The previous paths ("actor_frozenlake.pth" and
# "critic_frozenlake.pth") are not produced anywhere in this notebook.
# NOTE(review): torch.load unpickles the file, so only load checkpoints
# that come from a trusted source.
actor.load_state_dict(torch.load("models/actor.pth"))
critic.load_state_dict(torch.load("models/critic.pth"))

# Switch both networks to inference mode.
actor.eval()
critic.eval()