# Experimentation - CardTransactionFraudDetection

In [1]:
from env import FraudTransactionEnv
from models.model import Actor, Critic
import torch.optim as optim
import torch
import numpy as np

In [3]:
env = FraudTransactionEnv(csv_path='data/creditcard.csv')

state = env.reset()
print("State shape:", state.shape)

next_state, reward, done, info = env.step(1)
print(f"Reward: {reward}")

State shape: (30,)
Reward: -5.0


In [4]:
state_dim = state.shape[0]
action_dim = 2

In [5]:
state

array([ 1.25735000e+05,  2.14309589e+00, -3.86467292e-01, -1.71357875e+00,
       -4.28998965e-01,  2.44773347e-01, -5.18825899e-01, -1.62612459e-02,
       -1.40988812e-01,  9.53468894e-01, -5.70106552e-03, -1.69817784e+00,
       -6.53525832e-01, -1.33757513e+00,  4.38061390e-01,  1.02266152e-01,
        9.89071796e-02, -3.06497718e-01, -6.00358401e-01,  7.41126336e-01,
       -2.19463718e-01, -3.46699183e-01, -9.56167615e-01,  2.72665760e-01,
        4.56717020e-02, -2.87558579e-01,  1.02474867e+00, -1.46339558e-01,
       -7.72226850e-02,  1.00000000e+01])

In [6]:
actor = Actor(state_dim, action_dim)
critic = Critic(state_dim)
optimizer = optim.Adam(list(actor.parameters()) + list(critic.parameters()), lr = 0.001) 

In [16]:
torch.autograd.set_detect_anomaly(True)

for episode in range(1000):
    state = env.reset()
    done = False
    rewards = []
    log_probs = []
    values = []
    states = []
    actions = []

    counter = 1
    while not done:
        state_tensor = torch.tensor(state, dtype=torch.float32)
        probs = actor(state_tensor)
        dist = torch.distributions.Categorical(probs)
        action = dist.sample()
        log_prob = dist.log_prob(action)
        value = critic(state_tensor)

        next_state, reward, done, _ = env.step(action.item())
        rewards.append(reward)
        log_probs.append(log_prob)
        values.append(value)
        states.append(state_tensor)
        actions.append(action)

        state = next_state
        counter += 1
        if counter % 17 == 0:
            done = True

    # Calcul des retours
    returns = []
    G = 0
    for r in reversed(rewards):
        G = r + 0.99 * G
        returns.insert(0, G)
    returns = torch.tensor(returns, dtype=torch.float).unsqueeze(1)
    values = torch.cat(values)
    advantages = returns - values.detach()

    # Mise à jour des modèles
    for t in range(len(rewards)):
        state_tensor = states[t]
        probs = actor(state_tensor)
        dist = torch.distributions.Categorical(probs)

        # utiliser la même action qu'au moment de la collecte
        new_log_prob = dist.log_prob(actions[t])
        ratio = torch.exp(new_log_prob - log_probs[t].detach())
        clipped_ratio = torch.clamp(ratio, 0.8, 1.2)

        advantage = advantages[t]

        surrogate1 = ratio * advantage
        surrogate2 = clipped_ratio * advantage
        policy_loss = -torch.min(surrogate1, surrogate2)

        value = critic(state_tensor).squeeze()
        return_t = returns[t]
        value_loss = (value - return_t).pow(2)

        value_loss = value_loss.mean() if value_loss.ndim > 0 else value_loss

        entropy = dist.entropy().mean()
        policy_loss = policy_loss.mean()
        value_loss = value_loss.mean()
        loss = policy_loss + 0.5 * value_loss - 0.01 * entropy

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f"Episode {episode + 1}, Récompense: {sum(rewards)}")


Episode 1, Récompense: 16.0
Episode 2, Récompense: 16.0
Episode 3, Récompense: 16.0
Episode 4, Récompense: 5.0
Episode 5, Récompense: 16.0
Episode 6, Récompense: 16.0
Episode 7, Récompense: 16.0
Episode 8, Récompense: 16.0
Episode 9, Récompense: 16.0
Episode 10, Récompense: 16.0
Episode 11, Récompense: 16.0
Episode 12, Récompense: 16.0
Episode 13, Récompense: 16.0
Episode 14, Récompense: 16.0
Episode 15, Récompense: 16.0
Episode 16, Récompense: 16.0
Episode 17, Récompense: 16.0
Episode 18, Récompense: 16.0
Episode 19, Récompense: 16.0
Episode 20, Récompense: 16.0
Episode 21, Récompense: 16.0
Episode 22, Récompense: 16.0
Episode 23, Récompense: 16.0
Episode 24, Récompense: 16.0
Episode 25, Récompense: 16.0
Episode 26, Récompense: 16.0
Episode 27, Récompense: 16.0
Episode 28, Récompense: 16.0
Episode 29, Récompense: 16.0
Episode 30, Récompense: 16.0
Episode 31, Récompense: 16.0
Episode 32, Récompense: 16.0
Episode 33, Récompense: 16.0
Episode 34, Récompense: 16.0
Episode 35, Récompense: 

In [29]:
# fraud lignes
import pandas as pd
data = pd.read_csv('data/creditcard.csv')

data[data["Class"] == 1]

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
541,406.0,-2.312227,1.951992,-1.609851,3.997906,-0.522188,-1.426545,-2.537387,1.391657,-2.770089,...,0.517232,-0.035049,-0.465211,0.320198,0.044519,0.177840,0.261145,-0.143276,0.00,1
623,472.0,-3.043541,-3.157307,1.088463,2.288644,1.359805,-1.064823,0.325574,-0.067794,-0.270953,...,0.661696,0.435477,1.375966,-0.293803,0.279798,-0.145362,-0.252773,0.035764,529.00,1
4920,4462.0,-2.303350,1.759247,-0.359745,2.330243,-0.821628,-0.075788,0.562320,-0.399147,-0.238253,...,-0.294166,-0.932391,0.172726,-0.087330,-0.156114,-0.542628,0.039566,-0.153029,239.93,1
6108,6986.0,-4.397974,1.358367,-2.592844,2.679787,-1.128131,-1.706536,-3.496197,-0.248778,-0.247768,...,0.573574,0.176968,-0.436207,-0.053502,0.252405,-0.657488,-0.827136,0.849573,59.00,1
6329,7519.0,1.234235,3.019740,-4.304597,4.732795,3.624201,-1.357746,1.713445,-0.496358,-1.282858,...,-0.379068,-0.704181,-0.656805,-1.632653,1.488901,0.566797,-0.010016,0.146793,1.00,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
279863,169142.0,-1.927883,1.125653,-4.518331,1.749293,-1.566487,-2.010494,-0.882850,0.697211,-2.064945,...,0.778584,-0.319189,0.639419,-0.294885,0.537503,0.788395,0.292680,0.147968,390.00,1
280143,169347.0,1.378559,1.289381,-5.004247,1.411850,0.442581,-1.326536,-1.413170,0.248525,-1.127396,...,0.370612,0.028234,-0.145640,-0.081049,0.521875,0.739467,0.389152,0.186637,0.76,1
280149,169351.0,-0.676143,1.126366,-2.213700,0.468308,-1.120541,-0.003346,-2.234739,1.210158,-0.652250,...,0.751826,0.834108,0.190944,0.032070,-0.739695,0.471111,0.385107,0.194361,77.89,1
281144,169966.0,-3.113832,0.585864,-5.399730,1.817092,-0.840618,-2.943548,-2.208002,1.058733,-1.632333,...,0.583276,-0.269209,-0.456108,-0.183659,-0.328168,0.606116,0.884876,-0.253700,245.00,1


In [58]:
test_state = data.iloc[2811,]
print(test_state)
test_state = test_state.drop("Class")
test_state = test_state.values

Time      2372.000000
V1          -1.780920
V2           0.408923
V3           1.530475
V4          -0.311483
V5          -1.043961
V6           0.451480
V7           0.287108
V8           0.048006
V9           0.812018
V10          0.607823
V11          0.886199
V12          1.037978
V13          0.771896
V14         -0.787282
V15         -0.171349
V16          0.074052
V17         -0.314939
V18          0.306939
V19          1.435618
V20          0.100591
V21         -0.193392
V22          0.218986
V23         -0.058762
V24          0.126364
V25         -0.379544
V26          1.033241
V27          0.251388
V28          0.478213
Amount     150.000000
Class        0.000000
Name: 2811, dtype: float64


In [59]:
print("test state", test_state)
print("true state", state)

test state [ 2.37200000e+03 -1.78091985e+00  4.08923498e-01  1.53047457e+00
 -3.11482817e-01 -1.04396096e+00  4.51479703e-01  2.87107731e-01
  4.80055633e-02  8.12018461e-01  6.07823398e-01  8.86199473e-01
  1.03797766e+00  7.71895994e-01 -7.87281806e-01 -1.71349407e-01
  7.40515732e-02 -3.14939145e-01  3.06938976e-01  1.43561841e+00
  1.00591358e-01 -1.93392375e-01  2.18985991e-01 -5.87620732e-02
  1.26364159e-01 -3.79543631e-01  1.03324062e+00  2.51388279e-01
  4.78213314e-01  1.50000000e+02]
true state [ 1.25807000e+05  2.06747341e+00  1.37675683e-01 -1.66659974e+00
  4.45015904e-01  3.36154806e-01 -9.47921188e-01  1.65819815e-01
 -2.24773233e-01  5.36023339e-01 -3.68811632e-01 -5.30954200e-01
  8.20098082e-03 -2.77927195e-01 -7.73699330e-01  2.33255105e-01
  3.01579400e-01  5.29466368e-01 -3.01525036e-01  1.62299265e-02
 -1.80012065e-01 -3.61560014e-01 -9.52249461e-01  3.56409351e-01
  6.29771549e-01 -2.82838426e-01  1.66349142e-01 -6.46866190e-02
 -3.00985108e-02  1.79000000e+00]


In [60]:
### Teest et evaluation

state = env.reset()
done = False
total_reward = 0

while not done:
    state_tensor = torch.tensor(test_state, dtype=torch.float32)
    
    with torch.no_grad():
        probs = actor(state_tensor)
    
    action = torch.argmax(probs).item()  # greedy choice (exploitation)
    print("action: ", action)
    
    state, reward, done, _  = env.step(action)
    total_reward += reward
    done = True

print("Reward obtenue :", total_reward)

state_tensor = torch.tensor(state, dtype=torch.float32)

with torch.no_grad():
    value = critic(state_tensor)

print("Valeur estimée de l’état :", value.item())


action:  0
Reward obtenue : 1.0
Valeur estimée de l’état : 5.95544958114624


In [18]:
torch.save(actor.state_dict(), "models/actor.pth")
torch.save(critic.state_dict(), "models/critic.pth")


In [61]:
actor.load_state_dict(torch.load("models/actor.pth"))
critic.load_state_dict(torch.load("models/critic.pth"))
actor.eval()
critic.eval()

Critic(
  (fc1): Linear(in_features=30, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=1, bias=True)
)