In [1]:
import numpy as np
import pandas as pd
import os
import torch
# Step up one directory before the `src` imports below (presumably so the `src`
# package resolves from there — TODO confirm); later cells also use paths
# relative to this location (./data, ./models).
# NOTE(review): the move is relative, so re-running this cell climbs one more level.
os.chdir("../")

from src.conf import LAYERS_DIMS, MODEL_FEATURES
from src.utils import get_device
from src.models import FraudAutoEncoder

## Data Generation

In [2]:
# Shape of the simulated dataset: number of rows, then number of feature columns.
n_ = 1_000
k_ = 300

In [3]:
# Simulate a batch of "new arrival" records: n_ rows by k_ features, each value
# drawn uniformly from [-1, 1), with zero-padded column names X_001, X_002, ...
# A fixed seed keeps the written file identical across Restart & Run All.
SIMULATION_SEED = 42
rng = np.random.default_rng(SIMULATION_SEED)

feature_names = [f"X_{i:03d}" for i in range(1, k_ + 1)]
simulated_new_arrivals = pd.DataFrame(
    data=rng.uniform(low=-1, high=1, size=(n_, k_)),
    columns=feature_names,
)

# The file is gzip-compressed Parquet despite its ".gzip" suffix; the name is
# kept unchanged so anything else that reads this path keeps working.
simulated_new_arrivals.to_parquet(
    "./data/simulated_raw_data_new_arrival.gzip", compression="gzip"
)

In [4]:
# Read the simulated file back through the same relative path it was written to,
# instead of a machine-specific absolute path that only exists on one host.
pd.read_parquet("./data/simulated_raw_data_new_arrival.gzip")

Unnamed: 0,X_001,X_002,X_003,X_004,X_005,X_006,X_007,X_008,X_009,X_010,...,X_291,X_292,X_293,X_294,X_295,X_296,X_297,X_298,X_299,X_300
0,0.230085,0.215282,0.737480,-0.029383,0.686564,0.204632,0.242007,0.763501,0.861594,0.160049,...,-0.435087,-0.657290,0.702238,-0.957188,-0.251992,-0.406932,-0.532268,-0.400508,-0.078877,0.914417
1,-0.746929,0.501888,0.329585,0.223508,-0.933238,0.470296,-0.392319,-0.207745,-0.839040,0.073136,...,0.569445,0.605132,0.318652,0.553808,-0.384745,-0.543778,-0.385875,-0.695250,0.552621,-0.878643
2,-0.456121,-0.550659,-0.412170,-0.397030,-0.682750,-0.021660,0.358546,0.343715,0.822910,0.175702,...,-0.135015,0.868221,0.585312,-0.579202,0.131063,-0.419334,-0.781088,-0.515235,0.451785,0.428911
3,0.207372,-0.111627,0.993033,0.218333,0.964496,-0.429259,-0.638620,0.791188,0.493548,0.997451,...,-0.185388,-0.912298,0.716268,0.658407,0.778551,-0.910275,-0.086561,0.509355,0.680457,0.476367
4,0.126396,-0.172714,-0.174762,0.824663,-0.500215,-0.216015,-0.832705,0.997041,-0.075975,-0.521672,...,0.170127,0.248603,-0.047432,-0.487492,-0.897178,-0.627577,-0.774171,-0.207906,0.819025,-0.026374
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,-0.357600,0.887004,-0.485512,0.492853,-0.199034,-0.381747,-0.421489,-0.915021,-0.259465,0.116452,...,-0.515072,0.121284,-0.826953,-0.478490,0.764222,-0.092056,0.810230,0.126713,-0.291540,0.071808
996,-0.273161,-0.121791,-0.705445,-0.959673,-0.112367,0.989670,0.015037,0.550427,-0.329168,0.279856,...,-0.950121,0.659461,0.203389,-0.304165,-0.777573,0.311494,0.227090,-0.942610,0.520086,-0.420401
997,-0.547391,-0.506801,0.363367,0.620437,-0.204963,0.304655,0.112276,-0.894236,0.884614,0.808719,...,-0.221628,-0.897978,0.085584,0.835389,-0.406467,0.534537,0.502468,0.045031,-0.551671,-0.445643
998,0.181385,0.984729,0.067592,-0.105072,-0.968100,0.863401,-0.364973,0.983601,0.838131,0.791857,...,-0.935953,-0.401690,-0.636834,0.071452,0.185493,0.035622,-0.702995,-0.873283,0.541838,-0.347121


## Model Loading

In [5]:
# Device the model is placed on; the meaning of the argument is defined by
# src.utils.get_device (not shown here).
DEVICE = get_device(1)

# Autoencoder layer widths: input -> hidden -> code.
_LAYERS_DIMS = LAYERS_DIMS(INPUT_DIM=300, HIDDEN_DIM=150, CODE_DIM=35)

# Build the network, then move it to the selected device.
model = FraudAutoEncoder(_LAYERS_DIMS)
model = model.to(DEVICE)

# Hyper-parameter bundle; only LEARNING_RATE is consumed in this cell.
model_hyperparams = MODEL_FEATURES(LEARNING_RATE=1e-3, N_EPOCHS=100)

# Mean-squared-error loss and an Adam optimizer over all model parameters.
loss_criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=model_hyperparams.LEARNING_RATE)

In [6]:
# Restore model and optimizer state from the saved checkpoint.
# map_location follows DEVICE rather than a hard-coded "cuda", so the tensors are
# loaded onto the device already selected for the model, and the restore does not
# fail on a host without CUDA (provided get_device picks a CPU device there).
# NOTE(review): assumes get_device returns a torch.device or device string, which
# is what torch.load accepts for map_location — confirm against src.utils.
checkpoint = torch.load("./models/best_model_simulated_data.ckpt", map_location=DEVICE)
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
epoch = checkpoint['epoch']  # epoch value stored in the checkpoint

# Switch to evaluation mode; as the cell's last expression this also displays
# the model's layer summary.
model.eval()

FraudAutoEncoder(
  (encoder): Sequential(
    (0): Linear(in_features=300, out_features=150, bias=True)
    (1): ReLU()
    (2): Linear(in_features=150, out_features=35, bias=True)
    (3): ReLU()
  )
  (decoder): Sequential(
    (0): Linear(in_features=35, out_features=150, bias=True)
    (1): ReLU()
    (2): Linear(in_features=150, out_features=300, bias=True)
    (3): ReLU()
  )
)