In [None]:
!pip install torch lightning numpy kaggle wandb torch-geometric
!pip install polars  -U

Collecting lightning
  Downloading lightning-2.1.3-py3-none-any.whl (2.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m11.3 MB/s[0m eta [36m0:00:00[0m
Collecting wandb
  Downloading wandb-0.16.2-py3-none-any.whl (2.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m17.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting torch-geometric
  Downloading torch_geometric-2.4.0-py3-none-any.whl (1.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m21.7 MB/s[0m eta [36m0:00:00[0m
Collecting lightning-utilities<2.0,>=0.8.0 (from lightning)
  Downloading lightning_utilities-0.10.0-py3-none-any.whl (24 kB)
Collecting torchmetrics<3.0,>=0.7.0 (from lightning)
  Downloading torchmetrics-1.2.1-py3-none-any.whl (806 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m806.1/806.1 kB[0m [31m30.3 MB/s[0m eta [36m0:00:00[0m
Collecting pytorch-lightning (from light

In [None]:
from google.colab import files

# Upload kaggle.json. The result is bound to a name on purpose: as the last
# bare expression of the cell, the returned dict (file name -> raw file bytes,
# i.e. the Kaggle API key) would be echoed into the saved cell output.
uploaded = files.upload()


Saving kaggle.json to kaggle.json


{'kaggle.json': b'{"username":"alexxxyy47","key":"<REDACTED>"}'}

In [None]:
!mkdir ~/.kaggle
!mv kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json


mkdir: cannot create directory ‘/root/.kaggle’: File exists


In [None]:
!kaggle datasets download -d ealaxi/paysim1
!unzip paysim1.zip
!rm paysim1.zip

Downloading paysim1.zip to /content
100% 177M/178M [00:06<00:00, 31.9MB/s]
100% 178M/178M [00:06<00:00, 28.1MB/s]
Archive:  paysim1.zip
  inflating: PS_20174392719_1491204439457_log.csv  


In [None]:
import pandas as pd, sys, plotly.graph_objects as go, plotly.express as px, numpy as np, torch, random as rnd, torch.nn as nn, lightning as l, wandb as wndb
from torch.utils.data import Dataset, DataLoader
from sklearn.utils import shuffle
from torch_geometric import seed_everything
import polars as pl
from torch_geometric.data import Data
import pdb
from torch_geometric.nn import GCNConv
import torchmetrics
from torch.nn import Linear, ReLU
from torch_geometric.nn import Sequential  as GSequential, GCNConv
from torchmetrics.classification import BinaryAccuracy, BinaryF1Score, BinaryPrecision, BinaryRecall

In [None]:
# PARAMETERS

SEED = 42

# Seed every random number generator the notebook relies on.
rnd.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
seed_everything(SEED)
# Polars keeps its own global RNG: without this, DataFrame.sample() (used for
# the train/validation split and for drawing the positives of each batch)
# returns different rows on every run.
pl.set_random_seed(SEED)

# Fall back to the CPU when no GPU is visible instead of hard-coding "cuda".
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
ACCELERATOR =  "gpu" if torch.cuda.is_available() else "cpu"
# Number of fraud rows added to every batch / number of non-fraud rows per batch.
POS_SIZE = 150
NEG_SIZE = 1200


In [None]:
# UTILS FUNCTIONS

def load_dataframe(dataset_file: str):
    """Read the CSV file at ``dataset_file`` with polars and return the frame."""
    frame = pl.read_csv(dataset_file)
    return frame


def find_null_or_empty_records(dataframe: pd.DataFrame):
    """Print every row of ``dataframe`` holding a null or an empty-string value.

    A progress bar is refreshed for each visited row; the fraction shown is the
    row's index label divided by the number of rows.
    """
    total_rows = len(dataframe)
    for label, record in dataframe.iterrows():
        print_progress_bar(label / total_rows)
        # Empty strings are only searched for when the row has no null value.
        if record.isnull().any() or any(value == '' for value in record):
            print(f"Record con valori nulli o vuoti:\n{record}\n")

def print_progress_bar(percentuale, lunghezza_barra=20):
    """Redraw a one-line text progress bar on stdout.

    Args:
        percentuale: Completed fraction (0.0 = nothing done, 1.0 = finished).
        lunghezza_barra: Number of characters drawn between the brackets.

    The line starts with a carriage return so successive calls overwrite each
    other instead of scrolling.
    """
    # Always draw the '>' head and never more cells than the bar owns, so the
    # bar keeps a constant width at 0% and for fractions above 1.
    blocchi_compilati = min(max(int(lunghezza_barra * percentuale), 1), lunghezza_barra)
    barra = "[" + "=" * (blocchi_compilati - 1) + ">" + " " * (lunghezza_barra - blocchi_compilati) + "]"
    sys.stdout.write(f"\r{barra} {percentuale * 100:.2f}% completo")
    sys.stdout.flush()


def compute_kind_inconsistence(dataframe):
    """Return, per kind of anomaly, the fraction of rows of ``dataframe`` showing it.

    The result maps a label to ``matching rows / total rows`` for: origin
    balance change differing from the amount, destination balance change
    differing from the amount, zero-amount rows, and rows whose origin and
    destination names coincide.
    """
    total = len(dataframe)
    # label -> pandas query expression selecting the anomalous rows
    checks = {
        "inconsistent orig balance": 'abs(oldbalanceOrg - newbalanceOrig) != amount',
        "inconsistent dest balance": 'abs(oldbalanceDest - newbalanceDest) != amount',
        "zero cash transaction": 'amount == 0 ',
        "self-transaction": 'nameOrig == nameDest',
    }
    return {label: len(dataframe.query(expression)) / total
            for label, expression in checks.items()}

def plot_histogram(to_plot):
    """Show a plotly bar chart with one bar per entry of the ``to_plot`` dict.

    Keys become the x-axis categories and values the bar heights.
    """
    categories = list(to_plot.keys())
    heights = list(to_plot.values())

    bars = go.Bar(x=categories, y=heights)
    go.Figure(data=[bars]).show()



def plot_categories(dataframe):
    """Show a bar chart of the relative frequency of each value in column 'type'."""
    # One row per category with its absolute count, under stable column names.
    frequencies = dataframe['type'].value_counts().reset_index()
    frequencies.columns = ['type', 'count']

    # Turn absolute counts into shares of the total.
    total = frequencies['count'].sum()
    frequencies['count'] = frequencies['count'] / total

    px.bar(
        frequencies,
        x='type',
        y='count',
        title='Istogramma delle categorie nella colonna "type"',
    ).show()

def create_name_dict(df):
  """Map every distinct account name of ``df`` to a consecutive integer id.

  Names are collected from both the 'nameOrig' and 'nameDest' columns; ids
  start at 0 and follow the order in which polars returns the unique names.
  """
  origins = df.select(pl.col("nameOrig").alias('name'))
  destinations = df.select(pl.col("nameDest").alias('name'))
  unique_names = pl.concat([origins, destinations]).unique()
  return {name: position for position, name in enumerate(list(unique_names['name']))}


def _split_off_sample(frame, fraction, seed=None):
  """Draw ``int(len(frame) * fraction)`` random rows; return ``(sample, rest)``.

  ``frame`` must carry a unique 'id' column, which is used to remove the
  sampled rows from the remainder.
  """
  sample = frame.sample(int(len(frame) * fraction), seed=seed)
  rest = frame.filter(~pl.col('id').is_in(sample.get_column('id')))
  return sample, rest


def divide_dataset(dataset_file,train_prc,val_prc, seed=None):
  """Split the transactions CSV into train / validation / test partitions.

  Zero-amount rows are discarded, 'type' is replaced by an integer code and
  'step' is reduced modulo 24. Fraud and non-fraud rows are sampled
  separately, so each partition is returned as a (non-fraud, fraud) pair.

  Args:
    dataset_file: Path of the CSV file to load.
    train_prc: Fraction of the rows of each class assigned to training.
    val_prc: Fraction of the rows left AFTER removing the training rows that
      is assigned to validation (not a fraction of the whole dataset).
    seed: Optional seed forwarded to every polars ``sample`` call; ``None``
      uses the polars global random state.

  Returns:
    ``(neg_train, pos_train), (neg_val, pos_val), (neg_test, pos_test)`` where
    the test pair holds whatever was not sampled for training or validation.
  """
  dataframe = load_dataframe(dataset_file)
  transaction_types = {
      "CASH_IN": 0,
      "CASH_OUT": 1,
      "DEBIT": 2,
      "PAYMENT": 3,
      "TRANSFER": 4
  }

  dataframe = dataframe.with_columns(pl.col("type").replace(transaction_types).cast(pl.Int64).alias("type"),
                                     (pl.col('step')%24).alias('step'))

  # Temporary row id, needed only to subtract sampled rows from the remainder.
  id_df  = pl.DataFrame({'id': list(range(len(dataframe)))})
  dataframe = pl.concat([dataframe, id_df], how="horizontal")

  d_neg = dataframe.filter((pl.col('amount') != 0) & (pl.col('isFraud') == 0))
  d_pos = dataframe.filter((pl.col('amount') != 0) & (pl.col('isFraud') == 1))

  # Sampling order (neg train, pos train, neg val, pos val) is kept fixed so
  # that a seeded global random state always yields the same partitions.
  neg_data_train, d_neg = _split_off_sample(d_neg, train_prc, seed)
  pos_data_train, d_pos = _split_off_sample(d_pos, train_prc, seed)
  neg_data_val, d_neg = _split_off_sample(d_neg, val_prc, seed)
  pos_data_val, d_pos = _split_off_sample(d_pos, val_prc, seed)

  def drop_id(frame):
    return frame.select(pl.exclude('id'))

  return ((drop_id(neg_data_train), drop_id(pos_data_train)),
          (drop_id(neg_data_val), drop_id(pos_data_val)),
          (drop_id(d_neg), drop_id(d_pos)))

def list_to_dataframe(data):
  """Build a polars DataFrame from an iterable of positional transaction rows.

  Each row is read by position in the order of ``column_names`` below; the
  'type' value is converted with ``int``.
  """
  column_names = (
      'step', 'type', 'amount', 'nameOrig', 'oldbalanceOrg', 'newbalanceOrig',
      'nameDest', 'oldbalanceDest', 'newbalanceDest', 'isFraud', 'isFlaggedFraud',
  )
  records = [
      {name: (int(row[position]) if name == 'type' else row[position])
       for position, name in enumerate(column_names)}
      for row in data
  ]
  return pl.DataFrame(records)






In [None]:
dataframe = load_dataframe("PS_20174392719_1491204439457_log.csv")
dataframe = dataframe.cast({"isFraud": pl.Int8})

In [None]:
d = create_name_dict(dataframe)

In [None]:
len(d.keys())

9073900

In [None]:
divide_dataset("PS_20174392719_1491204439457_log.csv",0.7,0.1)

((shape: (4_448_084, 11)
  ┌──────┬──────┬───────────┬─────────────┬───┬──────────────┬──────────────┬─────────┬──────────────┐
  │ step ┆ type ┆ amount    ┆ nameOrig    ┆ … ┆ oldbalanceDe ┆ newbalanceDe ┆ isFraud ┆ isFlaggedFra │
  │ ---  ┆ ---  ┆ ---       ┆ ---         ┆   ┆ st           ┆ st           ┆ ---     ┆ ud           │
  │ i64  ┆ str  ┆ f64       ┆ str         ┆   ┆ ---          ┆ ---          ┆ i64     ┆ ---          │
  │      ┆      ┆           ┆             ┆   ┆ f64          ┆ f64          ┆         ┆ i64          │
  ╞══════╪══════╪═══════════╪═════════════╪═══╪══════════════╪══════════════╪═════════╪══════════════╡
  │ 16   ┆ 4    ┆ 2.7249e6  ┆ C111177078  ┆ … ┆ 0.0          ┆ 2.8515e6     ┆ 0       ┆ 0            │
  │ 15   ┆ 0    ┆ 122161.91 ┆ C348307229  ┆ … ┆ 2018844.6    ┆ 1.1433e6     ┆ 0       ┆ 0            │
  │ 16   ┆ 4    ┆ 115519.78 ┆ C106220047  ┆ … ┆ 0.0          ┆ 126625.6     ┆ 0       ┆ 0            │
  │ 20   ┆ 1    ┆ 268483.68 ┆ C1882402481 ┆ … ┆ 

In [None]:
dataframe.columns


['step',
 'type',
 'amount',
 'nameOrig',
 'oldbalanceOrg',
 'newbalanceOrig',
 'nameDest',
 'oldbalanceDest',
 'newbalanceDest',
 'isFraud',
 'isFlaggedFraud']

In [None]:
len(dataframe.filter(pl.col('amount') == 0))

16

In [None]:
#| (pl.col('nameDest').str.starts_with('M'))   (abs(pl.col('oldbalanceOrg') - pl.col('newbalanceOrig') )) == abs( pl.col('oldbalanceDest') - pl.col('newbalanceDest'))) |
print(len(dataframe.filter( (pl.col('nameDest').str.starts_with('M'))  )))
print(len(dataframe.filter( (pl.col('nameOrig').str.starts_with('M'))  )))
print(len(dataframe.filter( (pl.col('isFraud') == 1)  )))

2151495
0
8213


In [None]:
print(len(dataframe.filter( (pl.col('nameDest').str.starts_with('M'))  |   (abs(pl.col('oldbalanceOrg') - pl.col('newbalanceOrig') ) == abs( pl.col('oldbalanceDest') - pl.col('newbalanceDest')) )           )       ))

2393661


In [None]:
print(len(dataframe.filter( (pl.col('isFraud') == 1) & (~pl.col('nameDest').str.starts_with('M'))  &   (abs(pl.col('oldbalanceOrg') - pl.col('newbalanceOrig') ) != abs( pl.col('oldbalanceDest') - pl.col('newbalanceDest')) )           )       ))

6036


In [None]:
df = pl.DataFrame({"a": [1, 2, 3], "b": [1, 2, 3] })

In [None]:
print(df.sample(1))
print(df.sample(1))

shape: (1, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 2   ┆ 2   │
└─────┴─────┘
shape: (1, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 2   ┆ 2   │
└─────┴─────┘


In [None]:
class FraudDetectionDataset(Dataset):
    """Dataset over non-fraud rows whose batches are enriched with fraud rows.

    Items are rows of ``neg_data``. ``collate`` adds ``pos_num`` rows sampled
    from ``pos_data`` to every batch and turns the batch into a graph whose
    nodes are account names and whose edges are transactions.
    """

    def __init__(self,neg_data, pos_data,device):
        self.neg_data = neg_data
        self.pos_data = pos_data
        self.device = device
        # Set by get_dataloader(); collate() needs it to know how many
        # positive rows to add to each batch.
        self.pos_num = None

    def collate(self, data ):
        """Merge a list of negative rows with sampled positives into a batch.

        Returns:
            ``(batch, data_graph)``: the polars DataFrame of the batch (account
            names replaced by node ids) and the matching ``Data`` graph with
            node features ``x`` (1.0 when the name starts with "M", else 0.0),
            ``edge_index`` (origin -> destination), ``edge_attr`` (amount) and
            ``y`` (isFraud), one edge per transaction.
        """
        if self.pos_num is None:
            raise RuntimeError("pos_num is not set: create the loader through get_dataloader()")

        batch = list_to_dataframe(data)
        positives = self.pos_data.sample(self.pos_num)
        batch = pl.concat([positives, batch])

        name_d = create_name_dict(batch)
        # Node features follow the id order of name_d, so row i describes node i.
        x = torch.tensor([[1] if name.startswith("M") else [0] for name in name_d.keys()], dtype=torch.float)
        batch = batch.with_columns(pl.col('nameOrig').replace(name_d).cast(pl.Int64).alias('nameOrig'), pl.col('nameDest').replace(name_d).cast(pl.Int64).alias('nameDest'))

        edges = batch.select(pl.col('nameOrig','nameDest'))
        # PyG expects edge_index as a [2, num_edges] tensor of type torch.long.
        edge_index = torch.tensor(edges.to_numpy(), dtype=torch.long).t().contiguous()
        y = torch.tensor(batch.select(pl.col('isFraud')).to_numpy(), dtype=torch.float)
        edge_attr =  torch.tensor(batch.select(pl.col('amount')).to_numpy(), dtype=torch.float)
        data_graph = Data(x=x, edge_index=edge_index, edge_attr=edge_attr, y=y)
        return batch, data_graph

    def __getitem__(self, index):
        """Return the ``index``-th non-fraud row as a tuple of values."""
        return self.neg_data.row(index)

    def __len__(self):
        """Number of non-fraud rows."""
        return len(self.neg_data)

    def get_dataloader(self, batch_size, pos_num):
        """Return a DataLoader yielding ``batch_size`` negatives plus ``pos_num`` positives per batch."""
        self.pos_num = pos_num
        return DataLoader(self, batch_size=batch_size, shuffle=False, collate_fn = self.collate)





In [None]:
class FraudDetectionModuleOld(nn.Module):
  """Single GCN layer followed by a linear edge classifier (plain nn.Module).

  ``forward`` expects the ``(batch_dataframe, graph)`` pair produced by
  ``FraudDetectionDataset.collate`` and returns one raw logit per edge.
  """

  def __init__(self,gnn_in_size, gnn_out_size, linear_in_size, linear_out_size, device):
    super().__init__()
    self.gnn = GCNConv(gnn_in_size,gnn_out_size)
    # linear_in_size must equal 2 * gnn_out_size + 3: two node embeddings
    # plus the three transaction features concatenated in forward().
    self.classifier = nn.Linear(linear_in_size, linear_out_size)
    self.sigmoid = nn.Sigmoid()
    self.relu = nn.ReLU()
    self.device = device

  def forward(self,data):
    train_edges, graph = data[0], data[1]

    # The collate function builds CPU tensors; move them next to the
    # transaction features so that every operand lives on self.device.
    x = graph.x.to(self.device)
    edge_index = graph.edge_index.to(self.device)
    edge_attr = graph.edge_attr.to(self.device)

    train_features = torch.tensor(train_edges.select(pl.col('step','type','amount')).to_numpy(), dtype=torch.float ).to(self.device)

    x = self.relu(self.gnn(x, edge_index, edge_attr))

    # Index rows directly (no squeeze) so single-edge batches and width-1
    # embeddings keep their 2-D shape for the concatenation below.
    endpoints = edge_index.long()
    from_nodes = torch.nan_to_num(x[endpoints[0]])
    dest_nodes = torch.nan_to_num(x[endpoints[1]])

    to_classify = torch.cat((from_nodes,dest_nodes,train_features), dim=1)

    return self.classifier(to_classify)










In [None]:
def train(model, epochs, train_dataloader, val_dataloader, loss, optimizer, f1):
  """Run a plain PyTorch training loop and print per-epoch statistics.

  Args:
    model: Callable taking a ``(batch_dataframe, graph)`` batch and returning
      one prediction per edge.
    epochs: Number of passes over ``train_dataloader``.
    train_dataloader: Iterable of training batches; targets are ``batch[1].y``.
    val_dataloader: Iterable of validation batches, forwarded to ``validate``.
    loss: Loss callable ``loss(outputs, targets)``.
    optimizer: Optimizer already bound to the parameters of ``model``.
    f1: Metric callable ``f1(outputs, targets)``, forwarded to ``validate``.
  """
  for epoch in range(epochs):
    model.train()
    train_loss_epoch = []
    for i, batch_inputs in enumerate(train_dataloader):
      print_progress_bar(i/len(train_dataloader))
      outputs = model(batch_inputs)
      # Targets are built on the CPU by the collate function.
      targets = batch_inputs[1].y.to(outputs.device)
      train_loss = loss(outputs, targets)
      optimizer.zero_grad()
      train_loss.backward()
      optimizer.step()
      # Keep a Python float: storing the tensor would keep every batch's
      # autograd graph alive until the end of the epoch.
      train_loss_epoch.append(train_loss.item())

    val_loss, f1_score = validate(model, val_dataloader,loss,f1)
    mean_train_loss = sum(train_loss_epoch)/len(train_loss_epoch) if train_loss_epoch else float("nan")
    print(f'Epoch [{epoch+1}/{epochs}], Training Loss: {mean_train_loss}, Validation Loss: {val_loss}, f1 score = {f1_score}')




def validate(model, dataloader, loss, f1):
  """Evaluate ``model`` on ``dataloader`` without tracking gradients.

  Args:
    model: Callable taking a ``(batch_dataframe, graph)`` batch.
    dataloader: Iterable of batches; targets are ``batch[1].y``.
    loss: Loss callable ``loss(outputs, targets)``.
    f1: Metric callable ``f1(outputs, targets)``.

  Returns:
    ``(mean_loss, mean_f1)``: per-batch values averaged over the batches.
  """
  model.eval()
  val_loss_out = []
  f1_out = []
  with torch.no_grad():
    for i, batch_inputs in enumerate(dataloader):
      print_progress_bar(i/len(dataloader))
      val_outputs = model(batch_inputs)
      # Targets are built on the CPU by the collate function; the loss and the
      # metric need them on the same device as the predictions.
      targets = batch_inputs[1].y.to(val_outputs.device)
      val_loss_out.append(loss(val_outputs, targets))
      f1_out.append(f1(val_outputs, targets))
  return sum(val_loss_out)/len(val_loss_out), sum(f1_out)/len(f1_out)


In [None]:
class ModuleCallback(l.Callback):
  """Print and log the per-epoch mean of the values buffered by the module.

  The module is expected to expose the list attributes ``train_loss``,
  ``val_loss``, ``f1_score``, ``acc``, ``prec`` and ``rec``, each filled with
  one scalar tensor per batch.
  """

  @staticmethod
  def _log_to_wandb(metrics):
    # wandb.log raises when no run is active; skip logging in that case.
    if wndb.run is not None:
      wndb.log(metrics)

  def on_train_epoch_end(self, trainer, pl_module):
    epoch_mean = torch.stack(pl_module.train_loss).mean()
    print("training_epoch_mean loss = ", epoch_mean)
    self._log_to_wandb({"train_loss": epoch_mean})
    # Free the buffer so the next epoch starts from scratch.
    pl_module.train_loss.clear()

  def on_validation_epoch_end(self,trainer, pl_module):
    mean_loss = torch.stack(pl_module.val_loss).mean()
    mean_f1 = torch.stack(pl_module.f1_score).mean()
    mean_acc = torch.stack(pl_module.acc).mean()
    mean_prec = torch.stack(pl_module.prec).mean()
    mean_rec = torch.stack(pl_module.rec).mean()

    print("val_loss = ", mean_loss)
    print("f1 = ", mean_f1)
    print("acc = ", mean_acc)
    print("prec = ", mean_prec)
    print("rec = ", mean_rec)
    self._log_to_wandb({"val_loss": mean_loss, "f1": mean_f1, "acc": mean_acc, "prec": mean_prec, "rec": mean_rec })

    # Without clearing, every epoch would average over all previous epochs
    # too and the buffers would grow for the whole run.
    for buffer in (pl_module.val_loss, pl_module.f1_score, pl_module.acc, pl_module.prec, pl_module.rec):
      buffer.clear()



In [None]:
class GraphNN(nn.Module):
  """Stack of ``deep`` GCNConv layers separated by ``activation``.

  With ``deep == 1`` the stack is one convolution followed by the activation;
  otherwise the last convolution is left without activation.

  Args:
    in_size: Number of input node features.
    out_size: Number of output node features.
    h_size: Width of the hidden convolutions (unused when ``deep == 1``).
    deep: Number of convolution layers.
    activation: Module applied after every convolution but the last.
  """

  def __init__(self,in_size, out_size, h_size, deep,activation):
    super().__init__()
    # torch_geometric's Sequential takes the names of the forward arguments
    # plus a list of modules; modules needing more than the previous output
    # are given as (module, "inputs -> output") pairs.
    conv_call = 'x, edge_index, edge_weight -> x'
    if deep == 1:
      layers = [(GCNConv(in_size,out_size), conv_call), activation]
    else:
      layers = [(GCNConv(in_size,h_size), conv_call), activation]
      for _ in range(deep-2):
        layers.append((GCNConv(h_size,h_size), conv_call))
        layers.append(activation)
      layers.append((GCNConv(h_size,out_size), conv_call))
    self.gnn = GSequential('x, edge_index, edge_weight', layers)

  def forward(self,data):
    """Return the node embeddings for the graph ``data`` (edge_attr is used as edge weight)."""
    return self.gnn(data.x, data.edge_index, data.edge_attr)






In [None]:
class LinearNN(nn.Module):
  """Fully connected stack of ``deep`` linear layers separated by ``activation``.

  With ``deep == 1`` the stack is a single linear layer followed by the
  activation; otherwise it is input layer, ``deep - 2`` hidden layers and an
  output layer, with the activation after every layer except the last.
  """

  def __init__(self,in_size, out_size, h_size, deep,activation):
    super().__init__()
    if deep == 1:
      stack = [nn.Linear(in_size,out_size), activation]
    else:
      stack = [nn.Linear(in_size,h_size), activation]
      hidden_built = 0
      while hidden_built < deep - 2:
        stack += [nn.Linear(h_size,h_size), activation]
        hidden_built += 1
      stack.append(nn.Linear(h_size,out_size))
    self.linear = nn.Sequential(*stack)

  def forward(self,data):
    """Apply the layer stack to ``data``."""
    result = self.linear(data)
    return result

In [None]:
class FraudDetectionModule(l.LightningModule):
  """Lightning module classifying each transaction (edge) as fraud or not.

  Batches are the ``(batch_dataframe, graph)`` pairs produced by
  ``FraudDetectionDataset.collate``. ``forward`` returns raw logits, which is
  what ``BCEWithLogitsLoss`` expects; probabilities are only derived for the
  validation metrics.

  Args:
    gnn: Module mapping the graph to one embedding per node.
    linear: Module mapping ``[origin embedding, destination embedding, step,
      type, amount]`` to one logit per edge.
    lr: Learning rate of the Adam optimizer.
    wd: Weight decay of the Adam optimizer.
  """

  def __init__(self,gnn,linear, lr, wd):
    super().__init__()
    self.gnn = gnn
    self.classifier = linear
    self.sigmoid = nn.Sigmoid()
    self.relu = nn.ReLU()
    self.loss = nn.BCEWithLogitsLoss()
    self.accuracy = BinaryAccuracy()
    self.precision = BinaryPrecision()
    self.recall = BinaryRecall()
    self.f1 = BinaryF1Score()
    self.lr = lr
    self.wd = wd
    # Per-batch values, read by the epoch-end callback.
    self.acc = []
    self.prec = []
    self.rec = []
    self.f1_score = []
    self.train_loss = []
    self.val_loss = []

  def forward(self,data):
    """Return one raw logit per edge of the batch ``data``."""
    train_edges, graph = data[0], data[1]

    x = self.relu(self.gnn(graph))

    # The DataFrame half of the batch is not moved by Lightning, so build the
    # transaction features directly on the device of the node embeddings.
    train_features = torch.tensor(train_edges.select(pl.col('step','type','amount')).to_numpy(), dtype=torch.float, device=x.device)

    # Row 0 holds the origin node of every edge, row 1 the destination node.
    edge_index = graph.edge_index.long()
    from_nodes = torch.nan_to_num(x[edge_index[0]])
    dest_nodes = torch.nan_to_num(x[edge_index[1]])

    to_classify = torch.cat((from_nodes,dest_nodes,train_features), dim=1)

    # No sigmoid here: BCEWithLogitsLoss applies it internally.
    return self.classifier(to_classify)

  def training_step(self, batch, batch_idx):
    logits = self(batch)
    y = batch[1].y

    loss = self.loss(logits,y)
    # Detach the stored copy so the buffer does not keep autograd graphs alive.
    self.train_loss.append(loss.detach())
    self.log("train_loss", loss, prog_bar=True, batch_size=y.shape[0])
    return loss

  def validation_step(self, batch, batch_idx):
    logits = self(batch)
    y = batch[1].y

    val_loss = self.loss(logits,y)

    # Metrics get explicit probabilities and integer targets.
    probs = self.sigmoid(logits)
    targets = y.int()
    acc = self.accuracy(probs, targets)
    prec = self.precision(probs, targets)
    rec = self.recall(probs, targets)
    f1 = self.f1(probs, targets)

    self.acc.append(acc)
    self.prec.append(prec)
    self.rec.append(rec)
    self.f1_score.append(f1)
    self.val_loss.append(val_loss)

    # The batch is a (DataFrame, graph) pair, so Lightning cannot infer its
    # size reliably; pass the number of edges explicitly.
    self.log_dict({"val_loss": val_loss,"f1-score":f1}, prog_bar=True, batch_size=y.shape[0])

  def configure_optimizers(self):
    optimizer = torch.optim.Adam(self.parameters(), lr=self.lr, weight_decay=self.wd)
    return optimizer


In [None]:
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
ACCELERATOR =  "gpu" if torch.cuda.is_available() else "cpu"
# Fraud rows added to every batch / non-fraud rows per batch.
POS_SIZE = 150
NEG_SIZE = 1200

# Graph network: one input feature per node, OUT_GNN features per node out.
IN_GNN = 1
H_GNN = 64
OUT_GNN = 10
DEEP_GNN = 2
ACTIVATION_GNN = nn.ReLU()
# Classifier input = origin embedding + destination embedding + the three
# transaction features (step, type, amount). Derived rather than hard-coded
# so it cannot drift when OUT_GNN changes.
IN_NN = 2 * OUT_GNN + 3
OUT_NN = 1
H_NN = 32
DEEP_NN = 2
ACTIVATION_NN = nn.ReLU()
# Adam learning rate and weight decay.
LR = 1e-4
WD = 1e-5

In [None]:
# 70% of the rows of each class go to training; 10% of what is left after that
# goes to validation. Each *_set is a (non-fraud, fraud) pair of DataFrames.
train_set, validation_set, test_set = divide_dataset("PS_20174392719_1491204439457_log.csv",0.7,0.1)

train_dataset =  FraudDetectionDataset(train_set[0], train_set[1], DEVICE)
validation_dataset =  FraudDetectionDataset(validation_set[0], validation_set[1], DEVICE)

# Every batch holds NEG_SIZE non-fraud rows plus POS_SIZE sampled fraud rows.
train_loader = train_dataset.get_dataloader(NEG_SIZE,POS_SIZE)
validation_loader = validation_dataset.get_dataloader(NEG_SIZE,POS_SIZE)

In [None]:
# Node-embedding network and edge classifier, sized by the constants above.
gnn = GraphNN(IN_GNN, OUT_GNN, H_GNN, DEEP_GNN, ACTIVATION_GNN)
linear = LinearNN(IN_NN, OUT_NN, H_NN, DEEP_NN, ACTIVATION_NN)

# Lightning module combining both networks with the Adam settings LR / WD.
model = FraudDetectionModule(gnn,linear,LR,WD)


FraudDetectionModule(
  (gnn): GCNConv(1, 8)
  (classifier): Linear(in_features=19, out_features=1, bias=True)
  (sigmoid): Sigmoid()
  (relu): ReLU()
)

In [None]:
# Up to 40 epochs on ACCELERATOR; ModuleCallback prints and logs the per-epoch means.
trainer = l.Trainer(deterministic=True, max_epochs=40, accelerator=ACCELERATOR, callbacks=[ModuleCallback()])


INFO: GPU available: True (cuda), used: True
INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO:lightning.pytorch.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs


In [None]:
# Switch torch's deterministic-algorithm enforcement off again before fitting.
# NOTE(review): presumably needed because an op used by the graph layers has
# no deterministic CUDA implementation - confirm.
torch.use_deterministic_algorithms(False)
# `model` is the FraudDetectionModule built above (`modelL` was never defined).
trainer.fit(model, train_loader, validation_loader)

INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO: 
  | Name       | Type              | Params
-------------------------------------------------
0 | gnn        | GCNConv           | 16    
1 | classifier | Linear            | 20    
2 | sigmoid    | Sigmoid           | 0     
3 | relu       | ReLU              | 0     
4 | loss       | BCEWithLogitsLoss | 0     
5 | accuracy   | BinaryAccuracy    | 0     
6 | precision  | BinaryPrecision   | 0     
7 | recall     | BinaryRecall      | 0     
8 | f1         | BinaryF1Score     | 0     
-------------------------------------------------
36        Trainable params
0         Non-trainable params
36        Total params
0.000     Total estimated model params size (MB)
INFO:lightning.pytorch.callbacks.model_summary:
  | Name       | Type              | Params
-------------------------------------------------
0 | gnn        | GCNConv           | 16    
1 | cl

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

acc =  0.8888888955116272
prec =  0.0
rec =  0.0
val_loss =  30411.830078125
f1-score =  0.0
acc =  0.8888888955116272
prec =  0.0
rec =  0.0
val_loss =  30868.53125
f1-score =  0.0


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

acc =  0.8948147892951965
prec =  0.5285714268684387
rec =  0.4933333396911621
val_loss =  0.42281848192214966
f1-score =  0.5103448033332825
acc =  0.9155555367469788
prec =  0.6578947305679321
rec =  0.5
val_loss =  0.4148039221763611
f1-score =  0.5681818127632141
acc =  0.9066666960716248
prec =  0.6200000047683716
rec =  0.41333332657814026
val_loss =  0.34951677918434143
f1-score =  0.4959999918937683
acc =  0.9125925898551941
prec =  0.6333333253860474
rec =  0.5066666603088379
val_loss =  0.3828001916408539
f1-score =  0.5629629492759705


/usr/local/lib/python3.10/dist-packages/lightning/pytorch/utilities/data.py:77: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 2699. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.
/usr/local/lib/python3.10/dist-packages/lightning/pytorch/utilities/data.py:77: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 2700. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.


acc =  0.9125925898551941
prec =  0.640350878238678
rec =  0.4866666793823242
val_loss =  0.37422001361846924
f1-score =  0.5530303120613098
acc =  0.9074074029922485
prec =  0.6068376302719116
rec =  0.47333332896232605
val_loss =  0.32870957255363464
f1-score =  0.5318351984024048


/usr/local/lib/python3.10/dist-packages/lightning/pytorch/utilities/data.py:77: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 2697. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.


acc =  0.914814829826355
prec =  0.6335877776145935
rec =  0.5533333420753479
val_loss =  0.40635618567466736
f1-score =  0.5907473564147949
acc =  0.9200000166893005
prec =  0.6909090876579285
rec =  0.5066666603088379
val_loss =  0.3237283527851105
f1-score =  0.5846154093742371
acc =  0.9162963032722473
prec =  0.6581196784973145
rec =  0.5133333206176758
val_loss =  0.46088850498199463
f1-score =  0.5767790079116821
acc =  0.9088888764381409
prec =  0.604651153087616
rec =  0.5199999809265137
val_loss =  0.3703654110431671
f1-score =  0.5591397881507874
acc =  0.9125925898551941
prec =  0.6290322542190552
rec =  0.5199999809265137
val_loss =  0.40822404623031616
f1-score =  0.569343090057373
acc =  0.9140740633010864
prec =  0.6416666507720947
rec =  0.5133333206176758
val_loss =  0.3516024649143219
f1-score =  0.5703703761100769
acc =  0.9155555367469788
prec =  0.6428571343421936
rec =  0.5400000214576721
val_loss =  0.3811556398868561
f1-score =  0.5869565010070801
acc =  0.9125

/usr/local/lib/python3.10/dist-packages/lightning/pytorch/utilities/data.py:77: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 2696. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.


acc =  0.9133333563804626
prec =  0.6460176706314087
rec =  0.4866666793823242
val_loss =  0.3737969696521759
f1-score =  0.5551331043243408
acc =  0.9244444370269775
prec =  0.7264150977134705
rec =  0.5133333206176758
val_loss =  0.26855170726776123
f1-score =  0.6015625
acc =  0.9096296429634094
prec =  0.6147540807723999
rec =  0.5
val_loss =  0.39829039573669434
f1-score =  0.5514705777168274
acc =  0.9111111164093018
prec =  0.6229507923126221
rec =  0.5066666603088379
val_loss =  0.32536983489990234
f1-score =  0.5588235259056091
acc =  0.9170370101928711
prec =  0.6461538672447205
rec =  0.5600000023841858
val_loss =  0.3115302324295044
f1-score =  0.6000000238418579
acc =  0.9155555367469788
prec =  0.6475409865379333
rec =  0.5266666412353516
val_loss =  0.3382115364074707
f1-score =  0.5808823704719543
acc =  0.9125925898551941
prec =  0.631147563457489
rec =  0.5133333206176758
val_loss =  0.33183103799819946
f1-score =  0.5661764740943909
acc =  0.9140740633010864
prec =  

/usr/local/lib/python3.10/dist-packages/lightning/pytorch/utilities/data.py:77: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 2694. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.


acc =  0.9111111164093018
prec =  0.6271186470985413
rec =  0.4933333396911621
val_loss =  0.3132723569869995
f1-score =  0.5522388219833374
acc =  0.914814829826355
prec =  0.6521739363670349
rec =  0.5
val_loss =  0.272513210773468
f1-score =  0.5660377144813538
acc =  0.9140740633010864
prec =  0.6574074029922485
rec =  0.47333332896232605
val_loss =  0.42013898491859436
f1-score =  0.5503876209259033
acc =  0.9185185432434082
prec =  0.6754385828971863
rec =  0.5133333206176758
val_loss =  0.33233389258384705
f1-score =  0.5833333134651184
acc =  0.9051851630210876
prec =  0.5932203531265259
rec =  0.46666666865348816
val_loss =  0.339537113904953
f1-score =  0.5223880410194397
acc =  0.9096296429634094
prec =  0.6186440587043762
rec =  0.4866666793823242
val_loss =  0.29759421944618225
f1-score =  0.5447761416435242
acc =  0.9133333563804626
prec =  0.6341463327407837
rec =  0.5199999809265137
val_loss =  0.35270214080810547
f1-score =  0.5714285969734192
acc =  0.9111111164093018

/usr/local/lib/python3.10/dist-packages/lightning/pytorch/utilities/data.py:77: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 2364. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.


Validation: |          | 0/? [00:00<?, ?it/s]

acc =  0.7940740585327148
prec =  0.3048780560493469
rec =  0.6666666865348816
val_loss =  1.5927990674972534
f1-score =  0.4184100329875946
acc =  0.8199999928474426
prec =  0.34343433380126953
rec =  0.6800000071525574
val_loss =  1.3720039129257202
f1-score =  0.4563758373260498
acc =  0.8066666722297668
prec =  0.31561461091041565
rec =  0.6333333253860474
val_loss =  1.1024882793426514
f1-score =  0.421286016702652
acc =  0.8207407593727112
prec =  0.34967321157455444
rec =  0.7133333086967468
val_loss =  1.3105485439300537
f1-score =  0.46929824352264404
acc =  0.8111110925674438
prec =  0.32899022102355957
rec =  0.6733333468437195
val_loss =  1.1895835399627686
f1-score =  0.44201311469078064
acc =  0.8066666722297668
prec =  0.319218248128891
rec =  0.653333306312561
val_loss =  1.0383355617523193
f1-score =  0.42888402938842773
acc =  0.8051851987838745
prec =  0.31715211272239685
rec =  0.653333306312561
val_loss =  1.4765167236328125
f1-score =  0.4270152449607849
acc =  0.

In [None]:
# Hand-written loop (see `train`): 30 epochs with a fresh Adam optimizer
# (lr=1e-4), BCE-with-logits loss and a binary F1 metric placed on DEVICE.
train(model,30,train_loader,validation_loader,nn.BCEWithLogitsLoss(), torch.optim.Adam(model.parameters(), lr=1e-4), torchmetrics.classification.BinaryF1Score().to(DEVICE) )

[>                    ] 4.40% completo

KeyboardInterrupt: 