In [16]:
import os
import cv2
import sys
import timm
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch.nn.functional as F

from torch import nn
from tqdm import tqdm
from skimage import io
from pathlib import Path
from sklearn.metrics import confusion_matrix
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

In [17]:
# serve per ricaricare automaticamente il codice modificato
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [18]:
# directory principale di training, perche' nel file .csv ci sono solo i nomi dei file (immagini), da modificare per 
path = Path(os.getcwd()).parent.parent
real_data_dir = os.path.join(path, "artifact", "coco")
fake_data_dir = os.path.join(path, "artifact", "latent_diffusion")

BATCH_SIZE = 32

LR = 0.001

EPOCHS = 30

DEVICE = 'cuda'

In [19]:
# df che devo creare
csv_path = os.path.join("..", "datasets", "out.csv")
df = pd.read_csv(csv_path)
#df.head()
df = df.sample(frac=1)
train_df, valid_df = train_test_split(df, test_size=0.20, random_state=42)

print(train_df.size)
print(valid_df.size)

19200
4800


In [20]:
# carica le immagini nel dataset
class APN_Dataset(Dataset):

  def __init__(self, df):
    self.df = df

  def __len__(self):
    return len(self.df)

  def __getitem__(self, idx):
    row = self.df.iloc[idx]
    
    #2   0_img = io.imread(os.path.join(real_data_dir, row.Anchor))
    #P_img = io.imread(os.path.join(real_data_dir, row.Positive))
    #N_img = io.imread(os.path.join(fake_data_dir, row.Negative))
    
    
    if "coco" in row.Anchor:
        A_img = io.imread(os.path.join(real_data_dir, row.Anchor))
        P_img = io.imread(os.path.join(real_data_dir, row.Positive))
        N_img = io.imread(os.path.join(fake_data_dir, row.Negative))
    
    else:
        A_img = io.imread(os.path.join(fake_data_dir, row.Anchor))
        P_img = io.imread(os.path.join(fake_data_dir, row.Positive))
        N_img = io.imread(os.path.join(real_data_dir, row.Negative))

    # permute because the third channel has to be in first channel in torch

    A_img = torch.from_numpy(A_img).permute(2, 0, 1) / 255.0
    P_img = torch.from_numpy(P_img).permute(2, 0, 1) / 255.0
    N_img = torch.from_numpy(N_img).permute(2, 0, 1) / 255.0
    
    #A_img = np.expand_dims(A_img, 0)
    #P_img = np.expand_dims(P_img, 0)
    #N_img = np.expand_dims(N_img, 0)
    
    # normalizzazione per non far divergere il comportamento della rete
    # il valore dell'immagine sarà compreso tra 0 e 1
    #A_img = torch.from_numpy(A_img) / 255.0
    #P_img = torch.from_numpy(P_img) / 255.0
    #N_img = torch.from_numpy(N_img) / 255.0

    #A_img = torch.from_numpy(A_img.astype(np.int32)) / 65536.0
    #P_img = torch.from_numpy(P_img.astype(np.int32)) / 65536.0
    #N_img = torch.from_numpy(N_img.astype(np.int32)) / 65536.0

    return A_img, P_img, N_img

In [21]:
trainset = APN_Dataset(train_df)
validset = APN_Dataset(valid_df)

print(f"Size of trainset: {len(trainset)}")
print(f"Size of validset: {len(validset)}")

Size of trainset: 6400
Size of validset: 1600


In [22]:
trainloader = DataLoader(trainset, batch_size = BATCH_SIZE, shuffle = True)
validloader = DataLoader(validset, batch_size = BATCH_SIZE)

In [23]:
print(f"No. of batches in trainloader : {len(trainloader)}")
print(f"No. of batches in validloader : {len(validloader)}")

No. of batches in trainloader : 200
No. of batches in validloader : 50


In [24]:
# FUNZIONE PER CARICARE IL MODELLO DI RETE NEURALE DIRETTAMENTE DALLE REPOSITORY ONLINE
class APN_Model(nn.Module):

  # QUI DEFINISCO LA SIZE DEL VETTORE DI EMBEDDING
  def __init__(self, emb_size = 512):
    super(APN_Model, self).__init__()

    # QUI CAIRCATE IL MODELLO, IN QUESTO CASO EFFICIENTNET VERSIONE B0 (LA PIù LEGGERA DELLA FAMIGLIA)
    self.efficientnet = timm.create_model('tf_efficientnetv2_b0', pretrained=False)
    self.efficientnet.classifier = nn.Linear(in_features=self.efficientnet.classifier.in_features, out_features=emb_size)

  def forward(self, images):
    embeddings = self.efficientnet(images)
    return embeddings

In [25]:
# QUI FATE UNA PICCOLA MODIFICA ALLA RETE PER FARLE AVERE IN INPUT IMMAGINI IN SCALA DI GRIGIO DELLO SPETTRO DI FOURIER
model = APN_Model()
#model.efficientnet.conv_stem = nn.Conv2d(1, 32, 3, 2, 1, bias=False)

model.to(DEVICE)

APN_Model(
  (efficientnet): EfficientNet(
    (conv_stem): Conv2dSame(3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
    (bn1): BatchNormAct2d(
      32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True
      (drop): Identity()
      (act): SiLU(inplace=True)
    )
    (blocks): Sequential(
      (0): Sequential(
        (0): ConvBnAct(
          (conv): Conv2d(32, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn1): BatchNormAct2d(
            16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True
            (drop): Identity()
            (act): SiLU(inplace=True)
          )
          (drop_path): Identity()
        )
      )
      (1): Sequential(
        (0): EdgeResidual(
          (conv_exp): Conv2dSame(16, 64, kernel_size=(3, 3), stride=(2, 2), bias=False)
          (bn1): BatchNormAct2d(
            64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True
            (drop): Identity()
            (act)

In [26]:
# funzione di train
def train_fn(model, dataloader, optimizer, criterion):
  model.train() #ON Dropout
  total_loss = 0.0

  for A, P, N in tqdm(dataloader):
    A, P, N = A.to(DEVICE), P.to(DEVICE), N.to(DEVICE)

    print("debug, A shape: " + str(A.shape), file=sys.stderr)
    print("debug, P shape: " + str(P.shape), file=sys.stderr)
    print("debug, N shape: " + str(N.shape), file=sys.stderr)

    # qui vengono creati gli embeddings, le cui distanze verranno calcolate dopo
    A_embs = model(A)
    P_embs = model(P)
    N_embs = model(N)

    # criterion è la funzione di loss triplet
    loss = criterion(A_embs, P_embs, N_embs)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    total_loss += loss.item()

    return total_loss / len(dataloader)


In [27]:
# funzione di evaluation
def eval_fn(model, dataloader, criterion):
  model.eval() #OFF Dropout
  total_loss = 0.0

  with torch.no_grad():
    for A, P, N in tqdm(dataloader):
      A, P, N = A.to(DEVICE), P.to(DEVICE), N.to(DEVICE)

      A_embs = model(A)
      P_embs = model(P)
      N_embs = model(N)

      loss = criterion(A_embs, P_embs, N_embs)

      total_loss += loss.item()

    return total_loss / len(dataloader)

In [28]:
# triplet loss e adam
criterion = nn.TripletMarginLoss()
optimizer = torch.optim.Adam(model.parameters(), lr = LR)

In [29]:
# training
best_valid_loss = np.Inf

for i in range(EPOCHS):
  train_loss = train_fn(model, trainloader, optimizer, criterion)
  valid_loss = eval_fn(model, validloader, criterion)

  if valid_loss < best_valid_loss:
    torch.save(model.state_dict(), 'best_model.pt')
    best_valid_loss = valid_loss
    print("SAVED_WEIGHTS_SUCCESS")

  print(f"EPOCHS : {i+1} train_loss : {train_loss} valid_loss : {valid_loss}")

  0%|          | 0/200 [00:00<?, ?it/s]debug, A shape: torch.Size([32, 3, 200, 200])
debug, P shape: torch.Size([32, 3, 200, 200])
debug, N shape: torch.Size([32, 3, 200, 200])
  0%|          | 0/200 [00:04<?, ?it/s]
100%|██████████| 50/50 [01:32<00:00,  1.86s/it]


SAVED_WEIGHTS_SUCCESS
EPOCHS : 1 train_loss : 0.012199723720550537 valid_loss : 0.9999604296684265


  0%|          | 0/200 [00:00<?, ?it/s]debug, A shape: torch.Size([32, 3, 200, 200])
debug, P shape: torch.Size([32, 3, 200, 200])
debug, N shape: torch.Size([32, 3, 200, 200])
  0%|          | 0/200 [00:04<?, ?it/s]
100%|██████████| 50/50 [00:50<00:00,  1.01s/it]


SAVED_WEIGHTS_SUCCESS
EPOCHS : 2 train_loss : 0.01056828498840332 valid_loss : 0.9998945987224579


  0%|          | 0/200 [00:00<?, ?it/s]debug, A shape: torch.Size([32, 3, 200, 200])
debug, P shape: torch.Size([32, 3, 200, 200])
debug, N shape: torch.Size([32, 3, 200, 200])
  0%|          | 0/200 [00:03<?, ?it/s]
100%|██████████| 50/50 [00:50<00:00,  1.01s/it]


SAVED_WEIGHTS_SUCCESS
EPOCHS : 3 train_loss : 0.005008600950241089 valid_loss : 0.9997799372673035


  0%|          | 0/200 [00:00<?, ?it/s]debug, A shape: torch.Size([32, 3, 200, 200])
debug, P shape: torch.Size([32, 3, 200, 200])
debug, N shape: torch.Size([32, 3, 200, 200])
  0%|          | 0/200 [00:04<?, ?it/s]
100%|██████████| 50/50 [00:49<00:00,  1.01it/s]


SAVED_WEIGHTS_SUCCESS
EPOCHS : 4 train_loss : 0.004730768203735351 valid_loss : 0.9997707581520081


  0%|          | 0/200 [00:00<?, ?it/s]debug, A shape: torch.Size([32, 3, 200, 200])
debug, P shape: torch.Size([32, 3, 200, 200])
debug, N shape: torch.Size([32, 3, 200, 200])
  0%|          | 0/200 [00:03<?, ?it/s]
100%|██████████| 50/50 [00:48<00:00,  1.03it/s]


SAVED_WEIGHTS_SUCCESS
EPOCHS : 5 train_loss : 0.007531881928443908 valid_loss : 0.9992433297634125


  0%|          | 0/200 [00:00<?, ?it/s]debug, A shape: torch.Size([32, 3, 200, 200])
debug, P shape: torch.Size([32, 3, 200, 200])
debug, N shape: torch.Size([32, 3, 200, 200])
  0%|          | 0/200 [00:04<?, ?it/s]
100%|██████████| 50/50 [00:49<00:00,  1.02it/s]


SAVED_WEIGHTS_SUCCESS
EPOCHS : 6 train_loss : 0.007550429105758667 valid_loss : 0.9970844340324402


  0%|          | 0/200 [00:00<?, ?it/s]debug, A shape: torch.Size([32, 3, 200, 200])
debug, P shape: torch.Size([32, 3, 200, 200])
debug, N shape: torch.Size([32, 3, 200, 200])
  0%|          | 0/200 [00:03<?, ?it/s]
100%|██████████| 50/50 [00:49<00:00,  1.01it/s]


SAVED_WEIGHTS_SUCCESS
EPOCHS : 7 train_loss : 0.006054401993751526 valid_loss : 0.9965182542800903


  0%|          | 0/200 [00:00<?, ?it/s]debug, A shape: torch.Size([32, 3, 200, 200])
debug, P shape: torch.Size([32, 3, 200, 200])
debug, N shape: torch.Size([32, 3, 200, 200])
  0%|          | 0/200 [00:04<?, ?it/s]
100%|██████████| 50/50 [00:48<00:00,  1.02it/s]


EPOCHS : 8 train_loss : 0.0037985152006149293 valid_loss : 0.9997199487686157


  0%|          | 0/200 [00:00<?, ?it/s]debug, A shape: torch.Size([32, 3, 200, 200])
debug, P shape: torch.Size([32, 3, 200, 200])
debug, N shape: torch.Size([32, 3, 200, 200])
  0%|          | 0/200 [00:03<?, ?it/s]
100%|██████████| 50/50 [00:48<00:00,  1.03it/s]


SAVED_WEIGHTS_SUCCESS
EPOCHS : 9 train_loss : 0.008998490571975708 valid_loss : 0.9945969188213348


  0%|          | 0/200 [00:00<?, ?it/s]debug, A shape: torch.Size([32, 3, 200, 200])
debug, P shape: torch.Size([32, 3, 200, 200])
debug, N shape: torch.Size([32, 3, 200, 200])
  0%|          | 0/200 [00:03<?, ?it/s]
100%|██████████| 50/50 [00:48<00:00,  1.02it/s]


SAVED_WEIGHTS_SUCCESS
EPOCHS : 10 train_loss : 0.006700929403305054 valid_loss : 0.9863779938220978


  0%|          | 0/200 [00:00<?, ?it/s]debug, A shape: torch.Size([32, 3, 200, 200])
debug, P shape: torch.Size([32, 3, 200, 200])
debug, N shape: torch.Size([32, 3, 200, 200])
  0%|          | 0/200 [00:03<?, ?it/s]
100%|██████████| 50/50 [00:48<00:00,  1.03it/s]


EPOCHS : 11 train_loss : 0.007356112003326416 valid_loss : 0.9924172747135163


  0%|          | 0/200 [00:00<?, ?it/s]debug, A shape: torch.Size([32, 3, 200, 200])
debug, P shape: torch.Size([32, 3, 200, 200])
debug, N shape: torch.Size([32, 3, 200, 200])
  0%|          | 0/200 [00:04<?, ?it/s]
100%|██████████| 50/50 [00:49<00:00,  1.00it/s]


EPOCHS : 12 train_loss : 0.006750366687774658 valid_loss : 1.035324090719223


  0%|          | 0/200 [00:00<?, ?it/s]debug, A shape: torch.Size([32, 3, 200, 200])
debug, P shape: torch.Size([32, 3, 200, 200])
debug, N shape: torch.Size([32, 3, 200, 200])
  0%|          | 0/200 [00:04<?, ?it/s]
100%|██████████| 50/50 [00:49<00:00,  1.02it/s]


EPOCHS : 13 train_loss : 0.006952395439147949 valid_loss : 1.072048316001892


  0%|          | 0/200 [00:00<?, ?it/s]debug, A shape: torch.Size([32, 3, 200, 200])
debug, P shape: torch.Size([32, 3, 200, 200])
debug, N shape: torch.Size([32, 3, 200, 200])
  0%|          | 0/200 [00:03<?, ?it/s]
100%|██████████| 50/50 [00:50<00:00,  1.01s/it]


EPOCHS : 14 train_loss : 0.006890549659729004 valid_loss : 1.1665906715393066


  0%|          | 0/200 [00:00<?, ?it/s]debug, A shape: torch.Size([32, 3, 200, 200])
debug, P shape: torch.Size([32, 3, 200, 200])
debug, N shape: torch.Size([32, 3, 200, 200])
  0%|          | 0/200 [00:03<?, ?it/s]
100%|██████████| 50/50 [00:48<00:00,  1.02it/s]


EPOCHS : 15 train_loss : 0.005532442927360535 valid_loss : 1.3246399009227752


  0%|          | 0/200 [00:00<?, ?it/s]debug, A shape: torch.Size([32, 3, 200, 200])
debug, P shape: torch.Size([32, 3, 200, 200])
debug, N shape: torch.Size([32, 3, 200, 200])
  0%|          | 0/200 [00:03<?, ?it/s]
100%|██████████| 50/50 [00:47<00:00,  1.06it/s]


EPOCHS : 16 train_loss : 0.008474782705307007 valid_loss : 1.7656272804737092


  0%|          | 0/200 [00:00<?, ?it/s]debug, A shape: torch.Size([32, 3, 200, 200])
debug, P shape: torch.Size([32, 3, 200, 200])
debug, N shape: torch.Size([32, 3, 200, 200])
  0%|          | 0/200 [00:03<?, ?it/s]
100%|██████████| 50/50 [00:46<00:00,  1.07it/s]


EPOCHS : 17 train_loss : 0.006771793365478515 valid_loss : 2.4584686398506164


  0%|          | 0/200 [00:00<?, ?it/s]debug, A shape: torch.Size([32, 3, 200, 200])
debug, P shape: torch.Size([32, 3, 200, 200])
debug, N shape: torch.Size([32, 3, 200, 200])
  0%|          | 0/200 [00:03<?, ?it/s]
100%|██████████| 50/50 [00:46<00:00,  1.07it/s]


EPOCHS : 18 train_loss : 0.0063381427526474 valid_loss : 2.59737242937088


  0%|          | 0/200 [00:00<?, ?it/s]debug, A shape: torch.Size([32, 3, 200, 200])
debug, P shape: torch.Size([32, 3, 200, 200])
debug, N shape: torch.Size([32, 3, 200, 200])
  0%|          | 0/200 [00:03<?, ?it/s]
100%|██████████| 50/50 [00:46<00:00,  1.07it/s]


EPOCHS : 19 train_loss : 0.010566526651382446 valid_loss : 2.378260307312012


  0%|          | 0/200 [00:00<?, ?it/s]debug, A shape: torch.Size([32, 3, 200, 200])
debug, P shape: torch.Size([32, 3, 200, 200])
debug, N shape: torch.Size([32, 3, 200, 200])
  0%|          | 0/200 [00:03<?, ?it/s]
100%|██████████| 50/50 [00:46<00:00,  1.07it/s]


EPOCHS : 20 train_loss : 0.008123047947883606 valid_loss : 2.353985582590103


  0%|          | 0/200 [00:00<?, ?it/s]debug, A shape: torch.Size([32, 3, 200, 200])
debug, P shape: torch.Size([32, 3, 200, 200])
debug, N shape: torch.Size([32, 3, 200, 200])
  0%|          | 0/200 [00:03<?, ?it/s]
100%|██████████| 50/50 [00:46<00:00,  1.07it/s]


EPOCHS : 21 train_loss : 0.0066129457950592045 valid_loss : 1.8781253612041473


  0%|          | 0/200 [00:00<?, ?it/s]debug, A shape: torch.Size([32, 3, 200, 200])
debug, P shape: torch.Size([32, 3, 200, 200])
debug, N shape: torch.Size([32, 3, 200, 200])
  0%|          | 0/200 [00:03<?, ?it/s]
100%|██████████| 50/50 [01:16<00:00,  1.53s/it]


EPOCHS : 22 train_loss : 0.007654682993888855 valid_loss : 1.7799255955219269


  0%|          | 0/200 [00:00<?, ?it/s]debug, A shape: torch.Size([32, 3, 200, 200])
debug, P shape: torch.Size([32, 3, 200, 200])
debug, N shape: torch.Size([32, 3, 200, 200])
  0%|          | 0/200 [00:03<?, ?it/s]
100%|██████████| 50/50 [00:44<00:00,  1.13it/s]


EPOCHS : 23 train_loss : 0.0064543807506561275 valid_loss : 1.7491765820980072


  0%|          | 0/200 [00:00<?, ?it/s]debug, A shape: torch.Size([32, 3, 200, 200])
debug, P shape: torch.Size([32, 3, 200, 200])
debug, N shape: torch.Size([32, 3, 200, 200])
  0%|          | 0/200 [00:03<?, ?it/s]
100%|██████████| 50/50 [00:48<00:00,  1.03it/s]


EPOCHS : 24 train_loss : 0.004505143165588379 valid_loss : 1.682971991300583


  0%|          | 0/200 [00:00<?, ?it/s]debug, A shape: torch.Size([32, 3, 200, 200])
debug, P shape: torch.Size([32, 3, 200, 200])
debug, N shape: torch.Size([32, 3, 200, 200])
  0%|          | 0/200 [00:04<?, ?it/s]
100%|██████████| 50/50 [00:52<00:00,  1.04s/it]


EPOCHS : 25 train_loss : 0.005061110854148865 valid_loss : 1.6843982815742493


  0%|          | 0/200 [00:00<?, ?it/s]debug, A shape: torch.Size([32, 3, 200, 200])
debug, P shape: torch.Size([32, 3, 200, 200])
debug, N shape: torch.Size([32, 3, 200, 200])
  0%|          | 0/200 [00:04<?, ?it/s]
100%|██████████| 50/50 [00:51<00:00,  1.03s/it]


EPOCHS : 26 train_loss : 0.004183213710784912 valid_loss : 1.7017081665992737


  0%|          | 0/200 [00:00<?, ?it/s]debug, A shape: torch.Size([32, 3, 200, 200])
debug, P shape: torch.Size([32, 3, 200, 200])
debug, N shape: torch.Size([32, 3, 200, 200])
  0%|          | 0/200 [00:04<?, ?it/s]
100%|██████████| 50/50 [00:51<00:00,  1.03s/it]


EPOCHS : 27 train_loss : 0.008787307143211364 valid_loss : 1.5898219382762908


  0%|          | 0/200 [00:00<?, ?it/s]debug, A shape: torch.Size([32, 3, 200, 200])
debug, P shape: torch.Size([32, 3, 200, 200])
debug, N shape: torch.Size([32, 3, 200, 200])
  0%|          | 0/200 [00:03<?, ?it/s]
100%|██████████| 50/50 [00:51<00:00,  1.03s/it]


EPOCHS : 28 train_loss : 0.005406515002250672 valid_loss : 1.2682227039337157


  0%|          | 0/200 [00:00<?, ?it/s]debug, A shape: torch.Size([32, 3, 200, 200])
debug, P shape: torch.Size([32, 3, 200, 200])
debug, N shape: torch.Size([32, 3, 200, 200])
  0%|          | 0/200 [00:04<?, ?it/s]
100%|██████████| 50/50 [00:50<00:00,  1.01s/it]


EPOCHS : 29 train_loss : 0.005316870212554932 valid_loss : 1.2806662702560425


  0%|          | 0/200 [00:00<?, ?it/s]debug, A shape: torch.Size([32, 3, 200, 200])
debug, P shape: torch.Size([32, 3, 200, 200])
debug, N shape: torch.Size([32, 3, 200, 200])
  0%|          | 0/200 [00:04<?, ?it/s]
100%|██████████| 50/50 [00:58<00:00,  1.18s/it]

EPOCHS : 30 train_loss : 0.005211649537086487 valid_loss : 1.2873055493831635





In [30]:
# QUESTA E' LA FUNZIONE PER GENERARE I VETTORI DI ENCODING o embeddings
def get_encoding_csv(model, anc_img_names, dirFolderReal, dirFolderFake):
  anc_img_names_arr = np.array(anc_img_names)
  encodings = []

  model.eval()
  
  with torch.no_grad():
    for i in tqdm(anc_img_names_arr):
      if "coco" in i:
        A = io.imread(os.path.join(dirFolderReal,i))
        A = torch.from_numpy(A).permute(2, 0, 1) / 255.0
        #A = np.expand_dims(A, 0)
        #A = torch.from_numpy(A.astype(np.int32)) / 255.0
        A = A.to(DEVICE)
        A_enc = model(A.unsqueeze(0))
        encodings.append(A_enc.squeeze().cpu().detach().numpy())
      else:
        A = io.imread(os.path.join(dirFolderFake,i))
        A = torch.from_numpy(A).permute(2, 0, 1) / 255.0
        #A = np.expand_dims(A, 0)
        #A = torch.from_numpy(A.astype(np.int32)) / 255.0
        A = A.to(DEVICE)
        A_enc = model(A.unsqueeze(0))
        encodings.append(A_enc.squeeze().cpu().detach().numpy())         

  #with torch.no_grad():
  #  for i in tqdm(anc_img_names_arr):
  #    A = io.imread(os.path.join(dirFolderReal,i))
  #    A = torch.from_numpy(A).permute(2, 0, 1) / 255.0
  #    #A = np.expand_dims(A, 0)
  #    #A = torch.from_numpy(A.astype(np.int32)) / 255.0
  #    A = A.to(DEVICE)
  #    A_enc = model(A.unsqueeze(0))
  #    encodings.append(A_enc.squeeze().cpu().detach().numpy())

    encodings = np.array(encodings)
    encodings = pd.DataFrame(encodings)
    df_enc = pd.concat([anc_img_names, encodings], axis = 1)

    return df_enc

In [31]:
# QUI RICARICO IL MODELLO UNA VOLTA TRAINATO
model.load_state_dict(torch.load('best_model.pt'))

# QUI CREO IL DATABASE DI FEATURE VECTORS DEL TRAINING SET
# gli embeddings vengono aggiunti nel file csv per non rifarlo ad ogni allenamento
df_enc = get_encoding_csv(model, df['Anchor'], real_data_dir, fake_data_dir)

100%|██████████| 8000/8000 [09:09<00:00, 14.56it/s]


In [32]:
# QUI IL DATABASE COME CSV IN MODO TALE DA NON DOVER FARE QUESTA OPERAZIONE OGNI VOLTA
# OVVIAMENTE, SE DEVO FARE UN NUOVO TRAINING DEVO ANCHE RICREARE GLI ENCODINGS
df_enc.to_csv('database.csv', index = False)

df_enc = pd.read_csv('database.csv')
df_enc.head()

Unnamed: 0,Anchor,0,1,2,3,4,5,6,7,8,...,502,503,504,505,506,507,508,509,510,511
0,coco/coco2017/test2017/img040098.jpg,0.15964,0.054152,0.364224,-0.20111,-0.104188,-0.126346,0.07923,0.289032,-0.309731,...,0.210002,-0.044827,0.141645,0.310006,0.102627,-0.080483,-0.240478,0.199235,0.018837,-0.020826
1,latentdiff-t2i/images/img009822.jpg,0.149158,0.057689,0.355407,-0.197555,-0.084483,-0.134156,0.093355,0.28652,-0.307592,...,0.220658,-0.042755,0.131508,0.309401,0.100807,-0.065847,-0.234253,0.211387,0.030263,-0.049613
2,coco/coco2017/train2017/img141504.jpg,0.174549,0.097916,0.356932,-0.2369,-0.064447,-0.092629,0.137812,0.321238,-0.272702,...,0.221885,-0.082382,0.153006,0.283265,0.116988,-0.042605,-0.251136,0.239386,0.008668,-0.071179
3,coco/coco2017/test2017/img001997.jpg,0.155925,0.05802,0.344176,-0.201897,-0.112124,-0.147898,0.06881,0.290274,-0.289013,...,0.204049,-0.048589,0.150834,0.308076,0.103733,-0.066793,-0.22225,0.210142,0.048103,-0.028085
4,coco/coco2017/test2017/img006486.jpg,0.153231,0.058798,0.360315,-0.201963,-0.080359,-0.126246,0.105663,0.304313,-0.305242,...,0.230245,-0.047763,0.131382,0.304406,0.111906,-0.066981,-0.247726,0.220985,0.027719,-0.047383


In [33]:
# approssimazione della distanza, senza la radice quadrata, per fare i primi
# allenamenti velocemente
def euclidean_dist(img_enc, anc_enc_arr):
    #dist = np.sqrt(np.dot(img_enc-anc_enc_arr, (img_enc- anc_enc_arr).T))
    dist = np.dot(img_enc-anc_enc_arr, (img_enc- anc_enc_arr).T)
    #dist = np.sqrt(dist)
    return dist

In [34]:
path = os.path.join(Path(os.getcwd()).parent, 'datasets', 'testList.csv')
df = pd.read_csv(path)

print(df['real'])
print(df.size)

df.head()

0       coco/coco2017/train2017/img059641.jpg
1       coco/coco2017/train2017/img124043.jpg
2       coco/coco2017/train2017/img045848.jpg
3       coco/coco2017/train2017/img053090.jpg
4        coco/coco2017/test2017/img001097.jpg
                        ...                  
1595     coco/coco2017/test2017/img026670.jpg
1596     coco/coco2017/test2017/img020016.jpg
1597     coco/coco2017/test2017/img039996.jpg
1598    coco/coco2017/train2017/img084695.jpg
1599    coco/coco2017/train2017/img049458.jpg
Name: real, Length: 1600, dtype: object
3200


Unnamed: 0,real,fake
0,coco/coco2017/train2017/img059641.jpg,latentdiff/719/img006890.jpg
1,coco/coco2017/train2017/img124043.jpg,latentdiff/321/img002486.jpg
2,coco/coco2017/train2017/img045848.jpg,latentdiff-t2i/images/img007589.jpg
3,coco/coco2017/train2017/img053090.jpg,latentdiff/565/img005180.jpg
4,coco/coco2017/test2017/img001097.jpg,latentdiff/41/img003460.jpg


In [35]:
def getImageEmbeddings(img, model):

    # img = np.expand_dims(img, 0)
    # img = torch.from_numpy(img) / 255
    img = torch.from_numpy(img).permute(2, 0, 1) / 255.0
    model.eval()

    with torch.no_grad():
        img = img.to(DEVICE)
        img_enc = model(img.unsqueeze(0))
        img_enc = img_enc.detach().cpu().numpy()
        img_enc = np.array(img_enc)

    return img_enc

In [36]:
def searchInDatabase(img_enc, database):
    anc_enc_arr = database.iloc[:, 1:].to_numpy()
    anc_img_names = database['Anchor']

    distance = []
    for i in range(anc_enc_arr.shape[0]):
        dist = euclidean_dist(img_enc, anc_enc_arr[i : i+1, :])
        distance = np.append(distance, dist)

    closest_idx = np.argsort(distance)

    return database['Anchor'][closest_idx[0]]

In [37]:
# DataTestReal = 'C:/Users/polsi/Desktop/Lavori/DeepFake/Datasets/Artifact/cycle_gan/st/test/'
path = Path(os.getcwd()).parent.parent
real_data_dir = os.path.join(path, "artifact", "coco")
fake_data_dir = os.path.join(path, "artifact", "latent_diffusion")

y_true = []
y_pred = []
tempDf = df
tempDf.head()
tempDf.shape

(1600, 2)

In [38]:
# testo i fake
currentTest = 'fake'
database = df_enc
# prendo i primi 500 Fake
for index, row in tqdm(tempDf.iterrows()):
    path = os.path.join(fake_data_dir, row[currentTest])
    img_name = path

    img = io.imread(img_name)

    img_enc = getImageEmbeddings(img, model)

    closestLabel = searchInDatabase(img_enc, database)

    if "coco" in closestLabel:
        y_pred.append("real")
    else:
        y_pred.append("fake")

1600it [06:50,  3.90it/s]


In [39]:
print(len(y_true))
print(len(y_pred))
print(y_pred)

0
1600
['real', 'real', 'real', 'real', 'fake', 'real', 'fake', 'fake', 'real', 'real', 'fake', 'fake', 'fake', 'real', 'real', 'fake', 'fake', 'real', 'fake', 'fake', 'fake', 'fake', 'real', 'real', 'fake', 'real', 'fake', 'fake', 'fake', 'fake', 'real', 'real', 'fake', 'fake', 'fake', 'real', 'real', 'fake', 'fake', 'real', 'fake', 'fake', 'fake', 'real', 'real', 'fake', 'fake', 'real', 'real', 'real', 'real', 'fake', 'fake', 'real', 'fake', 'fake', 'fake', 'fake', 'fake', 'real', 'fake', 'fake', 'fake', 'fake', 'fake', 'fake', 'real', 'real', 'fake', 'real', 'real', 'fake', 'real', 'real', 'real', 'fake', 'real', 'fake', 'real', 'real', 'real', 'fake', 'fake', 'real', 'real', 'real', 'fake', 'fake', 'real', 'fake', 'fake', 'fake', 'fake', 'real', 'real', 'real', 'real', 'real', 'fake', 'fake', 'real', 'real', 'real', 'real', 'real', 'fake', 'real', 'real', 'fake', 'real', 'fake', 'fake', 'fake', 'real', 'fake', 'real', 'fake', 'real', 'real', 'fake', 'real', 'fake', 'real', 'real', 

In [40]:
database = df_enc

In [41]:
# testo i real
currentTest = 'real'
# prendo i primi 500 Fake
for index, row in tqdm(tempDf.iterrows()):
    path = os.path.join(real_data_dir, row[currentTest])
    img_name = path

    img_enc = getImageEmbeddings(img, model)

    closestLabel = searchInDatabase(img_enc, database)
    if "coco" in closestLabel:
        y_pred.append("real")
    else:
        y_pred.append("fake")


1600it [05:10,  5.16it/s]


In [42]:
print(len(y_true))
print(len(y_pred))
print(y_pred)

0
3200
['real', 'real', 'real', 'real', 'fake', 'real', 'fake', 'fake', 'real', 'real', 'fake', 'fake', 'fake', 'real', 'real', 'fake', 'fake', 'real', 'fake', 'fake', 'fake', 'fake', 'real', 'real', 'fake', 'real', 'fake', 'fake', 'fake', 'fake', 'real', 'real', 'fake', 'fake', 'fake', 'real', 'real', 'fake', 'fake', 'real', 'fake', 'fake', 'fake', 'real', 'real', 'fake', 'fake', 'real', 'real', 'real', 'real', 'fake', 'fake', 'real', 'fake', 'fake', 'fake', 'fake', 'fake', 'real', 'fake', 'fake', 'fake', 'fake', 'fake', 'fake', 'real', 'real', 'fake', 'real', 'real', 'fake', 'real', 'real', 'real', 'fake', 'real', 'fake', 'real', 'real', 'real', 'fake', 'fake', 'real', 'real', 'real', 'fake', 'fake', 'real', 'fake', 'fake', 'fake', 'fake', 'real', 'real', 'real', 'real', 'real', 'fake', 'fake', 'real', 'real', 'real', 'real', 'real', 'fake', 'real', 'real', 'fake', 'real', 'fake', 'fake', 'fake', 'real', 'fake', 'real', 'fake', 'real', 'real', 'fake', 'real', 'fake', 'real', 'real', 

In [43]:
# creo i vettori di ground truth
# y_true = np.array(['fake'] * 1523)
y_true = np.array(['fake'] * 1600)
print(y_true.shape)

# temp = np.array(['real'] * 1523)
temp = np.array(['real'] * 1600)
print(temp.shape)

# y_true = np.concatenate([y_true, temp])
y_true = np.concatenate([y_true, temp])
print(y_true.shape)

# calcolo la matrice di confusione (quella di scikit-learn dispone i risultati come nella cella di sotto)
confusion_matrix(y_true, y_pred, labels=["real", "fake"])

(1600,)
(1600,)
(3200,)


array([[   0, 1600],
       [ 804,  796]], dtype=int64)

In [44]:
# estraggo dalla matrice di confusione i True Negative, False Positive, False Negative, True Positive
TN, FP, FN, TP = confusion_matrix(y_true, y_pred, labels=["real", "fake"]).ravel()

In [58]:
# calcolo alcune metriche per vedere come si comporta
accuracy = round((TP + TN) /(TP + TN + FP + FN), 4) * 100
precision = round((TP) / (TP + FP), 4) * 100
sensitivy_recall = round((TP) / (TP + FN), 4) * 100
specificity = round((TN) / (TN + FP) * 100, 4)
F1_score = round((2* precision * sensitivy_recall) / (precision + sensitivy_recall), 2)

#Salvataggio del file csv
df_result = pd.DataFrame({"Precision": [precision], "Sensitivity":[sensitivy_recall], "Specificity":[specificity],"f1_score":[F1_score]})
output_dir_result = os.path.join(Path(os.getcwd()).parent, "results")
output_dir_result = os.path.join(output_dir_result, "results.csv")
df_result.to_csv(output_dir_result,index=False)

print({"Accuracy":accuracy,"Precision":precision,"Sensitivity_recall":sensitivy_recall, "Specificity": specificity, "F1_score":F1_score})

{'Accuracy': 24.88, 'Precision': 33.22, 'Sensitivity_recall': 49.75, 'Specificity': 0.0, 'F1_score': 39.84}
