In [2]:
import os
import sys

from pathlib import Path

# Add the project folder to the import path so that the `utils` package is visible.
module_path = str(Path.cwd().parent.parent / "project-detective")
sys.path.append(module_path)

In [3]:
import cv2
import timm
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch.nn.functional as F
import utils.semi_hard_mining as shm

from torch import nn
from tqdm import tqdm
from skimage import io
from pathlib import Path
from sklearn.metrics import confusion_matrix
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Automatically reload modified code (IPython autoreload extension)
%load_ext autoreload
%autoreload 2

In [5]:
# Directories the images are read from, one per image source.
path = Path.cwd().parent.parent
coco_data_dir, tt_data_dir, ld_data_dir, bg_data_dir = (
    os.path.join(path, "artifact", folder)
    for folder in ("coco", "taming_transformer", "latent_diffusion", "big_gan")
)

# Selects whether the model runs on RGB images ("rgb") or on greyscale
# images ("grey_scale", meant for the Fourier variant).
mode = "rgb"

# Hyper-parameters and execution device.
BATCH_SIZE, LR, EPOCHS = 32, 0.001, 30
DEVICE = "cpu"

In [6]:
# CSV files whose "real" / "fake" columns hold the image file names.
real_csv_path, fake_csv_path = (
    os.path.join("..", "datasets", filename) for filename in ("real.csv", "fake.csv")
)

df_real = pd.read_csv(real_csv_path)
df_fake = pd.read_csv(fake_csv_path)

In [7]:
# Loads the real images into a dataset.
class APN_Dataset_real(Dataset):
  """Dataset that yields one normalized image tensor per row of `df`.

  The file name is read from the `real` column of `df` and resolved against
  the module-level `coco_data_dir`. The module-level `mode` selects how the
  image is decoded:

  * "rgb": read with `skimage.io.imread`, moved to channel-first layout and
    scaled by 1/255 (assumes the decoded array is H x W x C).
  * "grey_scale": read as a single-channel image with OpenCV, given a
    leading channel axis and scaled by 1/255.

  Raises:
    ValueError: if `mode` is neither "rgb" nor "grey_scale".
    FileNotFoundError: in "grey_scale" mode, if OpenCV cannot read the file.
  """

  def __init__(self, df):
    self.df = df

  def __len__(self):
    return len(self.df)

  def __getitem__(self, idx):
    # Fail early with a clear message instead of an UnboundLocalError later on.
    if mode not in ("rgb", "grey_scale"):
      raise ValueError(f"unsupported mode {mode!r}: expected 'rgb' or 'grey_scale'")

    row = self.df.iloc[idx]
    img_path = os.path.join(coco_data_dir, row.real)

    if mode == "rgb":
      A_img = io.imread(img_path)
      # normalization for rgb images (channel-first, values divided by 255)
      return torch.from_numpy(A_img).permute(2, 0, 1) / 255.0

    # grey_scale: the image must actually be loaded before it can be expanded;
    # cv2.imread returns None (it does not raise) when the file cannot be read.
    A_img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if A_img is None:
      raise FileNotFoundError(f"cannot read image: {img_path}")

    A_img = np.expand_dims(A_img, 0)
    return torch.from_numpy(A_img) / 255.0

In [8]:
# Loads the generated (fake) images into a dataset.
class APN_Dataset_fake(Dataset):
  """Dataset that yields one normalized image tensor per row of `df`.

  The file name is read from the `fake` column of `df`. The source directory
  is chosen from substrings of that file name: "tt" -> `tt_data_dir`,
  "latentdiff" -> `ld_data_dir`, "big" -> `bg_data_dir`. When several
  substrings match, "big" wins over "latentdiff", which wins over "tt".
  The module-level `mode` selects how the image is decoded:

  * "rgb": read with `skimage.io.imread`, moved to channel-first layout and
    scaled by 1/255 (assumes the decoded array is H x W x C).
  * "grey_scale": read as a single-channel image with OpenCV, given a
    leading channel axis and scaled by 1/255.

  Raises:
    ValueError: if `mode` is neither "rgb" nor "grey_scale", or if the file
      name matches none of the known sources.
    FileNotFoundError: in "grey_scale" mode, if OpenCV cannot read the file.
  """

  def __init__(self, df):
    self.df = df

  def __len__(self):
    return len(self.df)

  def __getitem__(self, idx):
    # Fail early with a clear message instead of an UnboundLocalError later on.
    if mode not in ("rgb", "grey_scale"):
      raise ValueError(f"unsupported mode {mode!r}: expected 'rgb' or 'grey_scale'")

    row = self.df.iloc[idx]
    name = row.fake

    # The images are stored in different folders depending on their generator.
    # Checked from highest to lowest priority so a name matching several
    # markers resolves to the same folder as before, without redundant reads.
    if "big" in name:
      data_dir = bg_data_dir
    elif "latentdiff" in name:
      data_dir = ld_data_dir
    elif "tt" in name:
      data_dir = tt_data_dir
    else:
      raise ValueError(f"cannot infer the source directory of fake image {name!r}")

    img_path = os.path.join(data_dir, name)

    if mode == "rgb":
      A_img = io.imread(img_path)
      # normalization for rgb images (channel-first, values divided by 255)
      return torch.from_numpy(A_img).permute(2, 0, 1) / 255.0

    # grey_scale: the image must actually be loaded before it can be expanded;
    # cv2.imread returns None (it does not raise) when the file cannot be read.
    A_img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if A_img is None:
      raise FileNotFoundError(f"cannot read image: {img_path}")

    A_img = np.expand_dims(A_img, 0)
    return torch.from_numpy(A_img) / 255.0

In [9]:
# Real images: dataset plus a loader that keeps the dataframe row order.
dataset_real = APN_Dataset_real(df_real)
dataloader_real = DataLoader(dataset_real, shuffle=False, batch_size=BATCH_SIZE)

# Fake images: same configuration.
dataset_fake = APN_Dataset_fake(df_fake)
dataloader_fake = DataLoader(dataset_fake, shuffle=False, batch_size=BATCH_SIZE)

In [10]:
# Network wrapper whose backbone is created by name through timm.
class APN_Model_Pretrained(nn.Module):
  """Pretrained `tf_efficientnetv2_b0` backbone with an embedding head.

  The backbone's `classifier` layer is replaced by a linear layer that maps
  the classifier's input features to `emb_size` outputs.

  Args:
    emb_size: size of the embedding vector produced by `forward`.
  """

  def __init__(self, emb_size = 512):
    super().__init__()

    # Build the backbone first, then swap its classification head.
    backbone = timm.create_model("tf_efficientnetv2_b0", pretrained=True)
    head_inputs = backbone.classifier.in_features
    backbone.classifier = nn.Linear(in_features=head_inputs, out_features=emb_size)
    self.efficientnet = backbone

  def forward(self, images):
    """Return the backbone output for `images`."""
    return self.efficientnet(images)

In [11]:
model = APN_Model_Pretrained()

# Greyscale images (used for the Fourier variant) have a single channel, so the
# stem convolution is replaced by a 2D convolution taking 1 input channel.
if mode == "grey_scale":
    model.efficientnet.conv_stem = nn.Conv2d(
        in_channels=1, out_channels=32, kernel_size=3, stride=2, padding=1, bias=False
    )

model.to(DEVICE)

APN_Model_Pretrained(
  (efficientnet): EfficientNet(
    (conv_stem): Conv2dSame(3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
    (bn1): BatchNormAct2d(
      32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True
      (drop): Identity()
      (act): SiLU(inplace=True)
    )
    (blocks): Sequential(
      (0): Sequential(
        (0): ConvBnAct(
          (conv): Conv2d(32, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn1): BatchNormAct2d(
            16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True
            (drop): Identity()
            (act): SiLU(inplace=True)
          )
          (drop_path): Identity()
        )
      )
      (1): Sequential(
        (0): EdgeResidual(
          (conv_exp): Conv2dSame(16, 64, kernel_size=(3, 3), stride=(2, 2), bias=False)
          (bn1): BatchNormAct2d(
            64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True
            (drop): Identity()
      

In [12]:
def create_embeddings_real(model, dataloader, batch_size, ds):
    """Compute one embedding per real image and save them to "real_embs.csv".

    Batches from `dataloader` are passed through `model` in eval mode with
    gradients disabled. The k-th embedding produced is paired with the k-th
    value of `ds["real"]`, so `dataloader` must iterate in the same order as
    `ds` (i.e. without shuffling).

    Args:
        model: network called on each batch; must return one embedding per sample.
        dataloader: iterable of image batches.
        batch_size: unused; kept so existing callers keep working.
        ds: dataframe with a "real" column holding the image file names.

    Returns:
        DataFrame with columns "Real" (file name) and "R_embs" (CPU tensor),
        holding at most `len(ds)` rows.
    """
    model.eval()

    names = ds["real"]
    total = len(names)
    kept_names = []
    kept_embs = []

    with torch.no_grad():
        for A in tqdm(dataloader, desc="create embeddings real..."):
            # the embeddings are created here; their distances are computed later
            A_embs = model(A.to(DEVICE)).cpu()

            for emb in A_embs:
                # The row index advances once per sample, so every embedding
                # gets its own row and its own file name.
                row = len(kept_names)
                if row >= total:
                    break
                kept_names.append(names.iloc[row])
                kept_embs.append(emb)

    # Filled element by element so each cell holds a whole tensor instead of
    # the tensors being unpacked into extra dimensions.
    emb_column = np.empty(len(kept_embs), dtype=object)
    for k, emb in enumerate(kept_embs):
        emb_column[k] = emb

    out = pd.DataFrame({"Real": kept_names, "R_embs": emb_column})

    # NOTE: the CSV stores the textual repr of each tensor.
    out.to_csv("real_embs.csv", index=False)
    return out

In [13]:
def create_embeddings_fake(model, dataloader, batch_size, ds):
    """Compute one embedding per fake image and save them to "fake_embs.csv".

    Batches from `dataloader` are passed through `model` in eval mode with
    gradients disabled. The k-th embedding produced is paired with the k-th
    value of `ds["fake"]`, so `dataloader` must iterate in the same order as
    `ds` (i.e. without shuffling).

    Args:
        model: network called on each batch; must return one embedding per sample.
        dataloader: iterable of image batches.
        batch_size: unused; kept so existing callers keep working.
        ds: dataframe with a "fake" column holding the image file names.

    Returns:
        DataFrame with columns "Fake" (file name) and "F_embs" (CPU tensor),
        holding at most `len(ds)` rows.
    """
    model.eval()

    names = ds["fake"]
    total = len(names)
    kept_names = []
    kept_embs = []

    with torch.no_grad():
        for A in tqdm(dataloader, desc="create embeddings fake..."):
            # the embeddings are created here; their distances are computed later
            A_embs = model(A.to(DEVICE)).cpu()

            for emb in A_embs:
                # The row index advances once per sample, so every embedding
                # gets its own row and its own file name.
                row = len(kept_names)
                if row >= total:
                    break
                kept_names.append(names.iloc[row])
                kept_embs.append(emb)

    # Filled element by element so each cell holds a whole tensor instead of
    # the tensors being unpacked into extra dimensions.
    emb_column = np.empty(len(kept_embs), dtype=object)
    for k, emb in enumerate(kept_embs):
        emb_column[k] = emb

    out = pd.DataFrame({"Fake": kept_names, "F_embs": emb_column})

    # NOTE: the CSV stores the textual repr of each tensor.
    out.to_csv("fake_embs.csv", index=False)
    return out

In [14]:
# Embed every real image, then every fake image, with the same model.
embeddings_real = create_embeddings_real(
    model=model, dataloader=dataloader_real, batch_size=BATCH_SIZE, ds=df_real
)
embeddings_fake = create_embeddings_fake(
    model=model, dataloader=dataloader_fake, batch_size=BATCH_SIZE, ds=df_fake
)


create embeddings real...: 100%|██████████| 3438/3438 [45:36<00:00,  1.26it/s]
create embeddings fake...: 100%|██████████| 3125/3125 [39:50<00:00,  1.31it/s]


In [15]:
#train_df1, valid_df1 = train_test_split(shm.semi_hard_mining(embeddings_real, embeddings_fake, 0.2), test_size=0.20, random_state=42)

#trainset1 = APN_Dataset_real(train_df1)
#validset1 = APN_Dataset_fake(valid_df1)


#trainloader_semi_hard= DataLoader(trainset1, batch_size = BATCH_SIZE, shuffle = True)
#validloader_semi_hard = DataLoader(validset1, batch_size = BATCH_SIZE)