In [None]:
import os
import sys

from pathlib import Path


# make the "project-detective" folder importable so the local `utils` package is visible
module_path = os.path.join(Path(os.getcwd()).parent.parent, "project-detective")

sys.path.append(module_path)

In [None]:
import timm
import torch
import numpy as np
import pandas as pd
import utils.mining as mining
import utils.datasets as build
import matplotlib.pyplot as plt
import torch.nn.functional as F

from torch import nn
from tqdm import tqdm
from skimage import io
from sklearn.metrics import confusion_matrix
from torch.utils.data import Dataset, DataLoader
from pytorch_metric_learning import miners, losses
from sklearn.model_selection import train_test_split

In [None]:
# IPython autoreload extension: reloads modified code from imported modules
%load_ext autoreload
%autoreload 2

In [None]:
# configuration
batch_size = 32
lr = 0.001
epochs = 30
device = "cuda"

# input representation: "rgb" images or grayscale images (used for the Fourier spectrum)
mode = "fourier"

# margin for the semi-hard mining done with the embedding model
margin = 0.2

In [None]:
# directories the training images are read from
if mode == "rgb":
    path = Path(os.getcwd()).parent.parent
    artifact_root = os.path.join(path, "artifact")

    fake_data_dir = os.path.join(artifact_root, "taming_transformer")
    real_data_dir = os.path.join(artifact_root, "coco")

else:
    path = Path(os.getcwd()).parent
    train_root = os.path.join(path, "temp", "taming_transformer+coco", "train")

    fake_data_dir = os.path.join(train_root, "taming_transformer")
    real_data_dir = os.path.join(train_root, "coco")

In [None]:
# loads the image triplets listed in a dataframe
class ApnDataset(Dataset):
  """Dataset of (anchor, positive, negative) image triplets.

  ``df`` must provide the columns ``Anchor``, ``Positive`` and ``Negative``
  holding image file names. Files are read from the module-level
  ``real_data_dir`` / ``fake_data_dir`` folders and decoded according to the
  module-level ``mode`` ("rgb" or "fourier").

  Labels: 0 marks an image read from ``real_data_dir``, 1 an image read from
  ``fake_data_dir``. Labels are returned as 0-dim tensors in both modes.
  """

  def __init__(self, df):
    self.df = df

  def __len__(self):
    return len(self.df)

  @staticmethod
  def _anchor_is_real(anchor_name):
    """Tell from the file name whether the anchor (and so its positive) is a real image.

    Raises:
      ValueError: if ``mode`` is neither "rgb" nor "fourier".
    """
    name = str(anchor_name)
    if mode == "rgb":
      # real rgb anchors are recognised by the "coco" file-name prefix
      return name.startswith("coco")
    if mode == "fourier":
      # real fourier anchors are recognised by "real" appearing in the file name
      return "real" in name
    raise ValueError(f"unsupported mode: {mode!r} (expected 'rgb' or 'fourier')")

  @staticmethod
  def _to_tensor(img):
    """Convert a decoded image array into a channel-first tensor divided by 255."""
    if mode == "rgb":
      # move the channel axis first (H, W, C -> C, H, W)
      return torch.from_numpy(img).permute(2, 0, 1) / 255.0
    # fourier images have no channel axis: add a leading one
    return torch.from_numpy(np.expand_dims(img, 0)) / 255.0

  def __getitem__(self, idx):
    """Return ``(a_img, p_img, n_img, a_label, p_label, n_label)`` for row ``idx``."""
    row = self.df.iloc[idx]

    # anchor and positive come from the same folder, the negative from the other one
    if self._anchor_is_real(row.Anchor):
      same_dir, other_dir = real_data_dir, fake_data_dir
      same_label, other_label = 0, 1
    else:
      same_dir, other_dir = fake_data_dir, real_data_dir
      same_label, other_label = 1, 0

    a_img = self._to_tensor(io.imread(os.path.join(same_dir, row.Anchor)))
    p_img = self._to_tensor(io.imread(os.path.join(same_dir, row.Positive)))
    n_img = self._to_tensor(io.imread(os.path.join(other_dir, row.Negative)))

    a_label = torch.tensor(same_label)
    p_label = torch.tensor(same_label)
    n_label = torch.tensor(other_label)

    return a_img, p_img, n_img, a_label, p_label, n_label

In [None]:
# network trained with the triplet loss; the architecture is created through timm
class ApnModel(nn.Module):
  """EfficientNetV2-B0 backbone whose classifier is replaced by a linear embedding head."""

  def __init__(self, emb_size=512):
    # emb_size: length of the embedding vector
    super().__init__()

    # efficientnet b0 (the lightest architecture of the family), created without pretrained weights
    backbone = timm.create_model("tf_efficientnetv2_b0", pretrained=False)

    # replace the classification head with a projection to emb_size
    head_inputs = backbone.classifier.in_features
    backbone.classifier = nn.Linear(in_features=head_inputs, out_features=emb_size)

    self.efficientnet = backbone

  def forward(self, images):
    """Return the embeddings produced by the backbone for a batch of images."""
    return self.efficientnet(images)

In [None]:
# model that generates the embeddings used by the offline semi-hard mining
class EmbModel(nn.Module):
    """EfficientNetV2-B0 backbone with a linear embedding head, used only to embed triplets for mining."""

    def __init__(self, emb_size = 512):
        # emb_size: length of the embedding vector
        super().__init__()

        # the backbone is created with pretrained=False; in fourier mode the notebook
        # loads hand-trained weights from a checkpoint after construction
        backbone = timm.create_model("tf_efficientnetv2_b0", pretrained=False)

        # replace the classification head with a projection to emb_size
        head_inputs = backbone.classifier.in_features
        backbone.classifier = nn.Linear(in_features=head_inputs, out_features=emb_size)

        self.efficientnet = backbone

    def forward(self, images):
        """Return the embeddings produced by the backbone for a batch of images."""
        return self.efficientnet(images)

In [None]:
# builds the embeddings that are later filtered by the offline semi-hard mining
def create_embeddings(model, dataloader, device):
    """Embed every triplet yielded by ``dataloader`` and return the result as a dataframe.

    Args:
        model: callable mapping a batch of images to a batch of embeddings.
        dataloader: iterable yielding ``(a, p, n, a_label, p_label, n_label)`` batches;
            the labels are not used here.
        device: device the image batches are moved to before calling ``model``.

    Returns:
        DataFrame with the string columns ``Anchor_embs``, ``Positive_embs`` and
        ``Negative_embs`` (one row per triplet, in dataloader order, indexed 0..N-1).
        Each cell holds the embedding serialized with ``np.array2string``.
        An empty dataloader yields an empty dataframe with those columns.
    """
    import sys

    columns = ["Anchor_embs", "Positive_embs", "Negative_embs"]

    def _serialize(emb):
        # arrays are stored as strings so that they fit in a dataframe cell;
        # threshold=sys.maxsize keeps numpy from eliding values ("...") on long vectors
        return np.array2string(emb.cpu().numpy(), separator=',', threshold=sys.maxsize)

    # evaluation mode (dropout off)
    model.eval()

    # rows are collected in a list and turned into a dataframe once at the end
    records = []

    with torch.no_grad():
        for a, p, n, _al, _pl, _nl in tqdm(dataloader, desc="creating embeddings..."):
            a_embs = model(a.to(device))
            p_embs = model(p.to(device))
            n_embs = model(n.to(device))

            # the last batch can be smaller, so iterate over what was actually produced
            for a_emb, p_emb, n_emb in zip(a_embs, p_embs, n_embs):
                records.append([_serialize(a_emb), _serialize(p_emb), _serialize(n_emb)])

    return pd.DataFrame(records, columns=columns)

In [None]:
emb_model = EmbModel()

# grayscale (Fourier) images have a single channel, so the stem convolution must take 1 input channel
if mode == "fourier":
    emb_model.efficientnet.conv_stem = nn.Conv2d(1, 32, 3, 2, 1, bias=False)
    # map_location makes the checkpoint loadable even if it was saved from a different device
    emb_model.load_state_dict(torch.load("fourier_emb_model.pt", map_location=device))

# NOTE(review): in "rgb" mode no checkpoint is loaded here and EmbModel is built with
# pretrained=False, so the mining embeddings would come from untrained weights — confirm
emb_model.to(device);

In [None]:
# triplet list: one csv per input representation
csv_name = "out.csv" if mode == "rgb" else "fourier_out.csv"

df_out_path = os.path.join("..", "datasets", csv_name)
df_out = pd.read_csv(df_out_path)

In [None]:
# dataset over the full triplet list (before mining)
apn_dataset = ApnDataset(df_out)
# no shuffling: the embeddings are later concatenated column-wise with df_out, so row order must be kept
dataloader = DataLoader(apn_dataset, batch_size=batch_size)

In [None]:
# cache file for the embeddings
# NOTE(review): the file name does not depend on `mode` or on the input csv, so a cache
# created with other settings would be reused silently — confirm / delete it when they change
emb_csv_path = os.path.join("..", "notebooks", "embeddings.csv")

# compute the embeddings only if they have not been created yet
if not Path(emb_csv_path).is_file():
    df_emb = create_embeddings(emb_model, dataloader, device)
    df_emb.to_csv(emb_csv_path, index=False)

df_emb = pd.read_csv(emb_csv_path)

In [None]:
# image names and embeddings are concatenated column-wise so that rows can be filtered with the semi-hard mining rule
# NOTE(review): df_out is overwritten below, so re-running this cell alone operates on the already mined frame
df_out = pd.concat([df_out, df_emb], axis=1)

# offline semi-hard mining of the triplets
df_out = mining.offline_semi_hard_mining(df_out, margin)
df_out = df_out.drop(["Anchor_embs", "Positive_embs", "Negative_embs"], axis=1)

print(f"dataset size after semi-hard mining: {len(df_out)}")

In [None]:
# training function
def train_fn(model, dataloader, optimizer, criterion, miner):
  """Run one training epoch and return the loss averaged over the batches.

  Each batch is a tuple ``(a, p, n, al, pl, nl)`` of anchor/positive/negative
  images and their labels; tensors are moved to the module-level ``device``.
  """
  # training mode (dropout on)
  model.train()

  batch_losses = []

  for a, p, n, al, pl, nl in tqdm(dataloader, desc="model training..."):
    a, p, n = (t.to(device) for t in (a, p, n))
    al, pl, nl = (t.to(device) for t in (al, pl, nl))

    optimizer.zero_grad()

    # embeddings are created here; their distances are computed inside the miner / loss
    a_embs, p_embs, n_embs = model(a), model(p), model(n)

    # online hard mining needs all images in a single batch: the triplets are built by the miner
    embeddings = torch.cat([a_embs, p_embs, n_embs], dim=0)
    labels = torch.cat([al, pl, nl], dim=0)

    # online hard mining before computing the loss
    mined_triplets = miner(embeddings, labels)
    loss = criterion(embeddings, labels, mined_triplets)

    loss.backward()
    optimizer.step()

    batch_losses.append(loss.item())

  return sum(batch_losses) / len(dataloader)

In [None]:
# evaluation function
def eval_fn(model, dataloader, criterion, miner):
  """Evaluate the model on ``dataloader`` and return the loss averaged over the batches.

  No gradients are computed; tensors are moved to the module-level ``device``.
  """
  # evaluation mode (dropout off)
  model.eval()

  batch_losses = []

  with torch.no_grad():
    for a, p, n, al, pl, nl in tqdm(dataloader, desc="model validating..."):
      a, p, n = (t.to(device) for t in (a, p, n))
      al, pl, nl = (t.to(device) for t in (al, pl, nl))

      a_embs, p_embs, n_embs = model(a), model(p), model(n)

      # same batching as in training: the miner builds the triplets from the stacked embeddings
      embeddings = torch.cat([a_embs, p_embs, n_embs], dim=0)
      labels = torch.cat([al, pl, nl], dim=0)

      mined_triplets = miner(embeddings, labels)
      loss = criterion(embeddings, labels, mined_triplets)

      batch_losses.append(loss.item())

  return sum(batch_losses) / len(dataloader)

In [None]:
# network that will be trained with the triplet loss
model = ApnModel()

# grayscale (Fourier) images have a single channel, so the stem convolution must take 1 input channel
if mode == "fourier":
    model.efficientnet.conv_stem = nn.Conv2d(1, 32, 3, 2, 1, bias=False)

model.to(device);

In [None]:
# split of the mined dataframe: 20% of the rows go to validation, with a fixed random_state
train_df, valid_df = train_test_split(df_out, test_size=0.20, random_state=42)

trainset = ApnDataset(train_df)
validset = ApnDataset(valid_df)

# only the training loader is shuffled
trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=True)
validloader = DataLoader(validset, batch_size=batch_size)

In [None]:
# triplet loss, miner (for online hard mining) and Adam optimizer
# NOTE(review): the miner receives `margin`, the loss is built with its default margin — confirm this is intended
criterion = losses.TripletMarginLoss(triplets_per_anchor="all")
miner = miners.TripletMarginMiner(margin=margin, type_of_triplets="hard")
# alternative kept for reference: PyTorch's built-in triplet loss
# criterion = nn.TripletMarginLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

In [None]:
# training: the weights with the lowest validation loss are saved to "best_model.pt"
# np.inf is used because the capitalized alias np.Inf was removed in NumPy 2.0
best_valid_loss = np.inf

# per-epoch losses, plotted after training
training_epoch_loss = []
validation_epoch_loss = []

for i in range(epochs):
  train_loss = train_fn(model, trainloader, optimizer, criterion, miner)
  valid_loss = eval_fn(model, validloader, criterion, miner)

  training_epoch_loss.append(train_loss)
  validation_epoch_loss.append(valid_loss)

  # checkpoint only when the validation loss improves
  if valid_loss < best_valid_loss:
    torch.save(model.state_dict(), "best_model.pt")
    best_valid_loss = valid_loss
    print("successful weights saving...")

  print(f"epochs: {i+1}, train_loss: {train_loss}, valid_loss: {valid_loss}")

In [None]:
# plot of the training and validation loss per epoch
plt.plot(training_epoch_loss, label="train_loss")
plt.plot(validation_epoch_loss, label="val_loss")
plt.legend()

# plt.show must be called: the bare attribute only echoes the function object
plt.show()

In [None]:
# function that generates the encoding vectors
def get_encoding_csv(model, anc_img_names, dir_folder):
  """Encode every image in ``anc_img_names`` and return names and encodings side by side.

  Args:
    model: network used to produce the encodings (switched to eval mode).
    anc_img_names: image file names; converted to a numpy array.
    dir_folder: overwritten for every image with ``real_data_dir`` or
      ``fake_data_dir`` (chosen from the file name), so the passed value is not used
      when ``mode`` is "rgb" or "fourier".

  Returns:
    DataFrame whose first column ``Anchor`` holds the file names and whose remaining
    columns hold the encoding values.
  """
  anc_img_names_arr = np.array(anc_img_names)
  encodings = []

  model.eval()

  with torch.no_grad():
    for img_name in tqdm(anc_img_names_arr, desc="creating encodings..."):
      if mode == "rgb":
        # real rgb images are recognised by the "coco" file-name prefix
        dir_folder = real_data_dir if str(img_name).startswith("coco") else fake_data_dir

        raw = io.imread(os.path.join(dir_folder, img_name))
        a = torch.from_numpy(raw).permute(2, 0, 1) / 255.0

      if mode == "fourier":
        # real fourier images are recognised by "real" appearing in the file name
        dir_folder = real_data_dir if "real" in str(img_name) else fake_data_dir

        raw = io.imread(os.path.join(dir_folder, img_name))
        raw = np.expand_dims(raw, 0)
        a = torch.from_numpy(raw.astype(np.int32)) / 255.0

      # the model expects a batch, so a leading batch axis is added
      a_enc = model(a.to(device).unsqueeze(0))
      encodings.append(a_enc.squeeze().cpu().detach().numpy())

  names_df = pd.DataFrame(anc_img_names_arr, columns=['Anchor'])
  enc_df = pd.DataFrame(np.array(encodings))

  return pd.concat([names_df, enc_df], axis=1)

In [None]:
# fresh network instance, used to load the trained weights for testing
model = ApnModel()

# grayscale (Fourier) images have a single channel, so the stem convolution must take 1 input channel
if mode == "fourier":
    model.efficientnet.conv_stem = nn.Conv2d(1, 32, 3, 2, 1, bias=False)

model.to(device);

In [None]:
# reload the trained model
# map_location makes the checkpoint loadable even if it was saved from a different device
model.load_state_dict(torch.load("best_model.pt", map_location=device))

# the encodings are created once and stored, so they are not recomputed at every training
# (get_encoding_csv picks the folder from each file name, so the directory argument is not used)
df_enc = get_encoding_csv(model, df_out["Anchor"], real_data_dir)
df_enc.to_csv("database.csv", index=False)

In [None]:
# reload the stored encodings database and show its first rows
df_enc = pd.read_csv('database.csv')
df_enc.head()

In [None]:
# approximation of the distance, without the square root, to make the first runs faster
def euclidean_dist(img_enc, anc_enc_arr):
    """Return the squared Euclidean distance between two encodings.

    The result is ``diff . diff^T`` with ``diff = img_enc - anc_enc_arr``; for two
    ``(1, D)`` arrays this is a ``(1, 1)`` array, for two 1-D arrays a scalar.
    """
    diff = img_enc - anc_enc_arr
    return np.dot(diff, diff.T)

In [None]:
# test list: built from the datasets' metadata in rgb mode, read from a ready-made csv otherwise
if mode == "rgb":
    fake_dataset_path = os.path.join(fake_data_dir, "metadata.csv")
    real_dataset_path = os.path.join(real_data_dir, "metadata.csv")

    test_df_path = os.path.join("..", "datasets", "testList.csv")
    build.test(fake_dataset_path, real_dataset_path, df_out, test_df_path)
    test_df = pd.read_csv(test_df_path)

else: 
    # NOTE: from here on the global data directories point to the *test* images;
    # ApnDataset and get_encoding_csv read these globals, so re-running them after this cell uses the test folders
    path = Path(os.getcwd()).parent
    fake_data_dir = os.path.join(path, "temp", "taming_transformer+coco", "test", "taming_transformer")
    real_data_dir = os.path.join(path, "temp", "taming_transformer+coco", "test", "coco")
    
    test_df_path = os.path.join("..", "datasets", "fourier_test_list.csv")
    test_df = pd.read_csv(test_df_path)

# quick look at the test list (DataFrame.size is rows * columns)
print(test_df["real"])
print(test_df.size)
test_df.head()

In [None]:
def get_image_embeddings(img, model):
    """Encode a single decoded image and return the encoding as a numpy array.

    The image is converted according to the module-level ``mode`` and moved to the
    module-level ``device``; ``model`` is switched to eval mode. Because a batch
    axis is added before the forward pass, the result keeps a leading axis of size 1.
    """
    if mode == "rgb":
        # channel axis first (H, W, C -> C, H, W)
        img = torch.from_numpy(img).permute(2, 0, 1) / 255.0

    if mode == "fourier":
        # add the missing channel axis
        img = torch.from_numpy(np.expand_dims(img, 0)) / 255

    model.eval()
    with torch.no_grad():
        batch = img.to(device).unsqueeze(0)
        raw_enc = model(batch).detach().cpu().numpy()

    return np.array(raw_enc)

In [None]:
def search_in_database(img_enc, database):
    """Return the ``Anchor`` name of the database row closest to ``img_enc``.

    Args:
        img_enc: encoding of the query image, shaped ``(1, D)`` or ``(D,)``.
        database: DataFrame with an ``Anchor`` column; the encoding values are read
            from every column after the first one.

    Returns:
        The ``Anchor`` value of the row with the smallest squared Euclidean distance
        (the first such row when several are equally close).

    Raises:
        ValueError: if ``database`` has no rows.
    """
    anc_enc_arr = database.iloc[:, 1:].to_numpy()
    if anc_enc_arr.shape[0] == 0:
        raise ValueError("database contains no encodings")

    # squared distances to all rows at once (the square root does not change the ranking)
    diff = anc_enc_arr - np.asarray(img_enc).reshape(1, -1)
    distances = (diff * diff).sum(axis=1)

    # positional lookup: stays correct even when the dataframe index is not 0..N-1
    return database["Anchor"].iloc[int(np.argmin(distances))]

In [None]:
# accumulators for the labels; y_pred is filled by the two test loops,
# y_true is rebuilt from the test list before the confusion matrix is computed
y_true = []
y_pred = []

# temp_df is another name for test_df (no copy is made)
temp_df = test_df
temp_df.head()
temp_df.shape

In [None]:
# test on the fake images
current_test = "fake"
database = df_enc

fake_images = temp_df[current_test].dropna()
print(len(fake_images))

# every fake test image is classified with the label of its closest database anchor
for fake_name in tqdm(fake_images, desc="testing on fake images..."):
    img = io.imread(os.path.join(fake_data_dir, fake_name))
    img_enc = get_image_embeddings(img, model)
    closest_label = str(search_in_database(img_enc, database))

    # real anchors: "coco" prefix in rgb mode, "real" in the file name otherwise
    if mode == "rgb":
        closest_is_real = closest_label.startswith("coco")
    else:
        closest_is_real = "real" in closest_label

    y_pred.append("real" if closest_is_real else "fake")

In [None]:
# sanity check after the fake pass: y_true is still empty here, y_pred holds one prediction per fake image
print(len(y_true))
print(len(y_pred))
print(y_pred)

In [None]:
# test on the real images
current_test = "real"
database = df_enc

# NOTE(review): unlike the fake column, no dropna() is applied here — confirm the "real" column has no missing values
real_images = temp_df[current_test]
print(len(real_images))

# every real test image is classified with the label of its closest database anchor
for real_name in tqdm(real_images, desc="testing on real images..."):
    img = io.imread(os.path.join(real_data_dir, real_name))
    img_enc = get_image_embeddings(img, model)
    closest_label = str(search_in_database(img_enc, database))

    # real anchors: "coco" prefix in rgb mode, "real" in the file name otherwise
    if mode == "rgb":
        closest_is_real = closest_label.startswith("coco")
    else:
        closest_is_real = "real" in closest_label

    y_pred.append("real" if closest_is_real else "fake")

In [None]:
# sanity check after the real pass: y_pred now holds the fake predictions followed by the real ones
print(len(y_true))
print(len(y_pred))
print(y_pred)

In [None]:
# build the ground-truth vector in the same order as y_pred: fake images first, then real ones
y_true = np.array(["fake"] * len(temp_df["fake"].dropna()))
temp = np.array(["real"] * len(temp_df["real"]))
y_true = np.concatenate([y_true, temp])

# compute the confusion matrix (scikit-learn lays out the results as unpacked in the next cell)
cm = confusion_matrix(y_true, y_pred, labels=["real", "fake"])
print(cm)

In [None]:
# ravel() flattens the 2x2 matrix row by row; with labels=["real", "fake"] the second
# label ("fake") plays the role of the positive class
tn, fp, fn, tp = cm.ravel()

# metrics, expressed as percentages
# rounding is applied after scaling to 100: round(x, 4) * 100 leaves float artifacts
# such as 56.99999999999999 in the saved results
accuracy = round((tp + tn) / (tp + tn + fp + fn) * 100, 2)
precision = round(tp / (tp + fp) * 100, 2)
recall = round(tp / (tp + fn) * 100, 2)
specificity = round(tn / (tn + fp) * 100, 4)
f1_score = round((2 * precision * recall) / (precision + recall), 4)

print({"Accuracy":accuracy, "Precision":precision, "Recall":recall, "Specificity":specificity, "F1 Score":f1_score})

In [None]:
# the results are saved to a .csv file
df_results = pd.DataFrame(columns=["Accuracy", "Precision", "Recall", "Specificity", "F1 Score"])
df_results.loc[0] = [accuracy, precision, recall, specificity, f1_score]

# results are told apart by the kind of images and by the dataset used
# basename works with any path separator; splitting on "\\" only extracted the folder name on Windows
dataset = os.path.basename(os.path.normpath(fake_data_dir))
# NOTE(review): the "rgb_mining" folder is used for every mode — confirm this is intended
path = os.path.join("..", "results", "rgb_mining", "siamese_" + mode + "_" + "pretrained_semi_hard_online_hard_" + dataset + "_results.csv")

df_results.to_csv(path, index=False)