Nel seguente file avviene la creazione del dataset "spectrum_train_dataset.csv" contenente 60.000 path di immagini traslate nello spazio delle frequenze con Fourier. 30.000 path di immagini fake sono stati presi dalle cartelle "biggan-spectrum", "latentdiff_spectrum" e "tt-cc_spectrum" ("taming_transformer"), e 30.000 reali sono stati presi dalla cartella "coco", più nello specifico dalla sotto-cartella "train_spectrum".

In [1]:
import pandas as pd
import numpy as np
import os
import random

In [2]:
# All image folders live under "<current working directory>/dataset".
current_dir = os.getcwd()
dataset_root = os.path.join(current_dir, "dataset")

# Spectra of real images
real_image_folder = os.path.join(
    dataset_root, "coco", "coco", "coco2017", "train_spectrum"
)
# Spectra of images generated by the BigGAN model
biggan_spectrum_path = os.path.join(
    dataset_root, "big_gan", "big", "biggan-spectrum"
)
# Spectra of images generated by the Latent Diffusion model
latentdiff_spectrum_path = os.path.join(
    dataset_root, "latent_diffusion", "latentdiff_spectrum"
)
# Spectra of images generated by the Taming Transformer model
taming_transformer_spectrum_path = os.path.join(
    dataset_root, "taming_transformer", "tt-cc_spectrum"
)

# The three fake-image folders gathered in one list
fake_image_folders = [
    biggan_spectrum_path,
    latentdiff_spectrum_path,
    taming_transformer_spectrum_path,
]

In [3]:
def _collect_jpg_paths(folder):
    """Return the paths of every ``.jpg`` file found under *folder*.

    The directory tree is walked recursively with ``os.walk``. Each match is
    converted with ``os.path.relpath``, i.e. made relative to the current
    working directory. The extension check is case-sensitive (``.JPG`` files
    are not picked up). ``os.walk`` ignores listing errors by default, so a
    folder that does not exist simply yields an empty list.
    """
    matches = []
    for root, _dirs, files in os.walk(folder):
        matches.extend(
            os.path.relpath(os.path.join(root, filename))
            for filename in files
            if filename.endswith(".jpg")
        )
    return matches


# Relative paths of the real-image spectra
real_image_paths = _collect_jpg_paths(real_image_folder)

# Relative paths of the fake-image spectra, one list per generator folder
biggan_image_paths = _collect_jpg_paths(biggan_spectrum_path)
latentdiff_image_paths = _collect_jpg_paths(latentdiff_spectrum_path)
taming_transformer_image_paths = _collect_jpg_paths(taming_transformer_spectrum_path)

In [4]:
# Draw fixed-size random subsets without replacement: 15000 real spectra and
# 5000 spectra from each of the three generator folders.
# random.sample raises ValueError when a list holds fewer items than requested.
real_image_paths_unique = random.sample(real_image_paths, k=15000)
biggan_image_paths_sample = random.sample(biggan_image_paths, k=5000)
latentdiff_image_paths_sample = random.sample(latentdiff_image_paths, k=5000)
taming_transformer_image_paths_sample = random.sample(taming_transformer_image_paths, k=5000)

In [5]:
# Merge the three per-generator samples into a single pool of fake paths,
# keeping the order BigGAN, Latent Diffusion, Taming Transformer.
fake_image_paths_unique = [
    *biggan_image_paths_sample,
    *latentdiff_image_paths_sample,
    *taming_transformer_image_paths_sample,
]

In [6]:
def _build_triplets(same_class_paths, other_class_paths, count):
    """Build *count* random ``(anchor, positive, negative)`` path triplets.

    Anchor and positive are drawn together from *same_class_paths* with
    ``random.sample(..., 2)``, so they come from two different positions of
    the pool and are therefore different paths as long as the pool holds no
    duplicate entries. Drawing them with two independent ``random.choice``
    calls could return the same image twice, producing a degenerate triplet.
    The negative is drawn from *other_class_paths*.

    Raises:
        ValueError: if *same_class_paths* has fewer than two elements.
        IndexError: if *other_class_paths* is empty.
    """
    triplets = []
    for _ in range(count):
        anchor, positive = random.sample(same_class_paths, 2)
        negative = random.choice(other_class_paths)
        triplets.append((anchor, positive, negative))
    return triplets


# Each tuple is one row of the future dataframe.
# Real anchor + real positive, fake negative.
rows_real = _build_triplets(real_image_paths_unique, fake_image_paths_unique, 15000)
# Fake anchor + fake positive, real negative.
rows_fake = _build_triplets(fake_image_paths_unique, real_image_paths_unique, 15000)

In [7]:
# Randomly reorder each list of rows in place (real-anchored list first,
# then the fake-anchored one).
for triplet_rows in (rows_real, rows_fake):
    random.shuffle(triplet_rows)

In [8]:
# Both tables share the same layout: one column per element of the row tuples.
triplet_columns = ['Anchor', 'Positive', 'Negative']
df_real = pd.DataFrame(rows_real, columns=triplet_columns)
df_fake = pd.DataFrame(rows_fake, columns=triplet_columns)

In [9]:
# Write each table to its own CSV file (relative path, so it lands in the
# current working directory); the dataframe index is not written.
for frame, csv_name in (
    (df_real, 'real_image_dataset_coco2.csv'),
    (df_fake, 'fake_image_dataset_biggan_latent_taming.csv'),
):
    frame.to_csv(csv_name, index=False)