## Joining channels

In [1]:
import pandas as pd
import numpy as np
import os

gen_image_path = '../data/features/synthbuster/wavelet1'

for gen in os.listdir(gen_image_path):
    features_path = os.path.join(gen_image_path, gen, 'features')
    for dir in os.listdir(features_path):
        # to allow to re-run the script without manually deleting the files
        if dir == "all_wavelets":
            continue
        for dataset in os.listdir(os.path.join(features_path, dir)):
            path = os.path.join(features_path, dir, dataset)
            # Read R features from RGB_features folder
            R = pd.read_csv(os.path.join(path, 'features_red.csv'), index_col=0)
            G = pd.read_csv(os.path.join(path, 'features_green.csv'), index_col=0)
            B = pd.read_csv(os.path.join(path, 'features_blue.csv'), index_col=0)

            # Add R prefix to all columns in R_test_features
            R = R.add_prefix('R_')
            G = G.add_prefix('G_')
            B = B.add_prefix('B_')
            
            # print filenames with nan values
            print('R:', R[R.isnull().any(axis=1)]['R_Image'])
            print('G:', G[G.isnull().any(axis=1)]['G_Image'])
            print('B:', B[B.isnull().any(axis=1)]['B_Image'])

            # Delete Image, Mask and Category columns from G and B
            G = G.drop(columns=['G_Image', 'G_Mask', 'G_Category'])
            B = B.drop(columns=['B_Image', 'B_Mask', 'B_Category'])

            # Remove R_ prefix from R_Image, R_Mask and R_Category columns in R
            R = R.rename(columns={'R_Image': 'Image', 'R_Mask': 'Mask', 'R_Category': 'Category'})

            # Merge R, G and B features
            RGB = pd.concat([R, G, B], axis=1)

            # Modify Image and Mask columns to leave only the file name and the nearest parent folder
            # remove R_ prefix from file names
            RGB['Image'] = RGB['Image'].str.replace('R_', '')
            RGB['Image'] = RGB['Image'].apply(lambda x: x.split('/')[-3] + '/' + x.split('/')[-2] + '/' + x.split('/')[-1])
            RGB['Mask'] = RGB['Mask'].apply(lambda x: x.split('/')[-1:][0])
            RGB.head()
            print(path)
            # Save RGB features
            RGB.to_csv(os.path.join(path, 'features_RGB.csv'))


R: Series([], Name: R_Image, dtype: object)
G: Series([], Name: G_Image, dtype: object)
B: Series([], Name: B_Image, dtype: object)
../data/features/synthbuster/wavelet1\dalle2\features\coif1\train_test
R: Series([], Name: R_Image, dtype: object)
G: Series([], Name: G_Image, dtype: object)
B: Series([], Name: B_Image, dtype: object)
../data/features/synthbuster/wavelet1\dalle2\features\coif5\train_test
R: Series([], Name: R_Image, dtype: object)
G: Series([], Name: G_Image, dtype: object)
B: Series([], Name: B_Image, dtype: object)
../data/features/synthbuster/wavelet1\dalle2\features\db1\train_test
R: Series([], Name: R_Image, dtype: object)
G: Series([], Name: G_Image, dtype: object)
B: Series([], Name: B_Image, dtype: object)
../data/features/synthbuster/wavelet1\dalle2\features\rbio6_8\train_test
R: Series([], Name: R_Image, dtype: object)
G: Series([], Name: G_Image, dtype: object)
B: Series([], Name: B_Image, dtype: object)
../data/features/synthbuster/wavelet1\dalle3\features\co

## Joining wavelet features

In [4]:
import pandas as pd
import numpy as np
import os


gen_image_path = '../data/features/synthbuster/wavelet1'

for gen in os.listdir(gen_image_path):
    features_path = os.path.join(gen_image_path, gen, 'features')

    df_train = pd.DataFrame()
    df_test = pd.DataFrame()

    # we are first iterating over datasets (test, train etc.) because we want to merge all wavelet features for each dataset
    for dataset in os.listdir(os.path.join(features_path, dir)):
        
        for dir in os.listdir(features_path):
            # to allow to re-run the script without manually deleting the files
            if dir == "all_wavelets":
                continue
            
            path = os.path.join(features_path, dir, dataset)
            
            # read RGB features
            df = pd.read_csv(os.path.join(path, 'features_RGB.csv'), index_col=0)
            
            # drop diagnostics, 'Image', 'Mask' and 'Category' columns
            to_drop = list()
            for column in df.columns:
                if dir != 'coif1' and ('diagnostics' in column or column == 'Image' or column == 'Category' or column == 'Mask'):
                    to_drop.append(column)
                elif 'diagnostics' in column:
                    to_drop.append(column)
                    
            df.drop(to_drop, axis=1, inplace=True)

            # add prefix to all columns
            df = df.add_prefix(f'{dir}_')
            
            # Merge RGB features with df_train and df_test
            if dataset == 'train' or dataset == 'train_test':
                df_train = pd.concat([df_train, df], axis=1)
            elif dataset == 'test' or dataset == 'val':
                df_test = pd.concat([df_test, df], axis=1)
            
        # Save df_train and df_test
        os.makedirs(os.path.join(features_path, 'all_wavelets', dataset), exist_ok=True)
        df_train.to_csv(os.path.join(features_path, 'all_wavelets', dataset, 'features_RGB.csv'))

## Concantenate

In [11]:
import pandas as pd
import numpy as np
import os

gen_image_path = '../data/features/synthbuster/wavelet1'
gen_standard_path = '../data/features/synthbuster/standard'
combined_path = '../data/features/synthbuster/combined'

for gen in os.listdir(gen_image_path):
    
    print(f"Processing {gen}")
    
    features_path = os.path.join(gen_image_path, gen, 'features', 'all_wavelets')
    standard_features_path = os.path.join(gen_standard_path, gen, 'features', 'extraction_params')
       
    for dataset in os.listdir(standard_features_path):
        df_train = pd.read_csv(os.path.join(features_path, dataset, 'features_RGB.csv'), index_col=0)
        train_main = pd.read_csv(os.path.join(standard_features_path, dataset, 'features_RGB.csv'), index_col=0)
        
        # drop wavelet columns to avoid duplicates
        to_drop = list()
        for column in train_main.columns:
            if 'wavelet' in column:
                to_drop.append(column)       
        train_main.drop(to_drop, axis=1, inplace=True)    
        df_train.drop(columns=df_train.select_dtypes(include=['object']).columns, inplace=True)
        combined_train = pd.concat([train_main, df_train], axis=1)     
        os.makedirs(os.path.join(combined_path, gen, dataset), exist_ok=True)
        combined_train.to_csv(os.path.join(combined_path, gen, dataset, 'features_RGB.csv'))

Processing dalle2
Processing dalle3
Processing firefly
Processing glide
Processing img
Processing midjourney-v5
Processing stable-diffusion-1-3
Processing stable-diffusion-1-4
Processing stable-diffusion-2
Processing stable-diffusion-xl
