In [18]:
import os
from PIL import Image
import os
import re
import matplotlib.pyplot as plt
import numpy as np


# Define the paths to the two folders
dataset_GT_path = "./../data/Flickr2K"  # Path to the folder with high-resolution images
dataset_WR_path = "./../data/Flickr2K_LR"  # Path to the folder with low-resolution image

output_path = './../data/Flickr2k_LRWRGT' 

### Renaming the files to match Flickr2K format

In [3]:
# Loop through all files in folder2
for filename in os.listdir(dataset_WR_path):
    if filename.endswith(('.png', '.jpg', '.jpeg')):
        # Construct the full file path
        old_file_path = os.path.join(dataset_WR_path, filename)
        
        # Extract the numerical part from the original filename
        match = re.search(r'\d+', filename)
        if match:
            number = match.group(0).zfill(6)  # Pad the number with leading zeros to ensure it is 6 digits
            new_filename = f"{number}" + os.path.splitext(filename)[1]  # Retain the file extension
            new_file_path = os.path.join(dataset_WR_path, new_filename)
            
            # Rename the file
            os.rename(old_file_path, new_file_path)

print("Renaming completed.")

Renaming completed.


## Calculate ratios

In [5]:
# Initialize an empty list to store the downscaling ratios
downscaling_ratios = []

# Loop through all files in folder1
for filename in os.listdir(dataset_GT_path):
    if filename.endswith(('.png', '.jpg', '.jpeg')):
        filename = filename.split('.')[0]
        # Construct the full file paths
        file1 = os.path.join(dataset_GT_path, filename + '.png')
        file2 = os.path.join(dataset_WR_path, filename + '.jpeg')
        
        # Check if the corresponding file exists in folder2
        if os.path.exists(file2):
            # Open the images
            with Image.open(file1) as img1, Image.open(file2) as img2:
                # Get the dimensions of the images
                width1, height1 = img1.size
                width2, height2 = img2.size
                
                # Calculate the downscaling ratios
                ratio_x = width2 / width1
                ratio_y = height2 / height1
                
                # Store the ratios in the list
                downscaling_ratios.append((ratio_x, ratio_y))
        # else:
        #     print('file 2 not found:', file2)


## print ratios

In [6]:
# Separate the x and y ratios
x_ratios = [ratio[0] for ratio in downscaling_ratios]
y_ratios = [ratio[1] for ratio in downscaling_ratios]

# Function to print the distinct values with their frequencies
def print_ratio_frequencies(ratios, axis_label):
    unique, counts = np.unique(ratios, return_counts=True)
    print(f"Frequencies of Downscaling Ratios for {axis_label} Dimension:")
    for value, count in zip(unique, counts):
        print(f"Value: {value:.4f}, Frequency: {count}")

# # Print the downscaling ratios for x and y dimensions
# print_ratio_frequencies(x_ratios, 'X')
# print_ratio_frequencies(y_ratios, 'Y')


# Function to print the distinct pairs with their frequencies
def print_ratio_pair_frequencies(ratios):
    unique, counts = np.unique(ratios, axis=0, return_counts=True)
    print("Frequencies of Downscaling Ratio Pairs (X, Y):")
    for value, count in zip(unique, counts):
        if count > 4:
            print(f"Pair: (X = {value[0]:.4f}, Y = {value[1]:.4f}), Frequency: {count}")

# Print the downscaling ratio pairs for x and y dimensions
print_ratio_pair_frequencies(downscaling_ratios)

os.makedirs()

Frequencies of Downscaling Ratio Pairs (X, Y):
Pair: (X = 0.7839, Y = 0.7843), Frequency: 24
Pair: (X = 0.7841, Y = 0.7843), Frequency: 12
Pair: (X = 0.7842, Y = 0.7843), Frequency: 5
Pair: (X = 0.7843, Y = 0.7838), Frequency: 5
Pair: (X = 0.7843, Y = 0.7838), Frequency: 5
Pair: (X = 0.7843, Y = 0.7839), Frequency: 180
Pair: (X = 0.7843, Y = 0.7841), Frequency: 65
Pair: (X = 0.7843, Y = 0.7842), Frequency: 32
Pair: (X = 0.7843, Y = 0.7842), Frequency: 24
Pair: (X = 0.7843, Y = 0.7843), Frequency: 11
Pair: (X = 0.7890, Y = 0.7883), Frequency: 11
Pair: (X = 0.8333, Y = 0.8333), Frequency: 10
Pair: (X = 1.0000, Y = 1.0000), Frequency: 12


In [29]:
# Make folders in output folder
dataset_LR_path_output = os.path.join(output_path, 'dataset_LRW')
dataset_WR_path_output = os.path.join(output_path, 'dataset_WR')
dataset_GT_path_output = os.path.join(output_path, 'dataset_GT')

os.makedirs(dataset_LR_path_output, exist_ok=True)
os.makedirs(dataset_WR_path_output, exist_ok=True)
os.makedirs(dataset_GT_path_output, exist_ok=True)

print(os.listdir(dataset_WR_path))


for filename in os.listdir(dataset_WR_path):
    if filename.endswith(('.png', '.jpg', '.jpeg')):
        base_filename = filename.split('.')[0]

        print("filename", base_filename)

        # Construct the full file paths
        file_GT = os.path.join(dataset_GT_path, base_filename + '.png')
        file_WR = os.path.join(dataset_WR_path, base_filename + '.jpeg')
        
        # Check if the corresponding file exists in the GT folder
        if os.path.exists(file_GT) and os.path.exists(file_WR):
            with Image.open(file_GT) as img_GT, Image.open(file_WR) as img_WR:
                # Get the dimensions of the GT image
                width_GT, height_GT = img_GT.size
                
                # Calculate the new dimensions for the LR image
                new_width = width_GT // 2
                new_height = height_GT // 2
                
                # Resize the LR image
                img_LR= img_WR.resize((new_width, new_height), Image.LANCZOS)
                
                img_LR.save(os.path.join(dataset_LR_path_output, base_filename + '.png'))
                img_WR.save(os.path.join(dataset_WR_path_output, base_filename + '.png'))
                img_GT.save(os.path.join(dataset_GT_path_output, base_filename + '.png'))

                print('saved', base_filename)




['000289.jpeg', '000323.jpeg', '000266.jpeg', '000374.jpeg', '000231.jpeg', '000118.jpeg', '000362.jpeg', '000227.jpeg', '000335.jpeg', '000270.jpeg', '000159.jpeg', '000437.jpeg', '000067.jpeg', '000088.jpeg', '000122.jpeg', '000319.jpeg', '000030.jpeg', '000460.jpeg', '000175.jpeg', '000476.jpeg', '000026.jpeg', '000499.jpeg', '000163.jpeg', '000358.jpeg', '000071.jpeg', '000421.jpeg', '000134.jpeg', '000143.jpeg', '000456.jpeg', '000006.jpeg', '000285.jpeg', '000114.jpeg', '000051.jpeg', '000401.jpeg', '000378.jpeg', '000397.jpeg', '000102.jpeg', '000417.jpeg', '000047.jpeg', '000381.jpeg', '000155.jpeg', '000010.jpeg', '000440.jpeg', '000293.jpeg', '000339.jpeg', '000084.jpeg', '000207.jpeg', '000342.jpeg', '000196.jpeg', '000483.jpeg', '000179.jpeg', '000250.jpeg', '000315.jpeg', '000180.jpeg', '000495.jpeg', '000246.jpeg', '000303.jpeg', '000092.jpeg', '000138.jpeg', '000211.jpeg', '000354.jpeg', '000210.jpeg', '000355.jpeg', '000093.jpeg', '000139.jpeg', '000247.jpeg', '000302.j

In [39]:
import os
from collections import defaultdict

def get_patch_counts(directory):
    patch_counts = defaultdict(int)
    image_numbers = set()
    
    for filename in os.listdir(directory):
        if filename.endswith('.png'):
            image_number, patch_number = filename.split('_')
            patch_number = patch_number.split('.')[0]
            patch_counts[image_number] += 1
            image_numbers.add(image_number)
    
    return patch_counts, image_numbers


def compare_directories(dir1, dir2):
    patch_counts_dir1, image_numbers_dir1 = get_patch_counts(dir1)
    patch_counts_dir2, image_numbers_dir2 = get_patch_counts(dir2)
    
    unequal_patch_images = []
    all_image_numbers = image_numbers_dir1.union(image_numbers_dir2)
    
    missing_in_dir1 = image_numbers_dir2 - image_numbers_dir1
    missing_in_dir2 = image_numbers_dir1 - image_numbers_dir2
    
    for image_number in all_image_numbers:
        count_dir1 = patch_counts_dir1.get(image_number, 0)
        count_dir2 = patch_counts_dir2.get(image_number, 0)
        if count_dir1 != count_dir2:
            unequal_patch_images.append(image_number)
    
    return unequal_patch_images, missing_in_dir1, missing_in_dir2



dir1 = os.path.join(output_path, 'dataset_LRW_patches')
dir2 = os.path.join(output_path, 'dataset_GT_patches')

unequal_patches, missing_in_dir1, missing_in_dir2 = compare_directories(dir1, dir2)

print("Images with unequal patch counts:", unequal_patches)
print("Images missing in directory 1:", missing_in_dir1)
print("Images missing in directory 2:", missing_in_dir2)


Images with unequal patch counts: []
Images missing in directory 1: set()
Images missing in directory 2: set()


In [40]:
import os

def get_filenames(directory):
    filenames = set()
    for filename in os.listdir(directory):
        if filename.endswith('.png'):
            filenames.add(filename)
    return filenames

def find_missing_files(dir1, dir2):
    filenames_dir1 = get_filenames(dir1)
    filenames_dir2 = get_filenames(dir2)
    
    missing_in_dir1 = filenames_dir2 - filenames_dir1
    missing_in_dir2 = filenames_dir1 - filenames_dir2
    
    return missing_in_dir1, missing_in_dir2

# Example usage
dir1 = os.path.join(output_path, 'dataset_LRW_patches')
dir2 = os.path.join(output_path, 'dataset_GT_patches')

missing_in_dir1, missing_in_dir2 = find_missing_files(dir1, dir2)

print("Files present in directory 2 but not in directory 1:")
for filename in missing_in_dir1:
    print(filename)

print("\nFiles present in directory 1 but not in directory 2:")
for filename in missing_in_dir2:
    print(filename)


Files present in directory 2 but not in directory 1:

Files present in directory 1 but not in directory 2:


In [45]:
import os

def get_filenames(directory):
    filenames = set()
    for filename in os.listdir(directory):
        filenames.add(filename)
    print(directory, len(filenames))
    return filenames

def find_missing_files(dir1, dir2):
    filenames_dir1 = get_filenames(dir1)
    filenames_dir2 = get_filenames(dir2)
    
    missing_in_dir1 = filenames_dir2 - filenames_dir1
    missing_in_dir2 = filenames_dir1 - filenames_dir2
    
    return missing_in_dir1, missing_in_dir2

# Example usage
dir1 = os.path.join(output_path, 'dataset_LRW_patches')
dir2 = os.path.join(output_path, 'dataset_GT_patches')

missing_in_dir1, missing_in_dir2 = find_missing_files(dir1, dir2)

print("Files present in directory 2 but not in directory 1:")
for filename in missing_in_dir1:
    print(filename)

print("\nFiles present in directory 1 but not in directory 2:")
for filename in missing_in_dir2:
    print(filename)


./../data/Flickr2k_LRWRGT/dataset_LRW_patches 74866
./../data/Flickr2k_LRWRGT/dataset_GT_patches 74866
Files present in directory 2 but not in directory 1:

Files present in directory 1 but not in directory 2:
