In [None]:
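# One-time Colab setup for the Kaggle CLI (uncomment to run).
# Expects the API token file to have been uploaded as /content/kaggle.json.
# !pip install kaggle
# !mkdir ~/.kaggle
# !cp /content/kaggle.json ~/.kaggle/kaggle.json
# !chmod 600 ~/.kaggle/kaggle.json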

In [None]:
# Download and unzip the Kaggle handwritten-signature dataset (uncomment to run).
# !kaggle datasets download -d ishanikathuria/handwritten-signature-datasets
# !unzip /content/handwritten-signature-datasets

In [None]:
import matplotlib.pyplot as plt
from PIL import Image

# Preview a few forged and original signatures from folder 1 so the dataset
# layout can be checked by eye before any pairs are built.
sample_dir = '/content/CEDAR/CEDAR/1'

# Labels
# The labels are also the file stems. Keeping this single list as the source
# of truth means a title can never drift out of sync with the file it shows
# (two hand-maintained lists would be silently truncated by zip()).
labels = [
    'forgeries_1_1',
    'forgeries_1_10',
    'forgeries_1_6',
    'original_1_1',
    'original_1_10',
    'original_1_4',
]

# File paths
image_paths = [f'{sample_dir}/{label}.png' for label in labels]

# Plotting images
# One column per sample; squeeze=False keeps the result an array even when
# only one image is listed, so the loop below works for any number of samples.
fig, axes_grid = plt.subplots(1, len(image_paths), figsize=(20, 5), squeeze=False)
axes = axes_grid.ravel()
for ax, img_path, label in zip(axes, image_paths, labels):
    # Pillow keeps the file open until the image is closed; the context
    # manager releases the handle once imshow() has read the pixels.
    with Image.open(img_path) as image:
        ax.imshow(image, cmap='gray')
    ax.set_title(label)
    ax.axis('off')

plt.tight_layout()
plt.show()



In [None]:
import os

def count_folders_in_directory(directory):
    """Return how many immediate children of ``directory`` are directories.

    Only the top level is inspected (no recursion). Each entry is tested
    with ``os.path.isdir``, so regular files do not contribute to the count.
    """
    # Each isdir() result is a bool; summing them counts the True values.
    return sum(
        os.path.isdir(os.path.join(directory, name))
        for name in os.listdir(directory)
    )

# Root of the extracted dataset; its immediate sub-folders are counted below.
directory_path = '/content/CEDAR/CEDAR/'
folder_total = count_folders_in_directory(directory_path)
print(f'Number of folders: {folder_total}')


In [None]:

# Sub-folders directly under the dataset root. The listing does not depend on
# any loop index, so it is computed exactly once.
input_paths = sorted(
    os.path.join(directory_path, entry)
    for entry in os.listdir(directory_path)
    # isdir (not exists): a stray file sitting next to the folders would
    # otherwise reach os.listdir() below and abort the whole cell.
    if os.path.isdir(os.path.join(directory_path, entry))
)

print(f"Found {len(input_paths)} folders under {directory_path}")

input_img_paths = []

for path in input_paths:
    # Check the contents of the directory; surface I/O problems (folder
    # removed meanwhile, permissions, ...) as a ValueError naming the folder.
    try:
        file_names = os.listdir(path)
    except OSError as e:
        raise ValueError(f"Error reading the directory {path}: {e}") from e

    # Keep regular files only; nested folders are not image paths.
    input_img_paths.extend(
        os.path.join(path, fname)
        for fname in file_names
        if os.path.isfile(os.path.join(path, fname))
    )

# Sort the collected paths
input_img_paths = sorted(input_img_paths)

# Print a summary for debugging: the count plus a short preview rather than
# the full list, which runs to thousands of lines.
print(f"Input image paths: {len(input_img_paths)} files")
print(input_img_paths[:5])


In [None]:
import re
import csv
import random

# Define the paths
# NOTE(review): this rebinds `input_paths` to a string and nothing in this
# cell reads it; kept only in case a later cell depends on the value.
input_paths = '/content/'

# Number of pairs written per class, and the seed that makes the random
# selection repeatable across kernel restarts.
NUM_PAIRS = 1320
PAIR_SEED = 42

# Column layout shared by both CSV files
fieldnames = ['image_path_one', 'image_path_two', 'label']

# Define regular expressions for forged and real data
pattern_forged = re.compile(r'forgeries_\d+_\d+\.png', re.IGNORECASE)
pattern_real = re.compile(r'original_\d+_\d+\.png', re.IGNORECASE)


def _write_pairs_csv(path, rows):
    """Write ``rows`` (dicts keyed by ``fieldnames``) to ``path`` under a header line."""
    with open(path, mode='w', newline='') as handle:
        pair_writer = csv.DictWriter(handle, fieldnames=fieldnames)
        pair_writer.writeheader()
        pair_writer.writerows(rows)


def _pair_within_folder(paths):
    """Pair every image with a *different* image from the same folder.

    Images are grouped by parent directory, and each one is matched with the
    next image of its group (wrapping around), so no image is paired with
    itself. Folders holding a single image contribute no pairs.
    Returns a list of ``(path_a, path_b)`` tuples.
    """
    by_folder = {}
    for image_path in paths:
        by_folder.setdefault(os.path.dirname(image_path), []).append(image_path)

    pairs = []
    for group in by_folder.values():
        if len(group) < 2:
            continue  # a lone image has no distinct partner
        pairs.extend(zip(group, group[1:] + group[:1]))
    return pairs


# Separate forged and real image paths (input_img_paths comes from the
# path-collection cell)
forged_image_paths = [path for path in input_img_paths if pattern_forged.search(os.path.basename(path))]
real_image_paths = [path for path in input_img_paths if pattern_real.search(os.path.basename(path))]
print(len(forged_image_paths))
print(len(real_image_paths))

# Ensure there are enough images to select pairs
if len(forged_image_paths) < NUM_PAIRS or len(real_image_paths) < NUM_PAIRS:
    raise ValueError(f"Not enough forged or real images to create {NUM_PAIRS} pairs.")

# Shuffle with a dedicated, seeded generator: the selection is random but
# reproducible, and the global `random` state is left untouched.
pair_rng = random.Random(PAIR_SEED)
pair_rng.shuffle(forged_image_paths)
pair_rng.shuffle(real_image_paths)

# Forged/real pairs, label 1.
# NOTE(review): the two lists are shuffled independently, so a forgery is
# usually paired with an original from a different folder - confirm that this
# is the intended kind of negative pair.
selected_pairs = [
    {"image_path_one": forged_image_paths[i], "image_path_two": real_image_paths[i], "label": 1}
    for i in range(NUM_PAIRS)
]

# Define the CSV file path
csv_file_path = '/content/selected_images.csv'

# Write data to CSV
_write_pairs_csv(csv_file_path, selected_pairs)

print(f"Selected images have been saved to {csv_file_path}")


csv_file_path_one = '/content/selected_images_one.csv'

# Real/real pairs, label 0. Each original is paired with a different original
# from the same folder; putting the same path in both columns would only
# produce trivial identical-image pairs.
genuine_pairs = _pair_within_folder(real_image_paths)
if len(genuine_pairs) < NUM_PAIRS:
    raise ValueError(
        f"Only {len(genuine_pairs)} real/real pairs could be formed; {NUM_PAIRS} are required."
    )
# Shuffle before truncating so the kept pairs are not biased towards the
# folders that happened to be grouped first.
pair_rng.shuffle(genuine_pairs)

selected_pairs_one = [
    {"image_path_one": first, "image_path_two": second, "label": 0}
    for first, second in genuine_pairs[:NUM_PAIRS]
]

_write_pairs_csv(csv_file_path_one, selected_pairs_one)

print(f"Selected images have been saved to {csv_file_path_one}")




In [None]:
# Define the paths to your CSV files
file1_path = '/content/selected_images.csv'
file2_path = '/content/selected_images_one.csv'
# Absolute like the inputs, so where the file lands does not depend on the
# kernel's current working directory.
combined_file_path = '/content/CEDARDATAset.csv'

# Read the contents of both CSV files
data = []
fieldnames = None

for file_path in [file1_path, file2_path]:
    # newline='' lets the csv module do its own line-ending handling
    with open(file_path, mode='r', newline='') as file:
        reader = csv.DictReader(file)
        if fieldnames is None:
            # Take the column names from the header itself, so a file with a
            # header but no rows still yields a usable layout.
            fieldnames = reader.fieldnames
        elif reader.fieldnames != fieldnames:
            raise ValueError(
                f"{file_path} has columns {reader.fieldnames}, expected {fieldnames}"
            )
        data.extend(reader)

if not fieldnames:
    raise ValueError("No header found in the input CSV files; nothing to combine.")

# Write the combined data to a new CSV file
with open(combined_file_path, mode='w', newline='') as file:
    writer = csv.DictWriter(file, fieldnames=fieldnames)

    writer.writeheader()
    writer.writerows(data)

print(f"Files have been combined and saved to {combined_file_path}")


In [8]:
# Count the data rows of the combined CSV.
csv_file_path = '/content/CEDARDATAset.csv'

with open(csv_file_path, mode='r') as file:
    reader = csv.reader(file)
    total_records = len(list(reader))

# The first record is the header row, so it is left out of the count
row_count = total_records - 1

print(f'The number of rows in the CSV file is: {row_count}')


The number of rows in the CSV file is: 2640
