In [1]:
from PIL import Image
import numpy as np

def generate_primes(n):
    """Return the first ``n`` prime numbers in ascending order.

    Each candidate, starting at 2, is tested by trial division against every
    integer from 2 up to the integer part of its square root.

    Args:
        n: How many primes to produce. Zero or a negative value yields an
            empty list.

    Returns:
        list: The first ``n`` primes as Python integers.
    """
    found = []
    candidate = 2
    while len(found) < n:
        # No divisor larger than sqrt(candidate) needs to be checked.
        limit = int(np.sqrt(candidate))
        is_prime = True
        for divisor in range(2, limit + 1):
            if candidate % divisor == 0:
                is_prime = False
                break
        if is_prime:
            found.append(candidate)
        candidate += 1
    return found

def calculate_unique_value(image_path):
    """Compute an integer fingerprint for an image from its top-left 28x28 pixels.

    The image is converted to 8-bit grayscale. The pixel at (row, col) is
    paired with the (row * 28 + col)-th prime, and the fingerprint is the sum
    of ``(pixel + 1) * prime`` over the 28x28 window. Different images can
    still produce the same sum, so callers should check for collisions.

    Args:
        image_path: Path of an image file that PIL can open.

    Returns:
        int: The weighted sum as a plain Python integer.

    Raises:
        IndexError: If the image is smaller than 28x28 pixels.
    """
    side = 28

    # Use a context manager so the file handle is released immediately; this
    # function is called once per file over whole directories.
    with Image.open(image_path) as image:
        # Widen before doing arithmetic. Grayscale pixels arrive as uint8, and
        # 255 + 1 or a product with a prime above 255 does not fit in uint8
        # (it wraps or raises, depending on the NumPy version).
        pixels = np.asarray(image.convert('L')).astype(np.int64)

    if pixels.shape[0] < side or pixels.shape[1] < side:
        raise IndexError(
            f"image {image_path!r} has shape {pixels.shape}, "
            f"expected at least {side}x{side}"
        )

    # Row-major layout gives position (row, col) the prime at index row * 28 + col.
    primes = np.array(generate_primes(side * side), dtype=np.int64).reshape(side, side)

    # Adding 1 keeps black (0) pixels contributing their prime to the sum.
    weighted = (pixels[:side, :side] + 1) * primes
    return int(weighted.sum())

In [2]:
import os
import shutil

def process_images(source_dir, dest_dir):
    """Copy each image in ``source_dir`` to ``dest_dir`` under its fingerprint name.

    Every regular file in ``source_dir`` is fingerprinted with
    ``calculate_unique_value`` and copied to ``dest_dir/<value>.bmp``,
    whatever its original extension. When a fingerprint has already been
    seen in this call, the file is not copied and the value is reported as a
    duplicate.

    Args:
        source_dir: Directory containing the input images.
        dest_dir: Directory that receives the renamed copies; created if
            missing.

    Returns:
        tuple: ``(processed_files, duplicate_values)``. ``processed_files``
        is the list of fingerprints that were copied, in processing order.
        ``duplicate_values`` is the set of fingerprints seen more than once.
    """
    os.makedirs(dest_dir, exist_ok=True)

    processed_files = []
    # Set mirror of processed_files, so duplicate detection stays O(1) per
    # file instead of scanning the list each time.
    seen_values = set()
    duplicate_values = set()

    for filename in os.listdir(source_dir):
        source_path = os.path.join(source_dir, filename)
        # Sub-directories and other non-file entries cannot be fingerprinted
        # or copied with shutil.copy, so skip them.
        if not os.path.isfile(source_path):
            continue

        unique_value = calculate_unique_value(source_path)
        print(filename + ":" + str(unique_value))

        # Duplicate check: keep only the first file for each fingerprint.
        if unique_value in seen_values:
            print(f"경고: 중복된 값 발견 - {unique_value}")
            duplicate_values.add(unique_value)
            continue

        seen_values.add(unique_value)
        processed_files.append(unique_value)
        shutil.copy(source_path, os.path.join(dest_dir, f"{unique_value}.bmp"))

    return processed_files, duplicate_values


In [3]:
# source_dir = 'C:/Users/koo/my_workspace/privacy_term_project/mnist_btm/testing'
# dest_dir =   'C:/Users/koo/my_workspace/privacy_term_project/mnist_btm_hash/testing'
# source_dir = 'C:/Users/koo/my_workspace/privacy_term_project/mnist_btm/training'
# dest_dir =   'C:/Users/koo/my_workspace/privacy_term_project/mnist_btm_hash/training'

# for i in range(0, 10):
#     processed_files, duplicates = process_images(source_dir + '/' +str(i) , dest_dir + '/' +str(i) )
#     print("처리된 파일들:", processed_files)
#     if duplicates:
#         print("중복된 값들:", duplicates)



In [4]:
import os

# Module-level accumulator of file names; later cells append to it as well.
file_list = []


def list_in_mnist_btm_hash(base_dir, sub_dirs=('testing', 'training')):
    """Append the file names found under ``base_dir/<sub_dir>/<digit>/`` to ``file_list``.

    For every entry of ``sub_dirs`` and every digit 0-9, the contents of the
    matching folder are added to the module-level ``file_list``. Folders that
    do not exist are skipped silently.

    Args:
        base_dir: Root directory of the image tree.
        sub_dirs: Names of the split folders directly under ``base_dir``.

    Returns:
        None. Results are accumulated in ``file_list``; calling the function
        again appends the same names again.
    """
    for sub_dir in sub_dirs:
        for digit in range(10):
            folder_path = os.path.join(base_dir, sub_dir, str(digit))
            # isdir rather than exists: a regular file named like a digit
            # would make os.listdir raise NotADirectoryError.
            if os.path.isdir(folder_path):
                file_list.extend(os.listdir(folder_path))


base_dir = 'mnist_btm_hash'  # root folder of the hashed MNIST images
list_in_mnist_btm_hash(base_dir)
print(len(file_list))

69997


In [5]:
# "119551584.bmp" in file_list  # hashed copies are saved with a .bmp extension

In [6]:
# Add the perturbed images stored directly under <base_dir>/<digit>/ to file_list.
# file_list is extended in place, so re-running this cell without first
# re-running the cell that resets file_list counts these files twice.
base_dir = 'random_pertubation_image_by_adv'  # root folder of the perturbed images
path = base_dir
for i in range(10):
    folder_path = os.path.join(path, str(i))
    # isdir rather than exists: a regular file named like a digit would make
    # os.listdir raise NotADirectoryError.
    if os.path.isdir(folder_path):
        file_list.extend(os.listdir(folder_path))

# Also picks up <base_dir>/testing/<digit> and <base_dir>/training/<digit> when
# those folders exist. NOTE(review): confirm this second pass is intended.
list_in_mnist_btm_hash(base_dir)
print(len(file_list))

78809
