In [1]:
import numpy as np
import os, random
from PIL import Image
import json
import glob

In [2]:
# imagenet_mean = 0.448
# imagenet_std = 0.226

def make_image_format_v1(tensor, wp_mean, wp_std, c):
    tensor = (tensor - wp_mean) / wp_std
    tensor = tensor * imagenet_std * c + imagenet_mean
    tensor = np.clip(tensor, 0, 1)
    tensor = tensor * 255  # Grayscale image data (100x100 pixels)
    tensor = tensor.astype(np.uint8)
    return tensor

def reverse_image_format_v1(tensor, wp_mean, wp_std, c):
    # uint8에서 float로 변환하고 255로 나누어 0-1 범위로 정규화
    tensor = tensor.astype(np.float32) / 255.0
    
    # 클리핑을 무시하므로 클리핑 이전의 값으로 가정
    tensor = tensor - imagenet_mean  # imagenet_mean을 빼기
    tensor = tensor / (imagenet_std * c)  # (imagenet_std * c)로 나누기
    tensor = tensor * wp_std + wp_mean  # wp_std로 곱하고 wp_mean을 더하기
    return tensor


def make_image_format(w, mu, s, c):
    th = c * s
    w = np.clip(w, mu - th, mu + th)
    w = 255 * (w - mu + th) / (2 * th)
    w = w.astype(np.uint8)
    return w

def reverse_image_format(w, mu, s, c):
    th = c * s
    # w를 float 타입으로 변환하여 역연산
    w = w.astype(np.float64)
    # [0, 255] 범위를 [mu - th, mu + th] 범위로 되돌리기
    w = (w / 255) * (2 * th) + (mu - th)
    return w

def calculate_mse(array1, array2):
    # 두 배열의 차이를 계산
    difference = array1 - array2
    # 차이의 제곱을 계산
    squared_difference = np.square(difference)
    # 제곱된 차이의 평균을 계산
    mse = np.mean(squared_difference)
    return mse

from PIL import Image
import numpy as np

def png_to_numpy_array(file_path):
    # PNG 파일 읽기
    image = Image.open(file_path)
    # 이미지를 NumPy 배열로 변환
    image_array = np.array(image)
    return image_array

## random sample N image shaped wp dataset
then, save N npys, N images

In [4]:
image_path = '/home/jgryu/Weight_compression/JPEG/wp_image'
npy_path = '/home/jgryu/Weight_compression/JPEG/wp_npy'
# dim = 256
N = 1000
# dataset_folders = ["/home/jgryu/Weight_compression/Wparam_dataset/image_shape_wp/meta-llama-3-8b_mlp_val_json", "/home/jgryu/Weight_compression/Wparam_dataset/image_shape_wp/meta-llama-3-8b_attn_val_json"]
dataset_folders = ["/home/jgryu/Weight_compression/Wparam_dataset/image_shape_wp/llama-2-7b_attn_train_json", "/home/jgryu/Weight_compression/Wparam_dataset/image_shape_wp/llama-2-7b_mlp_train_json"]
for dataset_f in dataset_folders:
    for dim in [64, 256, 512, 1024]:
        dataset_folder = dataset_f + f'/{dim}_{dim}'

        directories = [os.path.join(dataset_folder, d) for d in os.listdir(dataset_folder) if os.path.isdir(os.path.join(dataset_folder, d))]
        print(directories)
        wp_path_list = glob.glob(f'{directories[0]}/**/*.npy', recursive=True)
        print(len(wp_path_list))
        print(wp_path_list[0])

        random.seed(100)
        if len(wp_path_list) > N:
            random_list = random.sample(wp_path_list, N)
            wp_path_list = random_list

        mean = np.load(dataset_folder + f'/mean_value.npy')
        std = np.load(dataset_folder + f'/std_value.npy')

        image_save_path = os.path.join(image_path, "/".join(dataset_folder.split('/')[-2:]))
        npy_save_path = os.path.join(npy_path, "/".join(dataset_folder.split('/')[-2:]))

        os.makedirs(npy_save_path, exist_ok = True)
        for i in range(len(wp_path_list)):
            n = np.load(wp_path_list[i])
            np.save(npy_save_path + f'/{i}.npy', n)
            
        np.save(npy_save_path + f'/mean_value.npy', mean)
        np.save(npy_save_path + f'/std_value.npy', std)

        os.makedirs(image_save_path, exist_ok = True)
        # for c in [0.1, 0.3, 0.5, 0.7, 1, 1.5, 2, 3]:
        for t in [1, 1.2, 1.5, 2, 2.5, 3, 3.5, 4, 5]:
            for i in range(len(wp_path_list)):
                n = np.load(wp_path_list[i])
                n = make_image_format(n, mu=mean, s=std, c=t)
                # print(n.shape, n.max(), n.min(), n.mean()), n.std()
                
                img = Image.fromarray(n, mode='L')  # 'L' mode is for grayscale
                directory = f"{image_save_path}/t={t}"
                # if not os.path.exists(directory):
                #     os.mkdir(directory)
                os.makedirs(directory, exist_ok=True)
                img.save(f"{directory}/{i}.png")

['/home/jgryu/Weight_compression/Wparam_dataset/image_shape_wp/llama-2-7b_attn_train_json/64_64/Llama-2-7b-hf']
417792
/home/jgryu/Weight_compression/Wparam_dataset/image_shape_wp/llama-2-7b_attn_train_json/64_64/Llama-2-7b-hf/model-layers-17-self_attn-q_proj-weight_npy/1104.npy
['/home/jgryu/Weight_compression/Wparam_dataset/image_shape_wp/llama-2-7b_attn_train_json/256_256/Llama-2-7b-hf']
26112
/home/jgryu/Weight_compression/Wparam_dataset/image_shape_wp/llama-2-7b_attn_train_json/256_256/Llama-2-7b-hf/model-layers-17-self_attn-q_proj-weight_npy/186.npy
['/home/jgryu/Weight_compression/Wparam_dataset/image_shape_wp/llama-2-7b_attn_train_json/512_512/Llama-2-7b-hf']
6528
/home/jgryu/Weight_compression/Wparam_dataset/image_shape_wp/llama-2-7b_attn_train_json/512_512/Llama-2-7b-hf/model-layers-17-self_attn-q_proj-weight_npy/1.npy
['/home/jgryu/Weight_compression/Wparam_dataset/image_shape_wp/llama-2-7b_attn_train_json/1024_1024/Llama-2-7b-hf']
1632
/home/jgryu/Weight_compression/Wparam_