In [None]:
import cv2 as cv
import numpy as np
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor 
import shutil, os
import polars as pl
import matplotlib.pyplot as plt

In [None]:
def get_saturation(img_path):
    """Print and return the mean HSV saturation of one image file.

    Args:
        img_path: Location of the image (``str`` or ``pathlib.Path``).

    Returns:
        The mean of the saturation channel, as computed by ``np.mean``.

    Raises:
        OSError: If OpenCV cannot read the file. ``cv.imread`` signals
            failure by returning ``None`` instead of raising.
    """
    # Older OpenCV Python bindings reject pathlib.Path, so pass a plain str.
    bgr_img = cv.imread(str(img_path))
    if bgr_img is None:
        raise OSError(f"Could not read image: {img_path}")

    hsv_img = cv.cvtColor(bgr_img, cv.COLOR_BGR2HSV)

    # Channel index 1 of an HSV image is saturation.
    avg_saturation = np.mean(hsv_img[:, :, 1])
    print(avg_saturation)
    return avg_saturation

In [None]:
def get_mean(img_path):
    """Print and return the mean pixel value of an image after a BGR -> HSV -> RGB round trip.

    Args:
        img_path: Location of the image (``str`` or ``pathlib.Path``).

    Returns:
        The mean over all pixels and channels of the round-tripped image.

    Raises:
        OSError: If OpenCV cannot read the file. ``cv.imread`` signals
            failure by returning ``None`` instead of raising.
    """
    # Older OpenCV Python bindings reject pathlib.Path, so pass a plain str.
    bgr_img = cv.imread(str(img_path))
    if bgr_img is None:
        raise OSError(f"Could not read image: {img_path}")

    # NOTE(review): the HSV round trip is kept so the printed numbers stay
    # the same as before; confirm whether a plain mean of the loaded image
    # was the actual intent.
    hsv_img = cv.cvtColor(bgr_img, cv.COLOR_BGR2HSV)
    rgb_img = cv.cvtColor(hsv_img, cv.COLOR_HSV2RGB)

    mean_value = rgb_img.mean()
    print(mean_value)
    return mean_value

In [None]:
def is_almost_gray(bgr_img: "np.ndarray | str | os.PathLike", threshold: float = 22) -> bool:
    """Tell whether an image is close to grayscale, judged by mean saturation.

    Args:
        bgr_img: Either pixel data in OpenCV's BGR channel order, or the
            location of an image file (``str`` / ``pathlib.Path``), which is
            then loaded with ``cv.imread``.
        threshold: The image counts as gray when its mean saturation is
            strictly below this value. The default of 22 is the cut-off the
            notebook has always used.

    Returns:
        ``True`` if the mean of the HSV saturation channel is below
        ``threshold``, otherwise ``False``.

    Raises:
        OSError: If a path was given and OpenCV cannot read the file.
    """
    if isinstance(bgr_img, (str, os.PathLike)):
        img_path = bgr_img
        # Older OpenCV Python bindings reject pathlib.Path, so pass a str.
        bgr_img = cv.imread(str(img_path))
        if bgr_img is None:
            raise OSError(f"Could not read image: {img_path}")

    hsv_img = cv.cvtColor(bgr_img, cv.COLOR_BGR2HSV)

    # Channel index 1 of an HSV image is saturation.
    avg_saturation = np.mean(hsv_img[:, :, 1])

    # bool() turns the NumPy comparison result into a plain Python bool.
    return bool(avg_saturation < threshold)

In [None]:
def find_img_files(img_dir: str, suffixes=('.png', '.jpg')) -> list[Path]:
    """List the image files that sit directly inside ``img_dir``.

    Args:
        img_dir: Directory to scan (not recursive).
        suffixes: File extensions, including the leading dot, that count as
            images. Matching ignores case, so ``photo.JPG`` is found too.

    Returns:
        Paths of the matching regular files, sorted so that repeated runs
        see the files in the same order.
    """
    wanted = {suffix.lower() for suffix in suffixes}
    return sorted(
        entry
        for entry in Path(img_dir).iterdir()
        # is_file() keeps out directories whose name happens to end in ".jpg".
        if entry.is_file() and entry.suffix.lower() in wanted
    )

In [None]:
def get_imgs(paths: list[str], reader=None) -> np.ndarray:
    """Load many images concurrently, keeping the order of ``paths``.

    Args:
        paths: Image locations (``str`` or ``pathlib.Path``).
        reader: Optional callable that takes one entry of ``paths`` and
            returns the loaded image. When omitted, each path is read with
            ``cv.imread``.

    Returns:
        A 1-D object array with one entry per path, so images of different
        sizes can live side by side. Entry ``i`` is whatever the reader
        returned for ``paths[i]``; with the default reader that is ``None``
        for a file OpenCV could not read.
    """
    if reader is None:
        def reader(path):
            # Older OpenCV Python bindings reject pathlib.Path objects.
            return cv.imread(str(path))

    paths = list(paths)
    imgs = np.empty(len(paths), dtype=object)
    if not paths:
        return imgs

    # os.cpu_count() may return None; never start more workers than files.
    worker_count = min(len(paths), os.cpu_count() or 1)
    with ThreadPoolExecutor(worker_count) as executor:
        # map() yields results in input order regardless of completion order.
        for idx, img in enumerate(executor.map(reader, paths)):
            # Element-wise assignment stores each image as one object instead
            # of letting NumPy try to stack or broadcast the pixel data.
            imgs[idx] = img
    return imgs

In [None]:
def gray_seperation(img_dir):
    """Move the nearly-gray images of ``img_dir`` into a sibling ``gray`` folder.

    Images are found with ``find_img_files`` and classified with
    ``is_almost_gray``. Those judged gray are moved to
    ``<parent of img_dir>/gray/<name of img_dir>/``, which is created if
    it does not exist yet.

    Args:
        img_dir: Directory holding the images to sort (``str`` or ``Path``).
    """
    img_dir = Path(img_dir)

    gray_img_paths = []
    for img_path in find_img_files(img_dir):
        # is_almost_gray works on pixel data, so load the file first.
        # Older OpenCV Python bindings reject pathlib.Path, hence str().
        bgr_img = cv.imread(str(img_path))
        if bgr_img is None:
            # Unreadable file: leave it where it is.
            continue
        if is_almost_gray(bgr_img):
            gray_img_paths.append(img_path)

    seperation_dir = img_dir.parent / 'gray' / img_dir.name
    seperation_dir.mkdir(exist_ok=True, parents=True)

    for gray_img in gray_img_paths:
        shutil.move(str(gray_img), str(seperation_dir / gray_img.name))

In [None]:
def get_histogram_df(std_array: np.ndarray, distance: int = 5):
    """Count values in equal-width bins and return the counts as a one-row table.

    Bins start at 0 and are ``distance`` wide; enough bins are created to
    include the largest value of ``std_array``.

    Args:
        std_array: Values to bin.
        distance: Width of every bin; must be positive.

    Returns:
        A polars DataFrame with a single row. Each column is named after the
        upper edge of its bin (truncated to an integer) and holds the number
        of values that fell into that bin.

    Raises:
        ValueError: If ``std_array`` is empty or ``distance`` is not positive.
    """
    std_array = np.asarray(std_array)
    if std_array.size == 0:
        raise ValueError("std_array must contain at least one value")
    if distance <= 0:
        raise ValueError("distance must be positive")

    boundary = np.arange(0, std_array.max() + distance, distance)
    if boundary.size < 2:
        # Happens when the largest value is 0: a lone edge defines no bin,
        # so keep one [0, distance] bin to hold those values.
        boundary = np.array([0, distance])

    counts, edges = np.histogram(std_array, boundary)

    col_name = [str(int(edge)) for edge in edges[1:]]
    col_data = counts.reshape(1, -1)

    # The (1, n_bins) array is one row of n_bins columns; say so explicitly
    # rather than relying on polars to infer the orientation.
    return pl.DataFrame(col_data, col_name, orient="row")

In [None]:
# Per-image spread (standard deviation) of the hue and saturation channels
# for every file in the "Real" training folder.
Ipath = r"E:\Datasets\deep_real\deepfake and real images\train\Real"
paths = list(Path(Ipath).iterdir())
bgr_imgs = get_imgs(paths)
img_num = len(paths)

h_stds, s_stds = np.zeros(img_num), np.zeros(img_num)
for idx, bgr_img in enumerate(bgr_imgs):
    h, s, v = cv.split(cv.cvtColor(bgr_img, cv.COLOR_BGR2HSV))
    # Store each channel's std under its own name: hue in h_stds,
    # saturation in s_stds.
    h_stds[idx] = h.std()
    s_stds[idx] = s.std()

In [None]:
# One-row table: number of h_stds values falling into each bin of width 5.
get_histogram_df(std_array=h_stds, distance=5)

In [None]:
# One-row table: number of s_stds values falling into each bin of width 5.
get_histogram_df(std_array=s_stds, distance=5)

In [None]:
# Same hue/saturation spread measurement, this time for the hand-picked
# images in the "color_refine" folder.
single_tone_path = r"E:\Datasets\deep_real\deepfake and real images\train\hand_refine\color_refine"
single_tone_paths = list(Path(single_tone_path).iterdir())
print(single_tone_paths)
single_tone_imgs = get_imgs(single_tone_paths)
# Report how many images were loaded instead of dumping every pixel array.
print(len(single_tone_imgs))
img_num = len(single_tone_paths)

h_stds, s_stds = np.zeros(img_num), np.zeros(img_num)
for idx, bgr_img in enumerate(single_tone_imgs):
    h, s, v = cv.split(cv.cvtColor(bgr_img, cv.COLOR_BGR2HSV))
    # Store each channel's std under its own name: hue in h_stds,
    # saturation in s_stds.
    h_stds[idx] = h.std()
    s_stds[idx] = s.std()

In [None]:
# One-row table of h_stds counts per bin, using the default bin width.
get_histogram_df(h_stds)

In [None]:
# One-row table of s_stds counts per bin, using the default bin width.
get_histogram_df(std_array=s_stds)

In [None]:
# Median-based