In [1]:
import cv2 as cv
import numpy as np
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor 
import shutil, os
import polars as pl
import matplotlib.pyplot as plt

In [2]:
def get_saturation(img_path):
    bgr_img = cv.imread(img_path)
    hsv_img = cv.cvtColor(bgr_img, cv.COLOR_BGR2HSV)
    
    saturation = hsv_img[:, :, 1]
    avg_saturation = np.mean(saturation)
    print(avg_saturation)

In [3]:
def get_mean(img_path):
    bgr_img = cv.imread(img_path)
    hsv_img = cv.cvtColor(bgr_img, cv.COLOR_BGR2HSV)
    rgb_img = cv.cvtColor(hsv_img, cv.COLOR_HSV2RGB)
    print(rgb_img.mean())

In [4]:
def is_almost_gray(bgr_img:str)->bool:
    hsv_img = cv.cvtColor(bgr_img, cv.COLOR_BGR2HSV)
    
    saturation = hsv_img[:, :, 1]
    avg_saturation = np.mean(saturation)
    
    is_gray = True if avg_saturation <22 else False
    return is_gray

In [5]:
def find_img_files(img_dir:str) -> list[Path]:
    img_suffixs = ['.png', '.jpg']
    dir_path = Path(img_dir)
    img_paths = [f for f in dir_path.iterdir() if f.suffix in img_suffixs]
    return img_paths

In [None]:
def get_imgs(paths:list[str])->list[np.ndarray]:
    max_threads_num = os.cpu_count()
    subset_size = len(paths)//max_threads_num

    if len(paths) <= max_threads_num:
        subsets = [paths]
    else: 
        subsets = [paths[i*subset_size:(i+1)*subset_size] for i in range(max_threads_num)]
        subsets[-1].extend(paths[max_threads_num*subset_size:])
        
    read_imgs = lambda path_subset: [cv.imread(path) for path in path_subset]
    with ThreadPoolExecutor(max_threads_num) as exe:
        futures = [exe.submit(read_imgs, subset) for subset in subsets]
    
    bgr_imgs = []
    for future in futures:
        bgr_imgs.extend(future.result())
    return bgr_imgs

In [7]:
def gray_seperation(img_dir):
    img_dir = Path(img_dir)
    img_paths = find_img_files(img_dir)
    gray_img_paths = [img for img in img_paths if is_almost_gray(img)]
    
    seperation_dir = img_dir.parent/'gray'/img_dir.name
    seperation_dir.mkdir(exist_ok=True, parents=True)

    for gray_img in gray_img_paths:
        shutil.move(gray_img, seperation_dir/gray_img.name)

In [8]:
def get_histogram_df(std_array:np.ndarray, distance:int = 5):
    boundary = np.arange(0, std_array.max()+distance, distance)
    histogram = np.histogram(std_array, boundary)
    
    col_name = list(histogram[1][1:].astype(int).astype(str))
    col_data = histogram[0].reshape(1,-1)

    histogram_df = pl.DataFrame(col_data, col_name)
    return histogram_df

In [9]:
Ipath = r"E:\Datasets\deep_real\deepfake and real images\train\Real"
paths = list(Path(Ipath).iterdir())
bgr_imgs = get_imgs(paths)
img_num = len(paths)

h_stds, s_stds = np.zeros(img_num), np.zeros(img_num)
for idx, bgr_img in enumerate(bgr_imgs):
    h, s, v = cv.split(cv.cvtColor(bgr_img, cv.COLOR_BGR2HSV))
    h_stds[idx] = s.std()
    s_stds[idx] = h.std()

In [10]:
get_histogram_df(h_stds, 5)

5,10,15,20,25,30,35,40,45,50,55,60,65,70,75,80,85,90,95,100,105,110,115
i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64
0,0,3,26,109,365,824,1309,1720,1997,1882,1700,1333,1008,633,393,201,113,52,35,13,4,1


In [11]:
get_histogram_df(s_stds, 5)

5,10,15,20,25,30,35,40,45,50,55,60,65,70,75,80,85,90
i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64
66,236,325,451,613,687,890,1274,1587,1795,1827,1430,1135,724,409,207,62,3


In [None]:
single_tone_path = r"E:\Datasets\deep_real\deepfake and real images\train\hand_refine\color_refine"
single_tone_paths = list(Path(single_tone_path).iterdir())
print(single_tone_paths)
single_tone_imgs = get_imgs(single_tone_paths)
print(single_tone_imgs)
img_num = len(single_tone_paths)

h_stds, s_stds = np.zeros(img_num), np.zeros(img_num)
for idx, bgr_img in enumerate(single_tone_imgs):
    h, s, v = cv.split(cv.cvtColor(bgr_img, cv.COLOR_BGR2HSV))
    h_stds[idx] = s.std()
    s_stds[idx] = h.std()

[WindowsPath('E:/Datasets/deep_real/deepfake and real images/train/hand_refine/color_refine/fake_22160.jpg'), WindowsPath('E:/Datasets/deep_real/deepfake and real images/train/hand_refine/color_refine/fake_23681.jpg'), WindowsPath('E:/Datasets/deep_real/deepfake and real images/train/hand_refine/color_refine/fake_23684.jpg'), WindowsPath('E:/Datasets/deep_real/deepfake and real images/train/hand_refine/color_refine/fake_24976.jpg'), WindowsPath('E:/Datasets/deep_real/deepfake and real images/train/hand_refine/color_refine/fake_25834.jpg'), WindowsPath('E:/Datasets/deep_real/deepfake and real images/train/hand_refine/color_refine/fake_29887.jpg'), WindowsPath('E:/Datasets/deep_real/deepfake and real images/train/hand_refine/color_refine/fake_3086.jpg'), WindowsPath('E:/Datasets/deep_real/deepfake and real images/train/hand_refine/color_refine/fake_3088.jpg'), WindowsPath('E:/Datasets/deep_real/deepfake and real images/train/hand_refine/color_refine/fake_33858.jpg'), WindowsPath('E:/Data

In [None]:
get_histogram_df(std_array=h_stds)

5,10,15,20,25,30,35,40,45,50,55,60,65,70,75,80
i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64
13678,1,3,2,6,6,3,5,3,1,2,1,3,3,2,2


In [18]:
get_histogram_df(s_stds)

5,10,15,20,25,30,35,40,45,50,55,60,65,70,75,80
i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64
13680,1,4,1,3,6,1,4,1,7,1,1,1,3,5,2


In [None]:
#중앙값 기반