In [1]:
import cv2
import os
import numpy as np
import matplotlib.pyplot as plt
from io import BytesIO
from loguru import logger
import json

from config.carbon_mixed import *

# Root folder of the raw recordings; each immediate sub-folder is processed as one
# dataset folder. NOTE: absolute, machine-specific path - adjust when running elsewhere.
input_folder_path = r'E:\record\sideview_mixed_dataset'
# Receives the frame images read from the input folders (the "label" images).
output_label_path = './output/label'
# Receives the rendered condition-matrix images (the "feature" images).
output_feature_path = './output/feature'
# Receives one JSON file per frame holding the condition dict of that frame.
output_data_path = './output/data'

In [2]:
def list_directories(path):
    """Return the names of the immediate sub-directories of ``path``, sorted.

    ``os.listdir`` yields entries in arbitrary order. The result is sorted so
    that everything derived from the folder order (for example running output
    indices) is the same on every run and every platform.

    Args:
        path: Directory to inspect.

    Returns:
        list[str]: Sub-directory names (not full paths) in ascending order.
        Regular files are excluded.
    """
    return sorted(
        entry for entry in os.listdir(path)
        if os.path.isdir(os.path.join(path, entry))
    )


# Each immediate sub-folder of the input root is treated as one dataset folder
# by the processing loop later in the notebook.
dataset_dirs = list_directories(input_folder_path)
# Display how many dataset folders were found.
len(dataset_dirs)

90

In [3]:
# Sanity check: show the condition dict produced for a sample folder name and
# frame offset 10. `analyze_conditions` is not defined in this notebook -
# presumably it comes from the star import of `config.carbon_mixed` (TODO confirm).
analyze_conditions('POD_0.5_0.25_0.25_30C_100kPa_1', 10)


{'p_ratio': 50.0,
 'o_ratio': 25.0,
 'd_ratio': 25.0,
 'temperature': 30.0,
 'pressure': 100.0,
 'frame_id': 10}

In [4]:
def count_jpg_files(folder_path):
    """Count the entries directly inside ``folder_path`` whose name ends in ``.jpg``.

    The extension check is case-insensitive; sub-folders are not descended into.

    Args:
        folder_path: Directory to scan.

    Returns:
        int: Number of matching entries.
    """
    return sum(
        1
        for entry in os.listdir(folder_path)
        if entry.lower().endswith('.jpg')
    )

In [5]:
def _render_condition_image(values, width, height):
    """Render ``values`` on the diagonal of a square matrix as a colour image.

    The matrix is drawn with matplotlib (viridis colormap, no axes), encoded as
    JPEG in memory, decoded with OpenCV (``cv2.IMREAD_COLOR``) and resized to
    ``width`` x ``height``.
    """
    # Square matrix, zero everywhere except the i-th value at position (i, i).
    matrix = np.diag(np.asarray(values, dtype=float))

    buf = BytesIO()
    fig, ax = plt.subplots()
    try:
        ax.imshow(matrix, cmap='viridis', interpolation='nearest')
        ax.axis('off')
        fig.savefig(buf, format='jpg', bbox_inches='tight', pad_inches=0, dpi=300)
    finally:
        # One figure is created per frame; always release it.
        plt.close(fig)

    # Encoded image bytes must be handed to OpenCV as unsigned 8-bit values.
    encoded = np.frombuffer(buf.getvalue(), dtype=np.uint8)
    image = cv2.imdecode(encoded, cv2.IMREAD_COLOR)
    return cv2.resize(image, (width, height))


def _write_image(path, image):
    """Write ``image`` to ``path``; raise if OpenCV reports that the write failed."""
    if not cv2.imwrite(path, image):
        raise OSError(f'Failed to write image: {path}')


def exec_one_folder(input_path, use_index_or_filename=True):
    """Turn the frames of one recording folder into feature/label/data triplets.

    For every selected image in ``input_path``:

    1. the frame is read with OpenCV and becomes the *label* image;
    2. ``analyze_conditions`` is called with ``input_path`` and the frame id
       relative to the first listed frame, giving the condition dict;
    3. the dict's values are rendered as a diagonal-matrix image resized to the
       frame's size, which becomes the *feature* image;
    4. feature image, label image and the dict (as JSON) are written to
       ``output_feature_path``, ``output_label_path`` and ``output_data_path``.

    Args:
        input_path: Folder holding the frame images of one recording.
        use_index_or_filename: If True (default) outputs are named by a running
            index that continues from the number of ``.jpg`` files already in
            ``output_feature_path``. If False they are named after the source
            image's file name without its extension.

    Raises:
        OSError: If an output image cannot be written.
    """
    # Keep names ending in 'jpg' but skip those whose stem ends with ')'
    # (e.g. 'frame (1).jpg' - presumably duplicate copies). The slice, unlike
    # name[-5], cannot raise on names shorter than five characters.
    img_files = sorted(
        name for name in os.listdir(input_path)
        if name.endswith('jpg') and name[-5:-4] != ')'
    )
    logger.success(f'{input_path} listed, {len(img_files)} imgs found.')

    # Make sure the output folders exist: listing a missing folder raises and
    # OpenCV does not create missing parent folders when writing.
    for out_dir in (output_feature_path, output_label_path, output_data_path):
        os.makedirs(out_dir, exist_ok=True)

    base_idx = count_jpg_files(output_feature_path)
    start_idx = 0
    cnt = 0  # number of frames written so far for this folder
    for idx, file_name in enumerate(img_files):
        frame_id = analyze_frame_id(file_name)
        if idx == 0:
            # Frame ids are passed on relative to the first listed frame.
            start_idx = frame_id

        output_img = cv2.imread(f'{input_path}/{file_name}')
        if output_img is None:
            # cv2.imread signals an unreadable file by returning None.
            logger.warning(f'Skip unreadable image {input_path} / {file_name}.')
            continue

        # NOTE(review): the example call earlier in the notebook passes a bare
        # folder name while the full path is passed here - confirm that
        # analyze_conditions accepts both.
        data = analyze_conditions(input_path, frame_id - start_idx)
        height, width = output_img.shape[:2]
        input_img = _render_condition_image(list(data.values()), width, height)

        if use_index_or_filename:
            out_name = str(base_idx + cnt)
        else:
            # Strip the source extension so outputs are 'name.jpg' / 'name.json'
            # rather than 'name.jpg.jpg' / 'name.jpg.json'.
            out_name = os.path.splitext(file_name)[0]

        _write_image(f'{output_feature_path}/{out_name}.jpg', input_img)
        _write_image(f'{output_label_path}/{out_name}.jpg', output_img)
        with open(f'{output_data_path}/{out_name}.json', 'w', encoding='utf-8') as fp:
            json.dump(data, fp, ensure_ascii=False, indent=4)

        logger.success(f'Finish handling {input_path} / {idx}.')
        cnt += 1

In [None]:
# exec_one_folder(input_path=input_folder_path, use_index_or_filename=False)

In [6]:
# Process every dataset folder in turn; outputs are named by running index
# (the default mode of exec_one_folder).
for dataset_dir in dataset_dirs:
    input_path = os.path.join(input_folder_path, dataset_dir)
    exec_one_folder(input_path=input_path)
    logger.success(f"Finish handling {input_path}")

[32m2025-06-29 14:21:16.233[0m | [32m[1mSUCCESS [0m | [36m__main__[0m:[36mexec_one_folder[0m:[36m7[0m - [32m[1mE:\record\sideview_mixed_dataset\POD_0.125_0.5_0.375_30C_100kPa_ (1) listed, 21 imgs found.[0m
[32m2025-06-29 14:21:16.381[0m | [32m[1mSUCCESS [0m | [36m__main__[0m:[36mexec_one_folder[0m:[36m47[0m - [32m[1mFinish handling E:\record\sideview_mixed_dataset\POD_0.125_0.5_0.375_30C_100kPa_ (1) / 0.[0m
[32m2025-06-29 14:21:16.485[0m | [32m[1mSUCCESS [0m | [36m__main__[0m:[36mexec_one_folder[0m:[36m47[0m - [32m[1mFinish handling E:\record\sideview_mixed_dataset\POD_0.125_0.5_0.375_30C_100kPa_ (1) / 1.[0m
[32m2025-06-29 14:21:16.581[0m | [32m[1mSUCCESS [0m | [36m__main__[0m:[36mexec_one_folder[0m:[36m47[0m - [32m[1mFinish handling E:\record\sideview_mixed_dataset\POD_0.125_0.5_0.375_30C_100kPa_ (1) / 2.[0m
[32m2025-06-29 14:21:16.688[0m | [32m[1mSUCCESS [0m | [36m__main__[0m:[36mexec_one_folder[0m:[36m47[0m - [32m[

In [7]:
# split train/valid/test
import os
import random
import shutil

# folder paths
source_folder = './output/feature'
train_folder = './output/feature/train'
val_folder = './output/feature/valid'
test_folder = './output/feature/test'

os.makedirs(train_folder, exist_ok=True)
os.makedirs(val_folder, exist_ok=True)
os.makedirs(test_folder, exist_ok=True)

# Only regular files are split; the split sub-folders created above are skipped.
# os.listdir returns entries in arbitrary order, so sort first: together with
# the fixed seed below this makes the split identical on every run.
images = sorted(
    f for f in os.listdir(source_folder)
    if os.path.isfile(os.path.join(source_folder, f))
)

# Fixed seed for a reproducible split; a private Random instance is used so the
# global random state is left untouched.
split_seed = 42
random.Random(split_seed).shuffle(images)

# train/valid/test ratios
train_ratio = 0.9
val_ratio = 0.05

# Sizes are truncated to integers; the test set takes whatever remains.
train_size = int(train_ratio * len(images))
val_size = int(val_ratio * len(images))
test_size = len(images) - train_size - val_size

train_set = images[:train_size]
val_set = images[train_size:train_size + val_size]
test_set = images[train_size + val_size:]

def move_files(file_list, destination_folder, source_dir=None):
    """Move the named files from a source folder into ``destination_folder``.

    Args:
        file_list: File names (not paths) to move.
        destination_folder: Folder that receives the files.
        source_dir: Folder the files currently live in. When omitted, the
            notebook-level ``source_folder`` variable is used, looked up at
            call time. Passing it explicitly avoids depending on that global.
    """
    if source_dir is None:
        source_dir = source_folder
    for file_name in file_list:
        shutil.move(
            os.path.join(source_dir, file_name),
            os.path.join(destination_folder, file_name),
        )

# Move each subset into the folder of its split.
for subset, target_folder in (
    (train_set, train_folder),
    (val_set, val_folder),
    (test_set, test_folder),
):
    move_files(subset, target_folder)

# Report the size of every split.
print(f"train set: {len(train_set)} imgs")
print(f"valid set: {len(val_set)} imgs")
print(f"test set: {len(test_set)} imgs")

train set: 1683 imgs
valid set: 93 imgs
test set: 94 imgs


In [8]:
# apply the train/valid/test split computed in the previous cell (train_set / val_set / test_set) to the label images
import os
import random
import shutil

source_folder = './output/label'
train_folder = './output/label/train'
val_folder = './output/label/valid'
test_folder = './output/label/test'

os.makedirs(train_folder, exist_ok=True)
os.makedirs(val_folder, exist_ok=True)
os.makedirs(test_folder, exist_ok=True)

images = [f for f in os.listdir(source_folder) if os.path.isfile(os.path.join(source_folder, f))]

# This cell does not compute a split of its own: it reuses train_set / val_set /
# test_set from the feature split so that feature and label files with the same
# name land in the same subset. Verify up front that every file of that split
# exists here, so a mismatch fails before any file has been moved instead of
# leaving the label folder half-split.
missing_labels = sorted(set(train_set + val_set + test_set) - set(images))
if missing_labels:
    raise FileNotFoundError(
        f"{len(missing_labels)} file(s) of the split are missing from "
        f"{source_folder}, e.g. {missing_labels[:5]}"
    )

def move_files(file_list, destination_folder, source_dir=None):
    """Move the named files from a source folder into ``destination_folder``.

    Args:
        file_list: File names (not paths) to move.
        destination_folder: Folder that receives the files.
        source_dir: Folder the files currently live in. When omitted, the
            notebook-level ``source_folder`` variable is used, looked up at
            call time. Passing it explicitly avoids depending on that global.
    """
    if source_dir is None:
        source_dir = source_folder
    for file_name in file_list:
        shutil.move(
            os.path.join(source_dir, file_name),
            os.path.join(destination_folder, file_name),
        )

# Move each subset (as chosen for the feature images) into its label split folder.
for subset, target_folder in (
    (train_set, train_folder),
    (val_set, val_folder),
    (test_set, test_folder),
):
    move_files(subset, target_folder)

# Report the size of every split.
print(f"train set: {len(train_set)} imgs")
print(f"valid set: {len(val_set)} imgs")
print(f"test set: {len(test_set)} imgs")

train set: 1683 imgs
valid set: 93 imgs
test set: 94 imgs
