In [1]:
# Script to merge original labeled and pseudo-labeled data for Student Model training
import os
import pandas as pd
import shutil

# Paths
# Original labeled dataset: images, their masks, and the label CSV.
ORIG_IMG_DIR = "G:/Sajal_Data/Obj_4_Code/Teacher_model_training/data/images"
ORIG_MASK_DIR = "G:/Sajal_Data/Obj_4_Code/Teacher_model_training/data/masks"
ORIG_CSV = "G:/Sajal_Data/Obj_4_Code/Teacher_model_training/data/kamra_teacher_expanded.csv"

# Pseudo-labeled dataset: images, their masks, and the label CSV.
PSEUDO_IMG_DIR = "G:/Sajal_Data/Obj_4_Code/TAPNet_Dataset/video_frames"
PSEUDO_MASK_DIR = "G:/Sajal_Data/Obj_4_Code/TAPNet_Dataset/contour_masks"
PSEUDO_CSV = "G:/Sajal_Data/Obj_4_Code/TAPNet_Dataset/contour_labels.csv"

# Output locations for the merged student-training dataset. The two
# directories are created below if missing; the CSV is written at the end.
MERGED_IMG_DIR = "G:/Sajal_Data/Obj_4_Code/TAPNet_Dataset/student_training/images"
MERGED_MASK_DIR = "G:/Sajal_Data/Obj_4_Code/TAPNet_Dataset/student_training/masks"
MERGED_CSV = "G:/Sajal_Data/Obj_4_Code/TAPNet_Dataset/student_training/merged_dataset.csv"

# Make sure both destination folders exist before anything is copied
for _out_dir in (MERGED_IMG_DIR, MERGED_MASK_DIR):
    os.makedirs(_out_dir, exist_ok=True)

# Read the original and the pseudo-label tables
orig_df, pseudo_df = pd.read_csv(ORIG_CSV), pd.read_csv(PSEUDO_CSV)

# Collapse the pseudo-label table to one row per image. GroupBy.first()
# takes, per column, the first non-null value inside each image's group.
# NOTE(review): the intended target layout is [image, plane, value], but no
# columns are selected or renamed here — confirm the pseudo CSV already
# uses the same column names as the original CSV.
pseudo_grouped = (
    pseudo_df
    .groupby("image")
    .first()
    .reset_index()
)

# Stack original rows first, pseudo rows after, with a fresh 0..n-1 index
merged_df = pd.concat(objs=[orig_df, pseudo_grouped], ignore_index=True)

# Copy all images and masks to merged folder
def safe_copy(src_dir: str, dst_dir: str, name: str) -> bool:
    """Copy the file ``name`` from ``src_dir`` into ``dst_dir`` if it exists.

    The copy is best-effort: a missing source is not an error. The return
    value lets callers detect the skip instead of it passing unnoticed.

    Args:
        src_dir: Directory expected to contain the file.
        dst_dir: Directory the file is copied into (must already exist).
        name: File name, used unchanged for both source and destination.

    Returns:
        True if the file was copied, False if ``src_dir`` holds no regular
        file called ``name`` (in which case nothing is written).
    """
    src_path = os.path.join(src_dir, name)
    # isfile (not exists) so that a directory — e.g. an empty ``name``, which
    # resolves to ``src_dir`` itself — is skipped rather than handed to
    # shutil.copy, which would raise on it.
    if not os.path.isfile(src_path):
        return False
    shutil.copy(src_path, os.path.join(dst_dir, name))
    return True

# Copy every listed image together with its mask into the merged folders.
# Each image is looked up in the original dataset first and in the
# pseudo-labeled dataset second; the first directory that contains it wins.
_source_dirs = (
    (ORIG_IMG_DIR, ORIG_MASK_DIR),
    (PSEUDO_IMG_DIR, PSEUDO_MASK_DIR),
)
missing_images = []  # rows whose image was found in neither source folder
missing_masks = []   # images that were copied but whose mask file is absent

for row in merged_df.itertuples():
    img_name = row.image
    # Build "<stem>_mask.png" from the stem only. The earlier chained
    # str.replace turned "x.jpg" into "x_mask_mask.png", because the ".png"
    # produced by the first replace was rewritten again by the second, so
    # masks of JPG images were never found.
    stem, _ext = os.path.splitext(img_name)
    mask_name = stem + "_mask.png"

    for img_dir, mask_dir in _source_dirs:
        if os.path.exists(os.path.join(img_dir, img_name)):
            safe_copy(img_dir, MERGED_IMG_DIR, img_name)
            safe_copy(mask_dir, MERGED_MASK_DIR, mask_name)
            # safe_copy skips an absent source without raising, so the
            # missing mask is recorded here for the summary below.
            if not os.path.exists(os.path.join(mask_dir, mask_name)):
                missing_masks.append(mask_name)
            break
    else:
        # No source folder contains this image.
        missing_images.append(img_name)

# Save final merged dataset CSV (all rows are kept, as before, including
# rows whose files could not be copied — those are reported below).
merged_df.to_csv(MERGED_CSV, index=False)

if missing_images:
    print(f"Warning: {len(missing_images)} image(s) listed in the CSV were not found in any source folder.")
if missing_masks:
    print(f"Warning: {len(missing_masks)} copied image(s) have no mask file in their source mask folder.")

print("✅ Dataset merging completed for student model training.")

✅ Dataset merging completed for student model training.
