In [None]:
# Mount Google Drive at /content/drive (this import is only available inside Google Colab).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Copy image từ folder gốc qua drive riêng theo format folder của YOLO/COCO dataset

In [None]:
import os
import pandas as pd
import requests
import random
from sklearn.model_selection import train_test_split

# ---- Dataset locations and sampling settings ----
# Headerless two-column CSV: <relative image path>,<label>
csv_path = "/content/UpdatedCarDataset.csv"
# Root folder of the generated dataset on Drive
output_dir = "/content/drive/MyDrive/yolov11_car"
# Base folder that the CSV's relative image paths are joined onto
old_path = "/content/drive/MyDrive/Public"
# Staging area: images are first copied into images/<label>/
images_dir = os.path.join(output_dir, "images")
# Fraction of each label's images that goes to the training split
train_test_split_ratio = 0.8
# Upper bound on how many images are copied per label
images_per_label = 500

# images_dir lives inside output_dir, so creating it creates both folders.
os.makedirs(images_dir, exist_ok=True)

# Load the file list; labels are converted to strings so they can be used as folder names.
df = pd.read_csv(csv_path, header=None, names=["path", "label"]).astype({"label": str})

# Function to copy an image
def copy_image(src, dest):
    """Copy the file at ``src`` to ``dest``, creating ``dest``'s parent folder if needed.

    Args:
        src: Path of the file to copy.
        dest: Path of the copy. May be a bare filename (no directory part),
            in which case the file is written to the current working directory.

    Returns:
        True if the file was copied; False if ``src`` does not exist or the
        copy failed (failures are printed, never raised).
    """
    import shutil  # local import so this block works without touching the import cell

    try:
        if os.path.exists(src):
            dest_dir = os.path.dirname(dest)
            # os.makedirs('') raises, so only create a folder when dest actually has one.
            if dest_dir:
                os.makedirs(dest_dir, exist_ok=True)
            # copyfile streams the data in chunks and raises SameFileError when
            # src and dest are the same file, instead of truncating the source.
            shutil.copyfile(src, dest)
            return True
    except Exception as e:
        print(f"Failed to copy {src}: {e}")
    return False

# Copy up to `images_per_label` images of every label into images/<label>/
# sort=False keeps the labels in order of first appearance in the CSV.
for label, label_df in df.groupby('label', sort=False):
    print(f"Processing label: {label}")
    label_output_dir = os.path.join(images_dir, label)
    os.makedirs(label_output_dir, exist_ok=True)

    # Successfully copied files are numbered consecutively: 0.jpg, 1.jpg, ...
    copied_images = 0
    for relative_path in label_df['path']:
        if copied_images >= images_per_label:
            break
        full_path = os.path.join(old_path, relative_path)
        output_path = os.path.join(label_output_dir, f"{copied_images}.jpg")
        # Only count the image when the copy worked, so numbering has no gaps.
        if copy_image(full_path, output_path):
            copied_images += 1

    print(f"Copied {copied_images} images for label: {label}")

# Split every label folder under images/ into train/<label>/ and test/<label>/
for label in sorted(os.listdir(images_dir)):
    label_path = os.path.join(images_dir, label)
    # Stray files in images/ are not label folders; listing them would raise.
    if not os.path.isdir(label_path):
        continue

    # os.listdir returns entries in arbitrary order; sorting makes the
    # random_state=42 split reproducible from run to run.
    images = sorted(os.listdir(label_path))

    # train_test_split raises when a side of the split would be empty.
    if len(images) < 2:
        print(f"Skipping split for label {label}: only {len(images)} image(s) found.")
        continue

    train, test = train_test_split(images, train_size=train_test_split_ratio, random_state=42)

    # Create train/test directories
    train_dir = os.path.join(output_dir, "train", label)
    test_dir = os.path.join(output_dir, "test", label)
    os.makedirs(train_dir, exist_ok=True)
    os.makedirs(test_dir, exist_ok=True)

    # Move files out of the staging folder; os.replace overwrites an existing
    # destination on every platform, so re-running the cell does not fail.
    for img in train:
        os.replace(os.path.join(label_path, img), os.path.join(train_dir, img))
    for img in test:
        os.replace(os.path.join(label_path, img), os.path.join(test_dir, img))

print("Data preparation completed.")


Processing label: 0
Copied 500 images for label: 0
Processing label: 1
Copied 500 images for label: 1
Processing label: 2
Copied 500 images for label: 2
Processing label: 3
Copied 500 images for label: 3
Processing label: 4
Copied 500 images for label: 4
Processing label: 5
Copied 500 images for label: 5
Processing label: 6
Copied 500 images for label: 6
Processing label: 7
Copied 500 images for label: 7
Processing label: 8
Copied 500 images for label: 8
Data preparation completed.


## Lọc các ảnh bị lỗi và lưu các ảnh bình thường vào UpdatedCarDataset

In [None]:
import os
import pandas as pd
from PIL import Image

def filter_png_or_invalid_images(image_dir, csv_path):
    """Return the CSV rows whose image opens cleanly and is not a PNG.

    Args:
        image_dir: Folder that the CSV's image paths are relative to.
        csv_path: Headerless two-column CSV (image path, label).

    Returns:
        DataFrame with columns ``image_path`` and ``label`` containing only the
        rows that passed the check; the original row index is kept.

    Rows are dropped (never raised on) when the path cell is unusable, the file
    cannot be opened, ``Image.verify()`` fails, or the detected format is PNG.
    A one-line summary of what was dropped is printed.
    """
    df = pd.read_csv(csv_path, header=None, names=["image_path", "label"])

    keep_flags = []
    png_count = 0
    unreadable_count = 0
    for rel_path in df["image_path"]:
        try:
            # Joined inside the try so a blank/NaN path cell drops the row
            # instead of aborting the whole scan with a TypeError.
            img_path = os.path.join(image_dir, rel_path)
            with Image.open(img_path) as img:
                img.verify()  # integrity check
                is_png = img.format == 'PNG'
        except Exception:
            # Deliberately broad: any failure to open or verify means "drop this row".
            unreadable_count += 1
            keep_flags.append(False)
            continue
        if is_png:
            png_count += 1
        keep_flags.append(not is_png)

    # Select with a boolean mask rather than rebuilding the frame row by row.
    keep_mask = pd.Series(keep_flags, index=df.index, dtype=bool)
    updated_df = df.loc[keep_mask, ["image_path", "label"]]

    print(
        f"Kept {len(updated_df)} of {len(df)} rows "
        f"(dropped {png_count} PNG, {unreadable_count} unreadable)."
    )
    return updated_df

# Run the filter on the Kaggle copy of the dataset (adjust both paths to your environment).
image_dir = "/kaggle/input/car-brand/Public/Public"
csv_path = "/kaggle/input/car-brand/CarDataset.csv"

updated_df = filter_png_or_invalid_images(image_dir=image_dir, csv_path=csv_path)

# Preview the first rows that survived the filter.
print(updated_df.head())


In [None]:
# Write the filtered rows to a writable location, headerless and without the index column.
output_csv_path = "/kaggle/working/UpdatedCarDataset.csv"
updated_df.to_csv(output_csv_path, index=False, header=False)

print(f"Updated CSV saved to: {output_csv_path}")