# Dataset

In [None]:
import random, os
import re
from collections import defaultdict

def analyze_dataset_quality(dataset_dir):
    """Print basic image / identity / camera statistics for a Re-ID folder.

    Only files ending in ``.jpg`` or ``.png`` directly inside *dataset_dir*
    are considered. The person ID and camera ID are parsed from each file
    name with the ``<id>_c<cam>`` pattern (e.g. ``0001_c1s1_001051_00.jpg``
    gives ID 1, camera 1). Files whose name does not match are still part of
    the total image count but contribute to no per-ID or per-camera count.

    Args:
        dataset_dir: Directory whose image files are analysed.

    Returns:
        None. All statistics are printed to stdout.
    """
    pattern = re.compile(r'([-\d]+)_c(\d)')
    id_counts = defaultdict(int)
    cam_counts = defaultdict(int)

    images = [f for f in os.listdir(dataset_dir) if f.endswith(('.jpg', '.png'))]

    for img in images:
        m = pattern.search(img)
        if m:
            id_counts[int(m.group(1))] += 1
            cam_counts[int(m.group(2))] += 1

    print(f"Total images: {len(images)}")
    print(f"Unique IDs: {len(id_counts)}")
    print(f"Unique cameras: {len(cam_counts)}")

    # Without any parsed ID the average would divide by zero and min()/max()
    # would be called on an empty sequence, so stop after the totals.
    if not id_counts:
        print("No file names matched the '<id>_c<cam>' pattern; skipping per-ID statistics.")
        return

    per_id = list(id_counts.values())
    print(f"Avg images per ID: {sum(per_id) / len(per_id):.1f}")
    print(f"Min images per ID: {min(per_id)}")
    print(f"Max images per ID: {max(per_id)}")

    # Report the IDs that have only a few images.
    low_count_ids = [pid for pid, count in id_counts.items() if count < 5]
    print(f"IDs with < 5 images: {len(low_count_ids)}")

# Analyse each split of the dataset in turn: banner first, then its statistics.
for banner, split_dir in (
    ("=== TRAIN SET ===", "/home/ika/Downloads/Market-1501-v15.09.15/bounding_box_train"),
    ("\n=== QUERY SET ===", "/home/ika/Downloads/Market-1501-v15.09.15/query"),
    ("\n=== GALLERY SET ===", "/home/ika/Downloads/Market-1501-v15.09.15/bounding_box_test"),
):
    print(banner)
    analyze_dataset_quality(split_dir)

In [None]:
import os
import shutil
import random
import re
from collections import defaultdict

def _collect_images_by_id(source_dirs):
    """Group the .jpg/.png files found in *source_dirs* by the person ID in their name."""
    pattern = re.compile(r'([-\d]+)_c(\d)')
    images_by_id = defaultdict(list)
    for src in source_dirs:
        for img in os.listdir(src):
            if img.endswith(('.jpg', '.png')):
                m = pattern.search(img)
                if m:
                    images_by_id[int(m.group(1))].append(os.path.join(src, img))
    return images_by_id


def _is_same_or_inside(path, parent):
    """Return True if *path* resolves to *parent* itself or to a location below it."""
    path = os.path.realpath(path)
    parent = os.path.realpath(parent)
    return path == parent or path.startswith(parent.rstrip(os.sep) + os.sep)


def _copy_all(image_paths, dst_dir):
    """Copy every file in *image_paths* into *dst_dir* under its base name."""
    for img_path in image_paths:
        shutil.copy(img_path, os.path.join(dst_dir, os.path.basename(img_path)))


def create_proper_reid_split(
    train_src, query_src, gallery_src,
    output_dir, train_ratio=0.7, seed=None
):
    """Build an identity-disjoint train / query / gallery split for TAO.

    All images from the three source folders are pooled and grouped by the
    person ID parsed from the file name (``<id>_c<cam>`` pattern). The IDs are
    shuffled and divided so that the test IDs never appear in the train set.
    For every test ID one randomly chosen image becomes the query and the
    remaining images form the gallery; an ID with a single image has that
    image copied to both query and gallery.

    The split is written to ``bounding_box_train``, ``query`` and
    ``bounding_box_test`` below *output_dir*. Existing folders with these
    names are deleted first.

    NOTE: files are copied under their base name, so identically named files
    coming from different source folders overwrite each other.

    Args:
        train_src: Source folder with training images.
        query_src: Source folder with query images.
        gallery_src: Source folder with gallery images.
        output_dir: Folder that receives the three output sub-folders.
        train_ratio: Fraction of the IDs assigned to the train set (0..1).
        seed: Optional seed. When given, the split is reproducible for the
            same set of input files; when None the global ``random`` state
            is used, as before.

    Raises:
        ValueError: If *train_ratio* is outside [0, 1], or if a source folder
            is (or lies inside) one of the output folders, which would delete
            the input before it is read.
    """
    if not 0.0 <= train_ratio <= 1.0:
        raise ValueError(f"train_ratio must be between 0 and 1, got {train_ratio!r}")

    print("Creating proper Re-ID split...")

    train_dst = os.path.join(output_dir, 'bounding_box_train')
    query_dst = os.path.join(output_dir, 'query')
    gallery_dst = os.path.join(output_dir, 'bounding_box_test')

    sources = [train_src, query_src, gallery_src]
    destinations = [train_dst, query_dst, gallery_dst]

    # The output folders are wiped below; refuse to run if that would also
    # wipe one of the inputs.
    for src in sources:
        for dst in destinations:
            if _is_same_or_inside(src, dst):
                raise ValueError(
                    f"Source folder {src!r} overlaps output folder {dst!r}; "
                    "choose a different output_dir."
                )

    # Read the sources before touching the output so that a missing source
    # folder fails without anything having been deleted.
    all_images = _collect_images_by_id(sources)

    # Prepare empty output folders.
    for d in destinations:
        if os.path.exists(d):
            shutil.rmtree(d)
        os.makedirs(d)

    print(f"Total unique IDs found: {len(all_images)}")

    rng = random if seed is None else random.Random(seed)

    # Sort before shuffling: os.listdir order is arbitrary, and a seeded
    # shuffle is only reproducible when it starts from a fixed order.
    all_ids = sorted(all_images)
    rng.shuffle(all_ids)

    train_size = int(len(all_ids) * train_ratio)
    train_ids = all_ids[:train_size]
    test_ids = all_ids[train_size:]

    print(f"Splitting into {len(train_ids)} train IDs and {len(test_ids)} test IDs.")

    # Train set: every image of every train ID.
    for person_id in train_ids:
        _copy_all(all_images[person_id], train_dst)

    # Query / gallery sets: one query image per test ID, the rest is gallery.
    for person_id in sorted(test_ids):
        images = sorted(all_images[person_id])
        rng.shuffle(images)

        query_images = images[:1]
        # An ID with a single image reuses that image as its gallery entry.
        gallery_images = images[1:] or query_images

        _copy_all(query_images, query_dst)
        _copy_all(gallery_images, gallery_dst)

    print("\nProper split created successfully!")
    print(f"  Train: {len(os.listdir(train_dst))} images")
    print(f"  Query: {len(os.listdir(query_dst))} images")
    print(f"  Gallery: {len(os.listdir(gallery_dst))} images")

# Existing dataset folders to pool the images from — adjust to your setup.
train_src = "/home/ika/yzlm/TwinProject/CCVID/CCVID_market1501_format/bounding_box_train"
query_src = "/home/ika/yzlm/TwinProject/CCVID/CCVID_market1501_format/query"
gallery_src = "/home/ika/yzlm/TwinProject/CCVID/CCVID_market1501_format/bounding_box_test"

# Folder in which the new, identity-disjoint dataset is created.
output_dir = "/home/ika/yzlm/TwinProject/CCVID/CCVID_proper_split"

# Build the split.
create_proper_reid_split(
    train_src=train_src,
    query_src=query_src,
    gallery_src=gallery_src,
    output_dir=output_dir,
    train_ratio=0.7,
)

# Train 

In [None]:
# Run the TAO re-identification train task with the experiment spec train_with_val.yaml.
!tao model re_identification train -e /home/ika/yzlm/TwinProject/CCVID/train_with_val.yaml

# Export and Evaluate


In [None]:
# Run the TAO re-identification export task with the experiment spec export.yaml,
# overriding dataset.num_classes on the command line.
# NOTE(review): 158 is hard-coded — presumably the number of train identities
# reported by the split cell; confirm it matches the trained checkpoint.
!tao model re_identification export \
    -e /home/ika/yzlm/TwinProject/CCVID/export.yaml \
    dataset.num_classes=158

In [None]:
# Evaluation command: evaluate the checkpoint stored under results/train on the
# query and gallery folders of CCVID_proper_split.
!tao model re_identification evaluate \
    -e /home/ika/yzlm/TwinProject/CCVID/train_with_val.yaml \
    evaluate.checkpoint=/home/ika/yzlm/TwinProject/CCVID/results/train/model_epoch_049_step_03838.pth \
    evaluate.query_dataset=/home/ika/yzlm/TwinProject/CCVID/CCVID_proper_split/query \
    evaluate.test_dataset=/home/ika/yzlm/TwinProject/CCVID/CCVID_proper_split/bounding_box_test 


In [None]:
# Evaluation command: evaluate the checkpoint stored under results_imagenet/train
# on the query and gallery folders of CCVID_proper_split.
!tao model re_identification evaluate \
    -e /home/ika/yzlm/TwinProject/CCVID/train_with_val.yaml \
    evaluate.checkpoint=/home/ika/yzlm/TwinProject/CCVID/results_imagenet/train/model_epoch_049_step_03838.pth \
    evaluate.query_dataset=/home/ika/yzlm/TwinProject/CCVID/CCVID_proper_split/query \
    evaluate.test_dataset=/home/ika/yzlm/TwinProject/CCVID/CCVID_proper_split/bounding_box_test 
