In [2]:
!pip install ultralytics -q

In [3]:
import os
from pathlib import Path
from ultralytics import YOLO
import shutil
import kagglehub

In [4]:
# Fetch the Crackathon dataset through kagglehub. `path` is the local root
# directory of the downloaded files and is used by later cells.
dataset_handle = "anulayakhare/crackathon-data"
path = kagglehub.dataset_download(dataset_handle)
print(f"Path to dataset files: {path}")

Using Colab cache for faster access to the 'crackathon-data' dataset.
Path to dataset files: /kaggle/input/crackathon-data


In [99]:
# Locations used by the rest of the notebook.
# dataset_path      - "randomized_dataset" folder inside the downloaded dataset root
# test_images_path  - images to run inference on (adjust if the dataset layout differs)
# weights_path      - model checkpoint handed to YOLO(...) in the next cell
dataset_path = os.path.join(path, "randomized_dataset")
test_images_path = Path(dataset_path, "test", "images")
weights_path = "best.pt"

for label, location in (
    ("Test images location", test_images_path),
    ("Weights", weights_path),
):
    print(f"{label}: {location}")

Test images location: /kaggle/input/crackathon-data/randomized_dataset/test/images
Weights: best.pt


In [100]:
# Build the detector from the checkpoint configured in `weights_path`.
# `model` is reused by the inference cell below.
model = YOLO(model=weights_path)
print("Model loaded successfully!")

Model loaded successfully!


In [104]:
# Create empty txt files for images with no detections.
# Every test image must end up with a label file; this cell pre-creates an
# empty placeholder for each image so that images without any detection are
# still represented. Files that already exist are left untouched.
labels_path = Path("InceptionJS/predictions/labels/")

# On a fresh runtime the run directory does not exist yet, and Path.touch()
# does not create missing parent directories.
labels_path.mkdir(parents=True, exist_ok=True)

# Cover both extensions so the set of images matches the one used by the
# sanity-check cell (which globs *.jpg and *.png).
test_images = list(test_images_path.glob("*.jpg")) + list(test_images_path.glob("*.png"))

for img_path in test_images:
    txt_file = labels_path / f"{img_path.stem}.txt"
    if not txt_file.exists():
        # Create empty file
        txt_file.touch()

print(f"✓ Ensured all {len(test_images)} images have .txt files")

✓ Ensured all 6000 images have .txt files


In [105]:
# Run inference on every test image and write one YOLO-format label file per
# image (class x y w h conf) under InceptionJS/predictions/labels/.
labels_path = Path("InceptionJS/predictions/labels/")
# NOTE(review): output_dir is defined here but no visible cell moves or copies
# files into it; kept in case a later cell relies on it.
output_dir = Path("predictions")

# Ultralytics appends to an existing label file when save_txt=True, and
# exist_ok=True reuses the same run directory, so re-running this cell would
# duplicate every detection line. Truncate (rather than delete) any label files
# left from an earlier run so empty placeholders for detection-free images survive.
labels_path.mkdir(parents=True, exist_ok=True)
for stale_label in labels_path.glob("*.txt"):
    stale_label.write_text("")

results = model.predict(
    source=test_images_path,
    save_txt=True,
    save_conf=True,  # append the confidence as a 6th column on each line
    conf=0.1,
    iou=0.55,
    project="InceptionJS",
    name="predictions",
    stream=True,
    exist_ok=True
)
# stream=True yields results lazily; the generator has to be consumed for
# inference (and label writing) to actually happen.
for _ in results:
    pass

print(f"✓ Inference complete!")
print(f"✓ Predictions saved to: InceptionJS/predictions/labels/")

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
image 1006/6000 /kaggle/input/crackathon-data/randomized_dataset/test/images/006481.jpg: 768x768 1 Longitudinal_Crack, 1 Alligator_Crack, 37.3ms
image 1007/6000 /kaggle/input/crackathon-data/randomized_dataset/test/images/006483.jpg: 768x768 2 Other_Corruptions, 38.3ms
image 1008/6000 /kaggle/input/crackathon-data/randomized_dataset/test/images/006486.jpg: 768x768 1 Longitudinal_Crack, 1 Transverse_Crack, 37.6ms
image 1009/6000 /kaggle/input/crackathon-data/randomized_dataset/test/images/006487.jpg: 768x768 1 Longitudinal_Crack, 37.1ms
image 1010/6000 /kaggle/input/crackathon-data/randomized_dataset/test/images/006499.jpg: 768x768 2 Transverse_Cracks, 1 Alligator_Crack, 1 Other_Corruption, 37.8ms
image 1011/6000 /kaggle/input/crackathon-data/randomized_dataset/test/images/006500.jpg: 768x768 1 Longitudinal_Crack, 2 Transverse_Cracks, 37.4ms
image 1012/6000 /kaggle/input/crackathon-data/randomized_dataset/test/images/00650

In [106]:
# Cell 6: Sanity Check
# Verifies the label files in `labels_path` against the images in
# `test_images_path`:
#   1. the number of label files equals the number of test images,
#   2. every image has a label file with the same stem,
#   3. a small sample of non-empty files has six fields per line and a valid class,
#   4. the class distribution over all files (invalid class IDs are reported),
#   5. a final verdict that is only positive when checks 1, 2 and 4 passed.
print("\n" + "="*50)
print("SANITY CHECK")
print("="*50)

# Valid class IDs and their display names; also used to validate class values.
class_names = {0: "Longitudinal", 1: "Transverse", 2: "Alligator", 3: "Other", 4: "Pothole"}

test_images = list(test_images_path.glob("*.jpg")) + list(test_images_path.glob("*.png"))
pred_files = list(labels_path.glob("*.txt"))

# Check number of files
counts_match = len(pred_files) == len(test_images)
print(f"\n1. Number of prediction files: {len(pred_files)}")
print(f"   Number of test images: {len(test_images)}")
print(f"   Match: {'YES' if counts_match else 'NO'}")

# Check file naming convention
print(f"\n2. File naming convention check:")
mismatched = [
    img_path.name
    for img_path in test_images
    if not (labels_path / f"{img_path.stem}.txt").exists()
]

if mismatched:
    print(f"   ✗ Missing predictions for {len(mismatched)} images: {mismatched[:5]}")
else:
    print(f"   ✓ All test images have corresponding .txt files")

# Check format of predictions
print(f"\n3. Format validation (sample 5 files):")
non_empty_files = [f for f in pred_files if f.stat().st_size > 0]
for pred_file in non_empty_files[:5]:
    print(f"\n   File: {pred_file.name}")
    with open(pred_file, 'r', encoding="utf-8") as f:
        lines = f.readlines()
    for i, line in enumerate(lines[:3]):  # Show first 3 lines
        parts = line.strip().split()
        if len(parts) != 6:
            print(f"WARNING: Line {i+1} has {len(parts)} values (expected 6)")
            continue
        cls, x, y, w, h, conf = parts
        print(f"      Line {i+1}: class={cls}, x={x}, y={y}, w={w}, h={h}, conf={conf}")
        # Validate class is 0-4
        try:
            if int(cls) not in class_names:
                print(f"WARNING: Invalid class ID")
        except ValueError:
            # int() is the only call here that can fail; a bare except would
            # also hide unrelated errors such as KeyboardInterrupt.
            print(f"WARNING: Invalid class value")

# Check class distribution
print(f"\n4. Class distribution in predictions:")
class_counts = {cls_id: 0 for cls_id in class_names}
total_detections = 0
empty_files = 0
malformed_lines = 0  # lines whose class is not an integer or is outside class_names

for pred_file in pred_files:
    with open(pred_file, 'r', encoding="utf-8") as f:
        lines = f.readlines()
    if len(lines) == 0:
        empty_files += 1
    for line in lines:
        parts = line.strip().split()
        if not parts:
            continue  # blank line: nothing to count
        try:
            cls = int(parts[0])
        except ValueError:
            malformed_lines += 1
            continue
        if cls in class_counts:
            class_counts[cls] += 1
            total_detections += 1
        else:
            malformed_lines += 1

for cls_id, count in class_counts.items():
    print(f"   Class {cls_id} ({class_names[cls_id]}): {count} detections")
print(f"   Total detections: {total_detections}")
print(f"   Empty files (no detections): {empty_files}")
if malformed_lines:
    print(f"   ✗ Lines with an invalid class ID: {malformed_lines}")

print(f"\n5. Ready for submission:")
if counts_match and not mismatched and malformed_lines == 0:
    print(f"   ✓ Zip the 'predictions' folder and submit")
else:
    print(f"   ✗ Resolve the issues reported above before submitting")


SANITY CHECK

1. Number of prediction files: 6000
   Number of test images: 6000
   Match: ✓ YES

2. File naming convention check:
   ✓ All test images have corresponding .txt files

3. Format validation (sample 5 files):

   File: 006920.txt
      Line 1: class=3, x=0.657359, y=0.915005, w=0.103371, h=0.0553405, conf=0.768811
      Line 2: class=3, x=0.388046, y=0.556322, w=0.0970065, h=0.0249637, conf=0.721135
      Line 3: class=3, x=0.966031, y=0.688207, w=0.0677486, h=0.0409776, conf=0.631939

   File: 038031.txt
      Line 1: class=3, x=0.901315, y=0.726365, w=0.181104, h=0.0550578, conf=0.704937
      Line 2: class=3, x=0.806588, y=0.777384, w=0.386749, h=0.167169, conf=0.656239
      Line 3: class=0, x=0.49181, y=0.875604, w=0.0858617, h=0.191266, conf=0.522971

   File: 024504.txt
      Line 1: class=2, x=0.391975, y=0.864101, w=0.757348, h=0.269284, conf=0.59839
      Line 2: class=3, x=0.553627, y=0.923839, w=0.435385, h=0.0971731, conf=0.471807
      Line 3: class=3, x=0.4

In [107]:
# Zip the generated label files and hand the archive to the browser.
# google.colab only exists on Colab, so it is imported here rather than in the
# notebook's import cell; shutil is already imported there.
from google.colab import files

# Archive the directory the inference cell wrote to instead of a hard-coded
# absolute path, so the archive always matches `labels_path`.
labels_dir = labels_path.resolve()
if not labels_dir.is_dir():
    raise FileNotFoundError(f"Label directory not found: {labels_dir} - run the inference cell first")

# Create zip first
shutil.make_archive('test_predictions_labels', 'zip', str(labels_dir))

# Download
files.download('test_predictions_labels.zip')
print("✓ Download started!")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

✓ Download started!
