In [None]:
import os
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm
import tensorflow as tf
import xml.etree.ElementTree as ET
import glob
import shutil
import matplotlib.image as mpimg

In [None]:
def find_first_n_files_in_folder(folder_path, n=5):
    """Return up to ``n`` file paths found under ``folder_path``.

    The tree is walked top-down with directory and file names sorted, so the
    same folder always yields the same files in the same order.

    Args:
        folder_path: Root directory to search; sub-folders are included.
        n: Maximum number of paths to return. Zero or a negative value yields
            an empty list.

    Returns:
        A list of at most ``n`` paths, each joined onto the directory it was
        found in. The list is empty when no files are found.
    """
    files = []
    if n <= 0:
        return files
    for root, dirs, file_names in os.walk(folder_path):
        # Sorting in place also fixes the order in which os.walk descends.
        dirs.sort()
        for file_name in sorted(file_names):
            files.append(os.path.join(root, file_name))
            if len(files) >= n:
                return files
    return files

# Example usage: print a handful of paths from the training image folder
# (the helper's default of n=5 applies here).
folder_path = '/kaggle/input/rdd2022-india/RDD_2022/train/images'
first_five_files = find_first_n_files_in_folder(folder_path)
for file in first_five_files:
    print(file)

In [None]:
def parse_xml_annotation(xml_file, classes=('D00', 'D10', 'D20', 'D40')):
    """Parse one XML annotation file into a plain dict.

    The file is expected to contain ``<filename>``, ``<size>`` (with
    ``<width>`` and ``<height>``) and any number of ``<object>`` elements,
    each holding a ``<name>`` and a ``<bndbox>``.

    Args:
        xml_file: Path to the XML file, or an open file object.
        classes: Object names to keep; every other ``<object>`` is skipped.

    Returns:
        A dict with keys ``filename``, ``width``, ``height`` and ``objects``.
        ``objects`` is a list of dicts with keys ``class``, ``xmin``, ``ymin``,
        ``xmax`` and ``ymax``; all numeric values are ints.
    """
    root = ET.parse(xml_file).getroot()

    def read_int(node, path):
        # int() alone raises ValueError on float-formatted text such as "12.0",
        # so parse as float first and round to the nearest pixel.
        return int(round(float(node.find(path).text)))

    data = {
        'filename': root.find('filename').text,
        'width': read_int(root, 'size/width'),
        'height': read_int(root, 'size/height'),
    }

    objects = []
    for obj in root.findall('object'):
        name = obj.find('name').text
        if name not in classes:
            continue
        objects.append({
            'class': name,
            'xmin': read_int(obj, 'bndbox/xmin'),
            'ymin': read_int(obj, 'bndbox/ymin'),
            'xmax': read_int(obj, 'bndbox/xmax'),
            'ymax': read_int(obj, 'bndbox/ymax'),
        })

    data['objects'] = objects
    return data

def parse_annotations(annotation_dir):
    """Parse every ``.xml`` file located directly inside ``annotation_dir``.

    Files are visited in sorted name order. ``os.listdir`` returns entries in
    arbitrary order, and the position of each annotation in the returned list
    determines which images land in the positional train/validation split
    later in the notebook, so a fixed order keeps that split reproducible.

    Args:
        annotation_dir: Folder holding the XML annotation files.

    Returns:
        A list with one ``parse_xml_annotation`` result per XML file.
    """
    xml_names = sorted(
        name for name in os.listdir(annotation_dir) if name.endswith('.xml')
    )
    return [
        parse_xml_annotation(os.path.join(annotation_dir, name))
        for name in tqdm(xml_names)
    ]

# Parse the training annotations; `annotations` holds one dict per XML file
annotation_dir = '/kaggle/input/rdd2022-india/RDD_2022/train/annotations/xmls'
annotations = parse_annotations(annotation_dir)


In [None]:
# Peek at the first five parsed annotations
annotations[:5]

In [None]:
# Number of annotation files that were parsed
len(annotations)

In [None]:
def annotations_to_dataframe(annotations):
    """Flatten parsed annotations into one DataFrame row per bounding box.

    Args:
        annotations: Iterable of dicts with keys ``filename``, ``width``,
            ``height`` and ``objects`` (a list of box dicts with keys
            ``class``, ``xmin``, ``ymin``, ``xmax`` and ``ymax``).

    Returns:
        A DataFrame with columns ``filename``, ``width``, ``height``,
        ``class``, ``xmin``, ``ymin``, ``xmax`` and ``ymax``. An image without
        objects contributes a single row whose class and box fields are empty.
    """
    image_fields = ('filename', 'width', 'height')
    box_fields = ('class', 'xmin', 'ymin', 'xmax', 'ymax')

    records = []
    for entry in annotations:
        image_info = {field: entry[field] for field in image_fields}
        # An image without objects still gets one placeholder row of Nones
        boxes = entry['objects'] or [dict.fromkeys(box_fields)]
        records.extend(
            {**image_info, **{field: box[field] for field in box_fields}}
            for box in boxes
        )
    return pd.DataFrame(records)

# Example usage
# One row per bounding box; an image without any kept object gets a single
# row whose class and box columns are empty
df = annotations_to_dataframe(annotations)
df

In [None]:
# Column dtypes and non-null counts
df.info()

In [None]:
# Save the flattened annotations to the current working directory.
# With pandas' default settings the row index is written as the first column.
df.to_csv('annotations.csv')

In [None]:
# Missing values per column; rows for images without any kept object have
# no class and no box
df.isna().sum()

In [None]:
# Keep only the rows that describe a bounding box and renumber them from zero;
# `df` itself is left untouched.
final_df = df.dropna().reset_index(drop=True)
final_df

In [None]:
# Attach the full path of the image each box belongs to
base_path = '/kaggle/input/rdd2022-india/RDD_2022/train/images'
final_df['image_path'] = [
    os.path.join(base_path, image_name) for image_name in final_df['filename']
]
final_df

In [None]:
# Path stored for the first row (row label 0)
final_df.loc[0, 'image_path']

In [None]:
# Distinct damage classes present among the remaining boxes
final_df['class'].unique()

In [None]:
# Number of boxes per damage class
final_df['class'].value_counts()

In [None]:
# Damage class code -> integer class id written into the YOLO label files
class_mapping = {
    code: class_id
    for class_id, code in enumerate(('D00', 'D10', 'D20', 'D40'))
}

# Folder that receives the generated YOLO label files (created if missing)
output_dir = '/kaggle/working/all_labels'
os.makedirs(output_dir, exist_ok=True)

def convert_to_yolo_format(row):
    """Format one bounding-box row as a YOLO label line.

    Args:
        row: Mapping (for example a DataFrame row) with keys ``class``,
            ``xmin``, ``ymin``, ``xmax``, ``ymax``, ``width`` and ``height``.
            Box corners and image size are in the same pixel units.

    Returns:
        The string ``"<class_id> <x_center> <y_center> <width> <height>"`` with
        the four box values expressed as fractions of the image size, or
        ``None`` when the row has no class or the image size is not positive.

    Raises:
        KeyError: If the class is not a key of ``class_mapping``.
    """
    if pd.isna(row['class']):
        return None

    image_width, image_height = row['width'], row['height']
    if not (image_width > 0 and image_height > 0):
        # Nothing sensible can be normalised against an empty image
        return None

    def clip(value, upper):
        return min(max(value, 0), upper)

    # Order the corners and clip them to the image so that every normalised
    # value stays within [0, 1]; boxes already inside the image are unchanged.
    x_low, x_high = sorted((clip(row['xmin'], image_width), clip(row['xmax'], image_width)))
    y_low, y_high = sorted((clip(row['ymin'], image_height), clip(row['ymax'], image_height)))

    class_id = class_mapping[row['class']]
    x_center = (x_low + x_high) / 2.0 / image_width
    y_center = (y_low + y_high) / 2.0 / image_height
    bbox_width = (x_high - x_low) / image_width
    bbox_height = (y_high - y_low) / image_height

    return f"{class_id} {x_center} {y_center} {bbox_width} {bbox_height}"

def write_yolo_annotation(final_df, output_dir):
    """Write one YOLO label file per image into ``output_dir``.

    Rows are grouped by ``filename``; each group becomes a text file with one
    ``convert_to_yolo_format`` line per row, joined by newlines. Rows for
    which the converter returns ``None`` are left out.

    Args:
        final_df: DataFrame with a ``filename`` column plus whatever columns
            ``convert_to_yolo_format`` reads.
        output_dir: Destination folder; created if it does not exist.
    """
    os.makedirs(output_dir, exist_ok=True)
    for filename, group in final_df.groupby('filename'):
        yolo_annotations = group.apply(convert_to_yolo_format, axis=1).dropna().tolist()
        # Swap only the final extension, whatever it is. A plain
        # str.replace('.jpg', '.txt') misses other extensions and rewrites
        # every '.jpg' occurrence inside the name.
        stem = os.path.splitext(os.path.basename(filename))[0]
        label_filename = os.path.join(output_dir, stem + '.txt')
        with open(label_filename, 'w') as f:
            f.write("\n".join(yolo_annotations))

# Write the label files for every image that still has at least one box
write_yolo_annotation(final_df, output_dir)

In [None]:
# Create the labels/images x train/val folder layout used for training
for content_kind in ('labels', 'images'):
    for split_name in ('train', 'val'):
        os.makedirs(f'/kaggle/working/datasets/{content_kind}/{split_name}/', exist_ok=True)

In [None]:
# Source and destination folders for the train/val dataset layout
label_source_dir = '/kaggle/working/all_labels'
label_train_dir = '/kaggle/working/datasets/labels/train/'
label_val_dir = '/kaggle/working/datasets/labels/val/'

image_source_dir ='/kaggle/input/rdd2022-india/RDD_2022/train/images'
image_train_dir = '/kaggle/working/datasets/images/train/'
image_val_dir = '/kaggle/working/datasets/images/val/'

# Positional 80/20 split over the distinct annotated images: the first 80%
# (in DataFrame order) go to training, the remainder to validation.
image_files = final_df['image_path'].unique()
split_index = int(0.8 * len(image_files))
train_files = image_files[:split_index]
val_files = image_files[split_index:]


def _file_names(paths, extension=None):
    """Return the base name of each path, optionally swapping its extension."""
    names = [os.path.basename(path.strip()) for path in paths]
    if extension is None:
        return names
    return [os.path.splitext(name)[0] + extension for name in names]


def _copy_files(names, source_dir, target_dir):
    """Copy each named file from ``source_dir`` into ``target_dir``."""
    os.makedirs(target_dir, exist_ok=True)
    for name in names:
        shutil.copy(os.path.join(source_dir, name), os.path.join(target_dir, name))


# Images keep their own file name (whatever the extension); every label file
# shares the stem of its image.
train_images = _file_names(train_files)
val_images = _file_names(val_files)
train_labels = _file_names(train_files, ".txt")
val_labels = _file_names(val_files, ".txt")

_copy_files(train_images, image_source_dir, image_train_dir)
_copy_files(val_images, image_source_dir, image_val_dir)

# Labels are copied rather than moved so that this cell can be re-run without
# failing on label files that are no longer in the source folder.
_copy_files(train_labels, label_source_dir, label_train_dir)
_copy_files(val_labels, label_source_dir, label_val_dir)

In [None]:
# Fetch the YOLOv5 repository and make it the working directory, so the
# relative paths used by the following cells resolve inside it.
!git clone https://github.com/ultralytics/yolov5
%cd yolov5

In [None]:
# Create the dataset.yaml file that tells the training script where the
# images are and what the classes are called.
dataset_yaml = """
train: /kaggle/working/datasets/images/train
val: /kaggle/working/datasets/images/val

# Number of classes
nc: 4

# Class names in class-id order (0 -> D00, 1 -> D10, 2 -> D20, 3 -> D40)
names: ['D00', 'D10', 'D20', 'D40']
"""

# Save the dataset.yaml file
with open('/kaggle/working/dataset.yaml', 'w') as f:
    f.write(dataset_yaml)

In [None]:
# Install the packages listed in requirements.txt of the current directory
!pip install -r requirements.txt

In [None]:
# Turn off Weights & Biases logging before training
!wandb off

In [None]:
# Train with 640px images, batch size 16, for 50 epochs on the dataset
# described by dataset.yaml; the run is named "road_damage_detection", the
# name the later cells use to locate the weights.
!python train.py --img 640 --batch 16 --epochs 50 --data /kaggle/working/dataset.yaml --cfg yolov5s.yaml --weights yolov5s.pt --name road_damage_detection

In [None]:
# Remove stale label cache files so the next training run rescans the labels.
# os.remove() only works on files, so the individual cache files are looked up
# instead of passing it the working directory itself.
# NOTE(review): assumes the caches are '*.cache' files somewhere below the
# dataset folder - confirm against the YOLOv5 version in use.
cache_path = '/kaggle/working/'
dataset_root = os.path.join(cache_path, 'datasets')
cache_files = sorted(
    path
    for path in glob.glob(os.path.join(dataset_root, '**', '*.cache'), recursive=True)
    if os.path.isfile(path)
)
if cache_files:
    for cache_file in cache_files:
        os.remove(cache_file)
        print(f"The cache file '{cache_file}' has been deleted.")
else:
    print(f"No cache files were found under '{dataset_root}'.")

In [None]:
# Archive the weights folder of the training run into weights.zip
!zip -r weights.zip /kaggle/working/yolov5/runs/train/road_damage_detection/weights

In [None]:
# Run detection on a single test image with the best weights of the training
# run, using a 0.25 confidence threshold.
!python detect.py --weights /kaggle/working/yolov5/runs/train/road_damage_detection/weights/best.pt --img 640 --conf 0.25 --source /kaggle/input/rdd2022-india/RDD_2022/test/images/India_000183.jpg --save-conf

In [None]:
import cv2
import plotly.express as px

# The detection output folder is numbered per run (this cell used to point at
# "exp3"), so its name depends on how often detection has been executed.
# Look up the most recently written copy of the image instead of hard-coding it.
detect_root = '/kaggle/working/yolov5/runs/detect'
candidates = glob.glob(os.path.join(detect_root, '*', 'India_000183.jpg'))
if not candidates:
    raise FileNotFoundError(f"No detection output for India_000183.jpg under {detect_root}")
image_path = max(candidates, key=os.path.getmtime)

# Load the image; cv2.imread returns None instead of raising when it fails
image = cv2.imread(image_path)
if image is None:
    raise ValueError(f"Could not read image: {image_path}")
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# Display the image with Plotly, hiding the pixel-coordinate tick labels
fig = px.imshow(image_rgb)
fig.update_xaxes(showticklabels=False).update_yaxes(showticklabels=False)
fig.show()


In [None]:
import shutil

# Folder to archive and the zip file it should end up in
source_folder = '/kaggle/working/yolov5/runs/train/road_damage_detection'
output_zip = '/kaggle/working/road_damage_detection.zip'

# make_archive appends ".zip" itself, so hand it the path without the extension
archive_base, _ = os.path.splitext(output_zip)
shutil.make_archive(archive_base, 'zip', root_dir=source_folder)

print("Folder successfully zipped!")
