In [1]:
import os
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm
import tensorflow as tf
import xml.etree.ElementTree as ET
import glob
import shutil
import matplotlib.image as mpimg

2024-07-31 13:04:05.674777: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-31 13:04:05.674901: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-31 13:04:05.784876: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
def find_first_n_files_in_folder(folder_path, n=5):
    files = []
    for root, dirs, file_names in os.walk(folder_path):
        for file_name in file_names:
            files.append(os.path.join(root, file_name))
            if len(files) >= n:
                return files
    return files

# Example usage
folder_path = '/kaggle/input/rdd2022-india/RDD_2022/train/images'
first_five_files = find_first_n_files_in_folder(folder_path)
for file in first_five_files:
    print(file)

/kaggle/input/rdd2022-india/RDD_2022/train/images/India_000594.jpg
/kaggle/input/rdd2022-india/RDD_2022/train/images/India_006535.jpg
/kaggle/input/rdd2022-india/RDD_2022/train/images/Japan_005868.jpg
/kaggle/input/rdd2022-india/RDD_2022/train/images/India_006385.jpg
/kaggle/input/rdd2022-india/RDD_2022/train/images/Japan_002938.jpg


In [3]:
def parse_xml_annotation(xml_file):
    tree = ET.parse(xml_file)
    root = tree.getroot()
    
    data = {
        'filename': root.find('filename').text,
        'width': int(root.find('size/width').text),
        'height': int(root.find('size/height').text),
    }

    objects = []
    for obj in root.findall('object'):
        if obj.find('name').text in ['D40','D20','D00','D10']:
#     for obj in root.findall('object'):
            obj_data = {
                'class': obj.find('name').text,
                'xmin': int(obj.find('bndbox/xmin').text),
                'ymin': int(obj.find('bndbox/ymin').text),
                'xmax': int(obj.find('bndbox/xmax').text),
                'ymax': int(obj.find('bndbox/ymax').text),
            }
            objects.append(obj_data)
    
    data['objects'] = objects
    return data

def parse_annotations(annotation_dir):
    annotations = []
    for xml_file in tqdm(os.listdir(annotation_dir)):
        if xml_file.endswith('.xml'):
            annotation = parse_xml_annotation(os.path.join(annotation_dir, xml_file))
            annotations.append(annotation)
    return annotations

# Example usage
annotation_dir = '/kaggle/input/rdd2022-india/RDD_2022/train/annotations/xmls'
annotations = parse_annotations(annotation_dir)


  0%|          | 0/23017 [00:00<?, ?it/s]

In [4]:
annotations[0:5]

[{'filename': 'India_007360.jpg', 'width': 720, 'height': 720, 'objects': []},
 {'filename': 'United_States_002203.jpg',
  'width': 640,
  'height': 640,
  'objects': [{'class': 'D00',
    'xmin': 252,
    'ymin': 430,
    'xmax': 388,
    'ymax': 626}]},
 {'filename': 'Japan_007915.jpg',
  'width': 600,
  'height': 600,
  'objects': [{'class': 'D20',
    'xmin': 363,
    'ymin': 249,
    'xmax': 587,
    'ymax': 541},
   {'class': 'D20', 'xmin': 32, 'ymin': 180, 'xmax': 235, 'ymax': 449},
   {'class': 'D10', 'xmin': 225, 'ymin': 370, 'xmax': 465, 'ymax': 415}]},
 {'filename': 'United_States_000342.jpg',
  'width': 640,
  'height': 640,
  'objects': [{'class': 'D00',
    'xmin': 167,
    'ymin': 442,
    'xmax': 209,
    'ymax': 566},
   {'class': 'D10', 'xmin': 71, 'ymin': 487, 'xmax': 192, 'ymax': 504}]},
 {'filename': 'India_006920.jpg',
  'width': 720,
  'height': 720,
  'objects': [{'class': 'D40',
    'xmin': 551,
    'ymin': 619,
    'xmax': 584,
    'ymax': 647},
   {'class': '

In [5]:
len(annotations)

23017

In [6]:
def annotations_to_dataframe(annotations):
    rows = []
    for annotation in annotations:
        filename = annotation['filename']
        width = annotation['width']
        height = annotation['height']
        if not annotation['objects']:
            row = {
                'filename': filename,
                'width': width,
                'height': height,
                'class': None,  # No object class
                'xmin': None,   # No bounding box
                'ymin': None,
                'xmax': None,
                'ymax': None
            }
            rows.append(row)
        else:
            for obj in annotation['objects']:
                row = {
                    'filename': filename,
                    'width': width,
                    'height': height,
                    'class': obj['class'],
                    'xmin': obj['xmin'],
                    'ymin': obj['ymin'],
                    'xmax': obj['xmax'],
                    'ymax': obj['ymax']
                }
                rows.append(row)
    return pd.DataFrame(rows)

# Example usage
df = annotations_to_dataframe(annotations)
df

Unnamed: 0,filename,width,height,class,xmin,ymin,xmax,ymax
0,India_007360.jpg,720,720,,,,,
1,United_States_002203.jpg,640,640,D00,252.0,430.0,388.0,626.0
2,Japan_007915.jpg,600,600,D20,363.0,249.0,587.0,541.0
3,Japan_007915.jpg,600,600,D20,32.0,180.0,235.0,449.0
4,Japan_007915.jpg,600,600,D10,225.0,370.0,465.0,415.0
...,...,...,...,...,...,...,...,...
41399,India_001539.jpg,720,720,,,,,
41400,Japan_012273.jpg,600,600,,,,,
41401,Japan_004793.jpg,600,600,,,,,
41402,Japan_005371.jpg,600,600,D20,67.0,426.0,230.0,600.0


In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 41404 entries, 0 to 41403
Data columns (total 8 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   filename  41404 non-null  object 
 1   width     41404 non-null  int64  
 2   height    41404 non-null  int64  
 3   class     34315 non-null  object 
 4   xmin      34315 non-null  float64
 5   ymin      34315 non-null  float64
 6   xmax      34315 non-null  float64
 7   ymax      34315 non-null  float64
dtypes: float64(4), int64(2), object(2)
memory usage: 2.5+ MB


In [8]:
df.to_csv('annotations.csv')

In [9]:
df.isna().sum()

filename       0
width          0
height         0
class       7089
xmin        7089
ymin        7089
xmax        7089
ymax        7089
dtype: int64

In [10]:
final_df =df.copy()
final_df.dropna(inplace=True)
final_df.reset_index(inplace=True)
final_df.drop('index',axis=1,inplace=True)
final_df

Unnamed: 0,filename,width,height,class,xmin,ymin,xmax,ymax
0,United_States_002203.jpg,640,640,D00,252.0,430.0,388.0,626.0
1,Japan_007915.jpg,600,600,D20,363.0,249.0,587.0,541.0
2,Japan_007915.jpg,600,600,D20,32.0,180.0,235.0,449.0
3,Japan_007915.jpg,600,600,D10,225.0,370.0,465.0,415.0
4,United_States_000342.jpg,640,640,D00,167.0,442.0,209.0,566.0
...,...,...,...,...,...,...,...,...
34310,Japan_008562.jpg,600,600,D00,372.0,377.0,396.0,397.0
34311,United_States_001867.jpg,640,640,D10,95.0,452.0,542.0,511.0
34312,United_States_004656.jpg,640,640,D10,1.0,443.0,232.0,473.0
34313,Japan_005371.jpg,600,600,D20,67.0,426.0,230.0,600.0


In [11]:
base_path = '/kaggle/input/rdd2022-india/RDD_2022/train/images'
final_df['image_path'] = final_df['filename'].apply(lambda x: os.path.join(base_path, x))
final_df

Unnamed: 0,filename,width,height,class,xmin,ymin,xmax,ymax,image_path
0,United_States_002203.jpg,640,640,D00,252.0,430.0,388.0,626.0,/kaggle/input/rdd2022-india/RDD_2022/train/ima...
1,Japan_007915.jpg,600,600,D20,363.0,249.0,587.0,541.0,/kaggle/input/rdd2022-india/RDD_2022/train/ima...
2,Japan_007915.jpg,600,600,D20,32.0,180.0,235.0,449.0,/kaggle/input/rdd2022-india/RDD_2022/train/ima...
3,Japan_007915.jpg,600,600,D10,225.0,370.0,465.0,415.0,/kaggle/input/rdd2022-india/RDD_2022/train/ima...
4,United_States_000342.jpg,640,640,D00,167.0,442.0,209.0,566.0,/kaggle/input/rdd2022-india/RDD_2022/train/ima...
...,...,...,...,...,...,...,...,...,...
34310,Japan_008562.jpg,600,600,D00,372.0,377.0,396.0,397.0,/kaggle/input/rdd2022-india/RDD_2022/train/ima...
34311,United_States_001867.jpg,640,640,D10,95.0,452.0,542.0,511.0,/kaggle/input/rdd2022-india/RDD_2022/train/ima...
34312,United_States_004656.jpg,640,640,D10,1.0,443.0,232.0,473.0,/kaggle/input/rdd2022-india/RDD_2022/train/ima...
34313,Japan_005371.jpg,600,600,D20,67.0,426.0,230.0,600.0,/kaggle/input/rdd2022-india/RDD_2022/train/ima...


In [12]:
final_df['image_path'][0]

'/kaggle/input/rdd2022-india/RDD_2022/train/images/United_States_002203.jpg'

In [13]:
final_df['class'].unique()

array(['D00', 'D20', 'D10', 'D40'], dtype=object)

In [14]:
final_df['class'].value_counts()

class
D00    12354
D20     9054
D10     7342
D40     5565
Name: count, dtype: int64

In [15]:
class_mapping = {
    'D00': 0,
    'D10': 1,
    'D20': 2,
    'D40': 3
}

output_dir = '/kaggle/working/all_labels'
os.makedirs(output_dir, exist_ok=True)

def convert_to_yolo_format(row):
    if pd.isna(row['class']):
        return

    class_id = class_mapping[row['class']]
    x_center = (row['xmin'] + row['xmax']) / 2.0 / row['width']
    y_center = (row['ymin'] + row['ymax']) / 2.0 / row['height']
    bbox_width = (row['xmax'] - row['xmin']) / row['width']
    bbox_height = (row['ymax'] - row['ymin']) / row['height']

    return f"{class_id} {x_center} {y_center} {bbox_width} {bbox_height}"

def write_yolo_annotation(final_df, output_dir):
    grouped = final_df.groupby('filename')
    for filename, group in grouped:
        yolo_annotations = group.apply(convert_to_yolo_format, axis=1).dropna().tolist()
        label_filename = os.path.join(output_dir, filename.replace('.jpg','.txt'))
        with open(label_filename, 'w') as f:
            f.write("\n".join(yolo_annotations))

write_yolo_annotation(final_df, output_dir)

In [16]:
os.makedirs('/kaggle/working/datasets/labels/train/', exist_ok=True)
os.makedirs('/kaggle/working/datasets/labels/val/', exist_ok=True)
os.makedirs('/kaggle/working/datasets/images/train/', exist_ok=True)
os.makedirs('/kaggle/working/datasets/images/val/', exist_ok=True)

In [17]:
label_source_dir = '/kaggle/working/all_labels'
label_train_dir = '/kaggle/working/datasets/labels/train/'
label_val_dir = '/kaggle/working/datasets/labels/val/'

image_source_dir ='/kaggle/input/rdd2022-india/RDD_2022/train/images'
image_train_dir = '/kaggle/working/datasets/images/train/'
image_val_dir = '/kaggle/working/datasets/images/val/'

image_files = final_df['image_path'].unique()
train_files = image_files[:int(0.8*len(image_files))]
val_files = image_files[int(0.8*len(image_files)):]

train_images = [os.path.splitext(os.path.basename(path.strip()))[0] + ".jpg" for path in train_files]
val_images = [os.path.splitext(os.path.basename(path.strip()))[0] + ".jpg" for path in val_files]
train_labels = [os.path.splitext(os.path.basename(path.strip()))[0] + ".txt" for path in train_files]
val_labels = [os.path.splitext(os.path.basename(path.strip()))[0] + ".txt" for path in val_files]


for filename in train_images:
    if filename.endswith('.jpg'):  # Ensure we're only moving label files
        shutil.copy(os.path.join(image_source_dir, filename), os.path.join(image_train_dir, filename))
for filename in val_images:
    if filename.endswith('.jpg'):  # Ensure we're only moving label files
        shutil.copy(os.path.join(image_source_dir, filename), os.path.join(image_val_dir, filename))

for filename in train_labels:
    if filename.endswith('.txt'):  # Ensure we're only moving label files
        shutil.move(os.path.join(label_source_dir, filename), os.path.join(label_train_dir, filename))
for filename in val_labels:
    if filename.endswith('.txt'):  # Ensure we're only moving label files
        shutil.move(os.path.join(label_source_dir, filename), os.path.join(label_val_dir, filename))
        

# with open('/kaggle/working/dataset/images/train', 'w') as f:
#     for item in train_files:
#         f.write("%s\n" % item)
        
        

# with open('/kaggle/working/val.txt', 'w') as f:
#     for item in val_files:
#         f.write("%s\n" % item)

In [18]:
!git clone https://github.com/ultralytics/yolov5
%cd yolov5

  pid, fd = os.forkpty()


Cloning into 'yolov5'...
remote: Enumerating objects: 16836, done.[K
remote: Counting objects: 100% (11/11), done.[K
remote: Compressing objects: 100% (11/11), done.[K
remote: Total 16836 (delta 1), reused 6 (delta 0), pack-reused 16825[K
Receiving objects: 100% (16836/16836), 15.57 MiB | 26.80 MiB/s, done.
Resolving deltas: 100% (11550/11550), done.
/kaggle/working/yolov5


In [19]:
# Create the dataset.yaml file
dataset_yaml = """
train: /kaggle/working/datasets/images/train
val: /kaggle/working/datasets/images/val

# Number of classes
nc: 4

# Class names
names: [0, 1, 2, 3]
"""

# Save the dataset.yaml file
with open('/kaggle/working/dataset.yaml', 'w') as f:
    f.write(dataset_yaml)

In [20]:
!pip install -r requirements.txt

Collecting pillow>=10.3.0 (from -r requirements.txt (line 9))
  Downloading pillow-10.4.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (9.2 kB)
Collecting thop>=0.1.1 (from -r requirements.txt (line 14))
  Downloading thop-0.1.1.post2209072238-py3-none-any.whl.metadata (2.7 kB)
Collecting ultralytics>=8.2.34 (from -r requirements.txt (line 18))
  Downloading ultralytics-8.2.70-py3-none-any.whl.metadata (41 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.3/41.3 kB[0m [31m729.5 kB/s[0m eta [36m0:00:00[0m0:01[0m
Collecting setuptools>=70.0.0 (from -r requirements.txt (line 42))
  Downloading setuptools-72.1.0-py3-none-any.whl.metadata (6.6 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics>=8.2.34->-r requirements.txt (line 18))
  Downloading ultralytics_thop-2.0.0-py3-none-any.whl.metadata (8.5 kB)
Downloading pillow-10.4.0-cp310-cp310-manylinux_2_28_x86_64.whl (4.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.5/4.5 MB[0m [31m2

In [21]:
!wandb off

W&B offline. Running your script from this directory will only write metadata locally. Use wandb disabled to completely turn off W&B.


In [22]:
!python train.py --img 640 --batch 16 --epochs 50 --data /kaggle/working/dataset.yaml --cfg yolov5s.yaml --weights yolov5s.pt --name road_damage_detection

2024-07-31 13:09:44.067191: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-31 13:09:44.067258: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-31 13:09:44.068820: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
[34m[1mtrain: [0mweights=yolov5s.pt, cfg=yolov5s.yaml, data=/kaggle/working/dataset.yaml, hyp=data/hyps/hyp.scratch-low.yaml, epochs=50, batch_size=16, imgsz=640, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, noplots=False, evolve=None, evolve_population=data/hyps, resume_evolve=None, bucket=, cache=None, image_weights=False, device=

In [41]:
cache_path = '/kaggle/working/'
if os.path.exists(cache_path):
    os.remove(cache_path)
    print(f"The cache file '{cache_path}' has been deleted.")
else:
    print(f"The cache file '{cache_path}' does not exist.")

IsADirectoryError: [Errno 21] Is a directory: '/kaggle/working/'

In [31]:
!zip -r weights.zip /kaggle/working/yolov5/runs/train/road_damage_detection/weights

  adding: kaggle/working/yolov5/runs/train/road_damage_detection/weights/ (stored 0%)
  adding: kaggle/working/yolov5/runs/train/road_damage_detection/weights/last.pt (deflated 9%)
  adding: kaggle/working/yolov5/runs/train/road_damage_detection/weights/best.pt (deflated 9%)


In [28]:
!python detect.py --weights /kaggle/working/yolov5/runs/train/road_damage_detection/weights/best.pt --img 640 --conf 0.25 --source /kaggle/input/rdd2022-india/RDD_2022/test/images/India_000183.jpg --save-conf

[34m[1mdetect: [0mweights=['/kaggle/working/yolov5/runs/train/road_damage_detection/weights/best.pt'], source=/kaggle/input/rdd2022-india/RDD_2022/test/images/India_000183.jpg, data=data/coco128.yaml, imgsz=[640, 640], conf_thres=0.25, iou_thres=0.45, max_det=1000, device=, view_img=False, save_txt=False, save_csv=False, save_conf=True, save_crop=False, nosave=False, classes=None, agnostic_nms=False, augment=False, visualize=False, update=False, project=runs/detect, name=exp, exist_ok=False, line_thickness=3, hide_labels=False, hide_conf=False, half=False, dnn=False, vid_stride=1
YOLOv5 🚀 v7.0-350-g6096750f Python-3.10.13 torch-2.1.2 CUDA:0 (Tesla T4, 15095MiB)

Fusing layers... 
YOLOv5s summary: 157 layers, 7020913 parameters, 0 gradients, 15.8 GFLOPs
image 1/1 /kaggle/input/rdd2022-india/RDD_2022/test/images/India_000183.jpg: 640x640 1 0, 2 2s, 1 3, 11.5ms
Speed: 0.6ms pre-process, 11.5ms inference, 152.0ms NMS per image at shape (1, 3, 640, 640)
Results saved to [1mruns/detect/e

In [29]:
import cv2
import plotly.express as px

# Load the image
image_path = '/kaggle/working/yolov5/runs/detect/exp3/India_000183.jpg'
image = cv2.imread(image_path)
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# Display the image with Plotly
fig = px.imshow(image_rgb)
fig.update_xaxes(showticklabels=False).update_yaxes(showticklabels=False)
fig.show()


In [33]:
import shutil

# Define the source and destination paths
source_folder = '/kaggle/working/yolov5/runs/train/road_damage_detection'
output_zip = '/kaggle/working/road_damage_detection.zip'

# Create the zip file
shutil.make_archive(output_zip.replace('.zip', ''), 'zip', source_folder)

print("Folder successfully zipped!")


Folder successfully zipped!
