In [1]:
# Supress Warnings
import warnings
warnings.filterwarnings('ignore')

# GeoTiff Images
import rasterio
from osgeo import gdal

# Visualisation
from matplotlib import pyplot as plt
import matplotlib.image as img
from matplotlib.pyplot import figure
from PIL import Image

# Model Building
import ultralytics
from ultralytics import YOLO
import labelme2yolo

# Progress bar
from tqdm import tqdm

import numpy as np

# Others
import os
import shutil
import zipfile

%matplotlib inline

### Commercial vs Residential buildings
* Generate tile data from the high resolution maxar tif data for the pre storm damage
* Manually label tiles for both classes:
    * 1: undamagedcommercialbuilding
    * 2: undamagedresidentialbuilding
* Train YoloV8n model on the data
* Validate
* Optimize model through improvements
    * Additional datasets, other ? 

In [None]:
# !wget https://challenge.ey.com/api/v1/storage/admin-files/Pre_Event_San_Juan.tif -O Pre_Event_San_Juan.tif

In [3]:
def generate_tiles(input_file, output_dir,grid_x,grid_y):
    ds = gdal.Open(input_file)

    # Get image size and number of bands
    width = ds.RasterXSize
    height = ds.RasterYSize
    num_bands = ds.RasterCount

    # Calculate number of tiles in each dimension
    num_tiles_x = (width // grid_x)
    num_tiles_y = (height // grid_y)

    print(f"Total number of tiles: {num_tiles_x * num_tiles_y}")

    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # Iterate over each tile and save as a separate TIFF image
    for i in range(num_tiles_x):
        for j in range(num_tiles_y):
            x_offset = i *  grid_x
            y_offset = j *  grid_y

            tile_width = min(grid_x, width - x_offset)
            tile_height = min(grid_y, height - y_offset)

            tile = []
            for band in range(1, num_bands + 1):
                tile_data = ds.GetRasterBand(band).ReadAsArray(x_offset, y_offset, tile_width, tile_height)
                tile.append(tile_data)

            # Create output filename
            output_file = os.path.join(output_dir, f"tile_{i}_{j}.tif")
                
            # Create an output TIFF file with same CRS and band values range
            driver = gdal.GetDriverByName("GTiff")
            options = ['COMPRESS=DEFLATE', 'PREDICTOR=2', 'TILED=YES']
            out_ds = driver.Create(output_file, tile_width, tile_height, num_bands, 
                       ds.GetRasterBand(1).DataType, options=options)
            # out_ds = driver.Create(output_file, tile_width, tile_height, num_bands, ds.GetRasterBand(1).DataType)

            # Set the geotransform
            geotransform = list(ds.GetGeoTransform())
            geotransform[0] = geotransform[0] + x_offset * geotransform[1]
            geotransform[3] = geotransform[3] + y_offset * geotransform[5]
            out_ds.SetGeoTransform(tuple(geotransform))

            # Set the projection
            out_ds.SetProjection(ds.GetProjection())

            # Write each band to the output file
            for band in range(1, num_bands + 1):
                out_band = out_ds.GetRasterBand(band)
                out_band.WriteArray(tile[band - 1])

            # Close the output file
            out_ds = None

    print("Tiles generation completed.")

In [7]:
input_file = "./datasets raw/pre_event/Pre_Event_San_Juan.tif"
output_dir = "./datasets raw/pre_event/Pre_Event_Grids_In_TIFF"
grid_x = 640
grid_y = 640
generate_tiles(input_file, output_dir,grid_x,grid_y)

Total number of tiles: 6844
Tiles generation completed.


In [9]:
def convert_tiff_to_jpeg(input_dir,output_dir):
    # check if output_dir exists, if not create it
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    for filename in tqdm(os.listdir(input_dir), desc="Converting tiff tiles to jpeg tiles"):
        # check if file is an image (ends with .tif)
        if filename.endswith('.tif'):
            img = Image.open(os.path.join(input_dir, filename))
        
            # check if image is RGB mode, if not convert it
            if img.mode != 'RGB':
                img = img.convert('RGB')
        
            # create new filename, replace .tif with .jpg
            output_filename = os.path.splitext(filename)[0] + '.jpg'
        
            # save the image in JPEG format
            img.save(os.path.join(output_dir, output_filename), 'JPEG')
    print("Conversion from TIFF to JPEG completed.")

In [11]:
# specify directory
input_dir = "./datasets raw/pre_event/Pre_Event_Grids_In_TIFF"
output_dir = "./datasets raw/pre_event/Pre_Event_Grids_In_JPEG"
convert_tiff_to_jpeg(input_dir,output_dir)

Converting tiff tiles to jpeg tiles: 100%|██████████| 6844/6844 [02:00<00:00, 56.61it/s]

Conversion from TIFF to JPEG completed.





In [13]:
def rename_files(directory_path:str, prefix:str):
# Define the directory path where your files are located
    directory_path = directory_path
    
    # Get a list of all files in the directory
    files = os.listdir(directory_path)
    
    # Define a prefix for the new file names 
    # Change the prefix as per requirement
    prefix = prefix
    
    # Start the numbering from 1
    number = 0
    
    # Loop through each file in the directory
    for filename in tqdm(files, desc='Renaming tiles'):
        # Check if the item is a file (not a directory)
        if os.path.isfile(os.path.join(directory_path, filename)):
            # Get the file extension
            file_extension = os.path.splitext(filename)[1]
    
            # Create the new file name with leading zeros
            new_filename = f"{prefix}{number:03}{file_extension}"
    
            # Construct the full path to the original and new files
            old_filepath = os.path.join(directory_path, filename)
            new_filepath = os.path.join(directory_path, new_filename)
    
            # Rename the file
            os.rename(old_filepath, new_filepath)
    
            # Increment the number for the next file
            number += 1
    
    print("Files renamed successfully.")

In [14]:
rename_files(directory_path=output_dir, prefix='Pre_Event_')

Renaming tiles: 100%|██████████| 6844/6844 [00:00<00:00, 9391.78it/s] 

Files renamed successfully.





#### Preparing Training/Validation Datasets
Using labelme2yolo package for this operation

In [17]:
# !labelme2yolo --json_dir ../datasets_raw/pre_event/Pre_Event_Grids_In_JPEG/

In [2]:
#Loading the model
model = YOLO('yolov8n.pt')
# Display model information (optional)
model.info()

YOLOv8n summary: 225 layers, 3,157,200 parameters, 0 gradients, 8.9 GFLOPs


(225, 3157200, 0, 8.8575488)

In [3]:
# Train the model on the dataset for 50 epochs
data_path = '../datasets/Pre/dataset.yaml'
epochs = 25
time = 1
imgsz = 640
device="mps"
patience = 10

results = model.train(data=data_path, epochs=epochs, time=time, patience=patience, imgsz=imgsz, device=device)

New https://pypi.org/project/ultralytics/8.2.87 available 😃 Update with 'pip install -U ultralytics'
Ultralytics YOLOv8.2.77 🚀 Python-3.12.3 torch-2.4.0 MPS (Apple M3)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=../datasets/Pre/dataset.yaml, epochs=25, time=1, patience=10, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=mps, workers=8, project=None, name=train3, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=Fals



Overriding model.yaml nc=80 with nc=2

                   from  n    params  module                                       arguments                     
  0                  -1  1       464  ultralytics.nn.modules.conv.Conv             [3, 16, 3, 2]                 
  1                  -1  1      4672  ultralytics.nn.modules.conv.Conv             [16, 32, 3, 2]                
  2                  -1  1      7360  ultralytics.nn.modules.block.C2f             [32, 32, 1, True]             
  3                  -1  1     18560  ultralytics.nn.modules.conv.Conv             [32, 64, 3, 2]                
  4                  -1  2     49664  ultralytics.nn.modules.block.C2f             [64, 64, 2, True]             
  5                  -1  1     73984  ultralytics.nn.modules.conv.Conv             [64, 128, 3, 2]               
  6                  -1  2    197632  ultralytics.nn.modules.block.C2f             [128, 128, 2, True]           
  7                  -1  1    295424  ultralytics

[34m[1mtrain: [0mScanning /Users/atsoc/Python_files/Machine_Learning/EY Challenge/Storm Damage - 2024/datasets/Pre/train.cache... 26 images, 0 backgrounds, 0 corrupt: 100%|██████████| 26/26 [00:00<?, ?it/s]
[34m[1mval: [0mScanning /Users/atsoc/Python_files/Machine_Learning/EY Challenge/Storm Damage - 2024/datasets/Pre/val.cache... 6 images, 0 backgrounds, 0 corrupt: 100%|██████████| 6/6 [00:00<?, ?it/s]


Plotting labels to runs/detect/train3/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.001667, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 0 dataloader workers
Logging results to [1mruns/detect/train3[0m
Starting training for 1 hours...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/25         0G      2.282      3.796      2.091        446        640: 100%|██████████| 2/2 [00:24<00:00, 12.45s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:12<00:00, 12.54s/it]

                   all          6         81    0.00655      0.408     0.0525     0.0267






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/94         0G      2.263      3.781      2.006        415        640: 100%|██████████| 2/2 [00:03<00:00,  1.77s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00,  3.06it/s]

                   all          6         81    0.00674      0.382     0.0706     0.0352






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      3/168         0G      2.238      3.722      1.966        345        640: 100%|██████████| 2/2 [00:03<00:00,  1.54s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00,  3.36it/s]

                   all          6         81     0.0104      0.453      0.128      0.075






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      4/230         0G      2.117      3.637      1.868        384        640: 100%|██████████| 2/2 [00:03<00:00,  1.61s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00,  3.28it/s]

                   all          6         81     0.0147      0.532      0.173      0.122






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      5/281         0G      2.067      3.419      1.864        315        640: 100%|██████████| 2/2 [00:03<00:00,  1.99s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00,  2.82it/s]

                   all          6         81     0.0234      0.645      0.218      0.152






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      6/318         0G      1.998      3.296      1.718        347        640: 100%|██████████| 2/2 [00:04<00:00,  2.02s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00,  2.85it/s]

                   all          6         81     0.0281      0.739      0.231      0.152






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      7/348         0G      2.018      3.082      1.697        469        640: 100%|██████████| 2/2 [00:04<00:00,  2.28s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00,  2.73it/s]

                   all          6         81     0.0278      0.739      0.244       0.16






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      8/370         0G      2.031       2.74      1.706        230        640: 100%|██████████| 2/2 [00:04<00:00,  2.35s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00,  1.80it/s]

                   all          6         81     0.0286      0.755      0.255      0.166






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      9/387         0G      1.961      2.378      1.687        348        640: 100%|██████████| 2/2 [00:03<00:00,  1.89s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00,  1.23it/s]

                   all          6         81     0.0283      0.747      0.244      0.155






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     10/404         0G      1.954      2.295      1.586        333        640: 100%|██████████| 2/2 [00:04<00:00,  2.33s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:01<00:00,  1.39s/it]

                   all          6         81     0.0292      0.762      0.272       0.16






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     11/412         0G      1.981       2.17      1.582        363        640: 100%|██████████| 2/2 [00:04<00:00,  2.32s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:01<00:00,  1.49s/it]

                   all          6         81     0.0329      0.808      0.435      0.199






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     12/418         0G      2.036      1.921      1.662        212        640: 100%|██████████| 2/2 [00:06<00:00,  3.01s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:01<00:00,  1.65s/it]

                   all          6         81      0.034      0.815       0.49      0.219






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     13/417         0G      2.056      1.872      1.649        276        640: 100%|██████████| 2/2 [00:05<00:00,  2.71s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95):   0%|          | 0/1 [00:00<?, ?it/s]



                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:03<00:00,  3.64s/it]

                   all          6         81       0.93      0.139      0.545      0.226






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     14/409         0G      1.965      1.663      1.541        377        640: 100%|██████████| 2/2 [00:07<00:00,  3.59s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95):   0%|          | 0/1 [00:00<?, ?it/s]



                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:02<00:00,  2.91s/it]

                   all          6         81      0.958      0.245      0.515      0.189






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     15/399         0G      2.016      1.713      1.597        294        640: 100%|██████████| 2/2 [00:07<00:00,  3.51s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95):   0%|          | 0/1 [00:00<?, ?it/s]



                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:02<00:00,  2.77s/it]

                   all          6         81      0.911      0.281      0.507       0.19






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     16/390         0G      1.944       1.56      1.553        292        640: 100%|██████████| 2/2 [00:07<00:00,  3.58s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95):   0%|          | 0/1 [00:00<?, ?it/s]



                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:02<00:00,  2.97s/it]

                   all          6         81      0.893      0.335      0.494      0.194






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     17/383         0G      2.029      1.756      1.635        418        640: 100%|██████████| 2/2 [00:07<00:00,  3.70s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95):   0%|          | 0/1 [00:00<?, ?it/s]



                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:03<00:00,  3.07s/it]

                   all          6         81      0.852      0.333      0.494      0.184






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     18/375         0G      1.907      1.519      1.552        353        640: 100%|██████████| 2/2 [00:07<00:00,  3.79s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95):   0%|          | 0/1 [00:00<?, ?it/s]



                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:03<00:00,  3.04s/it]

                   all          6         81      0.576      0.365      0.489      0.158






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     19/369         0G      1.911      1.594      1.564        364        640: 100%|██████████| 2/2 [00:08<00:00,  4.00s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95):   0%|          | 0/1 [00:00<?, ?it/s]



                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:03<00:00,  3.44s/it]

                   all          6         81      0.579        0.4      0.414      0.144






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     20/359         0G      1.905      1.511      1.486        451        640: 100%|██████████| 2/2 [00:09<00:00,  4.74s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95):   0%|          | 0/1 [00:00<?, ?it/s]



                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:03<00:00,  3.08s/it]

                   all          6         81      0.543      0.415      0.403      0.151






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     21/350         0G      1.811      1.392      1.542        345        640: 100%|██████████| 2/2 [00:08<00:00,  4.12s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95):   0%|          | 0/1 [00:00<?, ?it/s]



                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:03<00:00,  3.27s/it]

                   all          6         81      0.491      0.469       0.39      0.129






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     22/344         0G      1.917      1.429      1.506        479        640: 100%|██████████| 2/2 [00:09<00:00,  4.80s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95):   0%|          | 0/1 [00:00<?, ?it/s]



                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:02<00:00,  2.89s/it]

                   all          6         81       0.58        0.5      0.417      0.146






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     23/337         0G      1.877      1.467      1.552        385        640: 100%|██████████| 2/2 [00:07<00:00,  3.67s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95):   0%|          | 0/1 [00:00<?, ?it/s]



                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:03<00:00,  3.04s/it]

                   all          6         81      0.552      0.527      0.452      0.173





[34m[1mEarlyStopping: [0mTraining stopped early as no improvement observed in last 10 epochs. Best results observed at epoch 13, best model saved as best.pt.
To update EarlyStopping(patience=10) pass a new patience value, i.e. `patience=300` or use `patience=0` to disable EarlyStopping.

23 epochs completed in 0.069 hours.
Optimizer stripped from runs/detect/train3/weights/last.pt, 6.2MB
Optimizer stripped from runs/detect/train3/weights/best.pt, 6.2MB

Validating runs/detect/train3/weights/best.pt...
Ultralytics YOLOv8.2.77 🚀 Python-3.12.3 torch-2.4.0 MPS (Apple M3)
Model summary (fused): 168 layers, 3,006,038 parameters, 0 gradients, 8.1 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:03<00:00,  3.74s/it]


                   all          6         81      0.931      0.142      0.522      0.224
undamagedresidentialbuilding          4         66      0.862      0.285      0.633      0.185
undamagedcommercialbuilding          5         15          1          0       0.41      0.262
Speed: 45.6ms preprocess, 258.7ms inference, 0.0ms loss, 242.8ms postprocess per image
Results saved to [1mruns/detect/train3[0m


In [None]:
# # Load the Model
# model = YOLO('./runs/detect/train26/weights/best.pt')