<a href="https://colab.research.google.com/github/pcarbomestre/SAR-VesselDetection-FisheriesMonitoring/blob/main/prediction_to_coordinates.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Load libraries

The virtual machines used in Colab for the runtimes are ephemeral, so some libraries not preinstalled in Colab environment have to be installed each time we open the notebook. More information about Colab [here](https://colab.research.google.com/github/tensorflow/workshops/blob/master/extras/amld/notebooks/solutions/0_colab.ipynb#scrollTo=fPP3Zw5iV2DP).

Detectron2 is a library that provides state-of-the-art detection and segmentation algorithms. It is not installed by default in the Colab environment, so we have to install it each time we open a session.

More information about it at https://github.com/facebookresearch/detectron2.

In [None]:
# Install detectron2 directly from its GitHub repository.
!pip install 'git+https://github.com/facebookresearch/detectron2.git'
# exit(0) stops the current kernel so that the runtime restarts with the new
# package available; run this cell on its own and continue with the next cell.
exit(0) # After installation restart Colab's runtime.

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting git+https://github.com/facebookresearch/detectron2.git
  Cloning https://github.com/facebookresearch/detectron2.git to /tmp/pip-req-build-23hu863q
  Running command git clone --filter=blob:none --quiet https://github.com/facebookresearch/detectron2.git /tmp/pip-req-build-23hu863q
  Resolved https://github.com/facebookresearch/detectron2.git to commit 94113be6e12db36b8c7601e13747587f19ec92fe
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting yacs>=0.1.8 (from detectron2==0.6)
  Downloading yacs-0.1.8-py3-none-any.whl (14 kB)
Collecting fvcore<0.1.6,>=0.1.5 (from detectron2==0.6)
  Downloading fvcore-0.1.5.post20221221.tar.gz (50 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.2/50.2 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting iopath<0.1.10,>=0.1.7 (from detectron2==0.6)
  

In [None]:
# Install other required libraries
# rasterio: raster I/O and reprojection; pyproj: CRS handling;
# pyshp: provides the `shapefile` module imported below.
# NOTE(review): `osgeo` (GDAL) and `tifffile` are imported below but not installed
# here - presumably they are preinstalled in the runtime; verify.
!pip install rasterio
!pip install pyproj
!pip install pyshp

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting rasterio
  Downloading rasterio-1.3.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (21.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m21.3/21.3 MB[0m [31m82.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting affine (from rasterio)
  Downloading affine-2.4.0-py3-none-any.whl (15 kB)
Collecting cligj>=0.5 (from rasterio)
  Downloading cligj-0.7.2-py3-none-any.whl (7.1 kB)
Collecting snuggs>=1.4.1 (from rasterio)
  Downloading snuggs-1.4.7-py3-none-any.whl (5.4 kB)
Collecting click-plugins (from rasterio)
  Downloading click_plugins-1.1.1-py2.py3-none-any.whl (7.5 kB)
Installing collected packages: snuggs, cligj, click-plugins, affine, rasterio
Successfully installed affine-2.4.0 click-plugins-1.1.1 cligj-0.7.2 rasterio-1.3.7 snuggs-1.4.7
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecti

In [None]:
import yaml
import torch, torchvision

# Detectron2
# Setup detectron2 logger
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()
# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.engine import DefaultTrainer
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer, ColorMode
from detectron2.data import MetadataCatalog, DatasetCatalog, build_detection_test_loader
from detectron2.structures import BoxMode
from detectron2.evaluation import COCOEvaluator, inference_on_dataset

# import some common libraries
import numpy as np
import os, json, cv2, random
from cv2 import hconcat
import matplotlib.pyplot as plt
# Spatial data libraries
import rasterio
from rasterio.warp import calculate_default_transform, reproject, Resampling
from osgeo import gdal, osr
import pandas as pd
import tifffile
import pyproj
import shapefile

We also need to connect to our Google Drive account, where we have stored our datasets and where we are going to save our outputs.

In [None]:
# Mount Google Drive (Import files from Google Drive in Colab)
# Every model, image and output path used further down lives under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Set up and read in model

In [None]:
# Model path
# Weights file on the mounted Drive; it is assigned to cfg.MODEL.WEIGHTS in the
# configuration cell below.
model_path = '/content/drive/MyDrive/SSDD_pcarbomestre_2.0/output/ModelOutput/faster_rcnn_R_101_FPN_3x_26000iter/model_final.pth'


In [None]:
# Build the detectron2 inference configuration and the predictor used by every
# detection cell below.
cfg = get_cfg()
# Start from the model-zoo YAML of the Faster R-CNN R101-FPN (3x) detector.
cfg.merge_from_file(
    model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml")
)

# No datasets are registered in this notebook.
cfg.DATASETS.TRAIN = ()
cfg.DATASETS.TEST = ()
cfg.OUTPUT_DIR = "output/"

# Model: custom weights, a single "ship" class, run on the CPU.
cfg.MODEL.DEVICE = 'cpu'
cfg.MODEL.WEIGHTS = model_path
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1
# Score and NMS thresholds, both following Zhang et al. 2020.
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST = 0.5

predictor = DefaultPredictor(cfg)

[06/06 20:35:55 d2.checkpoint.detection_checkpoint]: [DetectionCheckpointer] Loading from /content/drive/MyDrive/SSDD_pcarbomestre_2.0/output/ModelOutput/faster_rcnn_R_101_FPN_3x_26000iter/model_final.pth ...


# Download images

Execute img_access.ipynb in Colab

## Get image areas

In [None]:
def calculate_area(filepath):
    """
    Measure the non-NaN footprint of the first band of a GeoTIFF.

    Parameters:
    filepath: str
        Path to a raster whose base name has the form
        '<location>_<date>_<time>.<extension>'.

    Returns:
    A dict with the keys 'location', 'date', 'time', 'area', 'pixel_count'
    and 'pixel_area'. Both areas are expressed in the squared units of the
    raster's affine transform.
    """
    with rasterio.open(filepath) as ds:
        affine = ds.transform
        # |a*e - b*d|: absolute determinant of the affine transform, i.e. the
        # area covered by one pixel.
        pixel_area = abs(affine[0] * affine[4] - affine[1] * affine[3])
        band = ds.read(1)

    # Only NaN cells are treated as "no data"; zeros still count.
    total_pixels = np.count_nonzero(~np.isnan(band))

    name_parts = os.path.basename(filepath).split('_')
    return {
        'location': name_parts[0],
        'date': name_parts[1],
        'time': name_parts[2].split('.')[0],
        'area': total_pixels * pixel_area,
        'pixel_count': total_pixels,
        'pixel_area': pixel_area
    }

def process_directory(path, output_csv="/content/drive/Shareddrives/ssdd_pcarbomestre/SSDD_pcarbomestre_2.0/study_case/images_details/image_areas_20.csv"):
    """
    Compute footprint statistics for every .tif under a directory tree.

    Parameters:
    path: str
        Root directory; it is walked recursively.
    output_csv: str or None
        Where the resulting table is written (without the index). The default
        is the location this notebook has always used; pass None to skip
        writing.

    Returns:
    A pandas DataFrame with one row per raster, holding the fields produced by
    calculate_area.
    """
    records = [
        calculate_area(os.path.join(dirpath, filename))
        for dirpath, _dirnames, filenames in os.walk(path)
        for filename in filenames
        if filename.endswith('.tif')
    ]

    df = pd.DataFrame(records)

    if output_csv is not None:
        df.to_csv(output_csv, index=False)

    return df

In [None]:
# Build the per-image area table for all rasters under sentinel_images; the
# same table is also written to CSV by process_directory.
image_areas_df = process_directory("/content/drive/MyDrive/sentinel_images")

## Get total images size

In [None]:
def count_files(folder_path):
    """Return how many files exist under `folder_path`, including all subfolders."""
    return sum(len(names) for _root, _dirs, names in os.walk(folder_path))

# Count the files of the test dataset (all subfolders included).
folder_path = '/content/drive/Shareddrives/ssdd_pcarbomestre/SSDD_pcarbomestre_2.0/data/datasets/test'  # Replace with the actual folder path
print(count_files(folder_path))

740


In [None]:
def get_dir_size(path='.'):
    """Return the combined size in bytes of every file under `path`, subfolders included."""
    total_bytes = 0
    pending = [path]  # directories still to be scanned
    while pending:
        with os.scandir(pending.pop()) as entries:
            for item in entries:
                if item.is_file():
                    total_bytes += item.stat().st_size
                elif item.is_dir():
                    pending.append(item.path)
    return total_bytes

# 1073741824 == 1024**3, so the printed value is the dataset size in GiB.
print(get_dir_size('/content/drive/Shareddrives/ssdd_pcarbomestre/SSDD_pcarbomestre_2.0/data/datasets/test')/ 1073741824)


0.2158882413059473


# Filter images

Filter the images to exclude those with an incorrect shape. Subsequently, reproject all images to the same CRS of interest.

## Remove tif with invalid shape

In [None]:
def get_tile_offsets(tif_path):
    """Return the value of the 'TileOffsets' tag found on the first page of a TIFF file."""
    with tifffile.TiffFile(tif_path) as tiff_file:
        return tiff_file.pages[0].tags["TileOffsets"].value

def remove_file_if_needed(tif_path):
    """Delete `tif_path` when its first page lists at most one tile offset, and print the outcome."""
    # More than one tile offset: the file is kept.
    if len(get_tile_offsets(tif_path)) > 1:
        print("File not removed:", tif_path)
        return

    os.remove(tif_path)
    print("File removed:", tif_path)

def process_folder(folder_path):
    """
    Apply remove_file_if_needed to every .tif/.tiff file directly inside a folder.

    NOTE(review): a later cell of this notebook defines another function that is
    also called `process_folder` (it takes two arguments); once that cell has
    run, this definition is no longer reachable under this name.
    """
    tiff_names = [
        name for name in os.listdir(folder_path)
        if name.endswith((".tif", ".tiff"))
    ]
    for name in tiff_names:
        remove_file_if_needed(os.path.join(folder_path, name))



In [None]:
# Drop the rasters of the 2020-1 batch that fail the tile-offset check.
# This deletes files on Drive, so it cannot be undone by re-running the cell.
folder_path = "/content/drive/Shareddrives/ssdd_pcarbomestre/2020-1"
process_folder(folder_path)

File not removed: /content/drive/Shareddrives/ssdd_pcarbomestre/2020-1/corcovadogulf_2020-01-04_235745.tif
File not removed: /content/drive/Shareddrives/ssdd_pcarbomestre/2020-1/corcovadogulf_2020-01-11_234959.tif
File not removed: /content/drive/Shareddrives/ssdd_pcarbomestre/2020-1/corcovadogulf_2020-01-16_235745.tif
File not removed: /content/drive/Shareddrives/ssdd_pcarbomestre/2020-1/corcovadogulf_2020-01-23_234958.tif
File not removed: /content/drive/Shareddrives/ssdd_pcarbomestre/2020-1/corcovadogulf_2020-01-28_235744.tif
File not removed: /content/drive/Shareddrives/ssdd_pcarbomestre/2020-1/corcovadogulf_2020-02-04_234958.tif
File not removed: /content/drive/Shareddrives/ssdd_pcarbomestre/2020-1/corcovadogulf_2020-02-09_235744.tif
File not removed: /content/drive/Shareddrives/ssdd_pcarbomestre/2020-1/corcovadogulf_2020-02-16_234958.tif
File not removed: /content/drive/Shareddrives/ssdd_pcarbomestre/2020-1/corcovadogulf_2020-02-21_235744.tif
File not removed: /content/drive/Shar

## Reproject

In [None]:
def check_rasters_projection(images_folder, dst_crs='EPSG:4326'):
    """
    Check if all raster files in a directory and its subdirectories are in a specific CRS.

    Parameters:
    images_folder: str
        The directory to search for raster files.
    dst_crs: str
        The CRS to check against. Default is 'EPSG:4326' (WGS84).

    Returns:
    A list of raster files that are not in the specified CRS. Only the file
    names are returned, not their full paths. A raster that carries no CRS at
    all is reported as not matching.
    """
    from rasterio.crs import CRS

    # Compare CRS objects rather than strings so that equivalent spellings of
    # the same CRS (for example 'epsg:4326') are recognised as equal.
    target_crs = CRS.from_user_input(dst_crs)

    # List to store the names of rasters not in the specified CRS
    non_matching_rasters = []

    # Walk through the directory
    for root, _dirs, files in os.walk(images_folder):
        for file in files:
            if not file.endswith('.tif'):
                continue
            with rasterio.open(os.path.join(root, file)) as src:
                # src.crs is None for a raster without georeferencing; calling
                # a method on it would raise, so it is handled explicitly.
                if src.crs is None or src.crs != target_crs:
                    non_matching_rasters.append(file)

    # If all rasters are in the specified CRS
    if not non_matching_rasters:
        print("All raster files are in the desired projection: {}".format(dst_crs))
    # Return the list of non-matching rasters
    return non_matching_rasters


In [None]:
# List the rasters of the 2020-1 batch that are not yet in EPSG:4326.
images_folder = "/content/drive/Shareddrives/ssdd_pcarbomestre/2020-1"
check_rasters_projection(images_folder, dst_crs='EPSG:4326')

['corcovadogulf_2020-01-04_235745.tif',
 'corcovadogulf_2020-01-11_234959.tif',
 'corcovadogulf_2020-01-16_235745.tif',
 'corcovadogulf_2020-01-23_234958.tif',
 'corcovadogulf_2020-01-28_235744.tif',
 'corcovadogulf_2020-02-04_234958.tif',
 'corcovadogulf_2020-02-09_235744.tif',
 'corcovadogulf_2020-02-16_234958.tif',
 'corcovadogulf_2020-02-21_235744.tif',
 'corcovadogulf_2020-02-28_234957.tif',
 'corcovadogulf_2020-03-04_235744.tif',
 'corcovadogulf_2020-03-11_234957.tif',
 'corcovadogulf_2020-03-16_235744.tif',
 'corcovadogulf_2020-03-23_234958.tif',
 'corcovadogulf_2020-03-28_235744.tif',
 'corcovadogulf_2020-04-04_234958.tif',
 'corcovadogulf_2020-04-09_235744.tif',
 'corcovadogulf_2020-04-16_234958.tif',
 'corcovadogulf_2020-04-21_235745.tif',
 'corcovadogulf_2020-04-28_234959.tif',
 'corcovadogulf_2020-01-02_095013.tif',
 'corcovadogulf_2020-01-07_095816.tif',
 'corcovadogulf_2020-01-12_100624.tif',
 'corcovadogulf_2020-01-14_095012.tif']

Reproject all .tif to the same CRS of interest (e.g. EPSG:4326) so there is projection consistency

In [None]:
import os

def reproject_all_to_wgs84(input_dir):
    """
    Reproject all raster files in a directory to WGS84 (EPSG:4326) and overwrite them.

    Only files directly inside `input_dir` whose name ends with '.tif' are
    processed. Each file is fully reprojected in memory (nearest-neighbour
    resampling) and then written back over the original.

    NOTE(review): every matching file is warped and rewritten on each call,
    including files that are already in EPSG:4326.

    Parameters:
    input_dir: str
        Path to the directory containing raster files.
    """
    dst_crs = 'EPSG:4326'  # WGS84

    for filename in os.listdir(input_dir):
        if not filename.endswith('.tif'):
            continue

        input_path = os.path.join(input_dir, filename)

        with rasterio.open(input_path) as src:
            transform, width, height = calculate_default_transform(
                src.crs, dst_crs, src.width, src.height, *src.bounds)
            kwargs = src.meta.copy()
            kwargs.update({
                'crs': dst_crs,
                'transform': transform,
                'width': width,
                'height': height
            })

            # Allocate the destination in the dtype the output file will use,
            # and zero-initialise it so cells the source does not cover never
            # contain leftover memory.
            dst_array = np.zeros((src.count, height, width), dtype=kwargs['dtype'])
            reproject(
                source=rasterio.band(src, range(1, src.count + 1)),
                destination=dst_array,
                src_transform=src.transform,
                src_crs=src.crs,
                dst_transform=transform,
                dst_crs=dst_crs,
                resampling=Resampling.nearest)

        # The source handle is closed at this point, so the same path can be
        # reopened for writing.
        with rasterio.open(input_path, 'w', **kwargs) as dst:
            dst.write(dst_array)


In [None]:
# Folder whose .tif files are reprojected; each file is overwritten in place.
input_path = "/content/drive/Shareddrives/ssdd_pcarbomestre/2020-1"

# Call the function
reproject_all_to_wgs84(input_path)

In [None]:
# Re-run the CRS check after reprojecting; an empty list means every raster
# matches. Relies on `images_folder` defined in the earlier check cell.
check_rasters_projection(images_folder, dst_crs='EPSG:4326')

All raster files are in the desired projection: EPSG:4326


[]

# Detect vessels and extract coordinates

## Entire images analysis

In [None]:
def process_tiff_image(path, d2_predictor):
    """
    Run the detector on a whole GeoTIFF and georeference the detections.

    Parameters:
    path: str
        Raster to analyse. Its base name must have the form
        '<location>_<date>_<time>.<extension>'.
        NOTE(review): the grayscale-to-BGR conversion below presumably requires
        a single-band raster - confirm.
    d2_predictor: callable
        Called with an (H, W, 3) uint8 image; must return a dict whose
        'instances' entry exposes `pred_boxes.tensor` and `scores`.

    Returns:
    A list with one dict per detection, holding the keys 'centroid' (an (x, y)
    tuple in the raster's own CRS), 'location', 'date', 'time' and 'score'.
    """
    # Pixel values are clipped to this range before being rescaled to 8 bits.
    new_min = -20
    new_max = 10

    with rasterio.open(path) as src:
        img = src.read()
        img_clipped = np.clip(img, new_min, new_max).astype(src.profile['dtype'])
        # Affine pixel -> CRS transform, taken from the handle that is already
        # open so the file does not have to be opened a second time.
        transform = src.transform

    # (bands, rows, cols) -> (rows, cols, bands), band order reversed
    img_bgr = img_clipped.transpose(1, 2, 0)[:, :, ::-1]

    # Stretch the image's own min/max to 0-255 and convert to uint8
    img_bgr = cv2.normalize(img_bgr, None, 0, 255, cv2.NORM_MINMAX, dtype=cv2.CV_8U)

    # Replicate the single channel so the detector receives a 3-channel image
    img_bgr_new = cv2.cvtColor(img_bgr, cv2.COLOR_GRAY2BGR)

    instances = d2_predictor(img_bgr_new)['instances']
    # Cast to float64 before georeferencing: computing coordinates in the
    # detector's float32 would limit them to about 7 significant digits.
    boxes = instances.pred_boxes.tensor.cpu().numpy().astype(np.float64)
    scores = instances.scores.cpu().numpy()

    # Box centres in pixel coordinates (columns = x, rows = y)
    centre_x = (boxes[:, 0] + boxes[:, 2]) / 2
    centre_y = (boxes[:, 1] + boxes[:, 3]) / 2

    # Full affine mapping; the b and d terms are zero for north-up rasters.
    geo_x = transform.a * centre_x + transform.b * centre_y + transform.c
    geo_y = transform.d * centre_x + transform.e * centre_y + transform.f

    # File name convention: <location>_<date>_<time>.<extension>
    name_parts = os.path.basename(path).split('_')
    location = name_parts[0]
    date_str = name_parts[1]
    time_str = name_parts[2].split('.')[0]

    detected_objects = [
        {
            'centroid': (x, y),
            'location': location,
            'date': date_str,
            'time': time_str,
            'score': score
        }
        for x, y, score in zip(geo_x, geo_y, scores)
    ]

    return detected_objects

In [None]:
def process_folder(path, d2_predictor,
                   sf_path='/content/drive/Shareddrives/ssdd_pcarbomestre/SSDD_pcarbomestre_2.0/study_case/shapefiles/',
                   layer_name='centroids'):
    """
    Detect vessels in every .tif of a folder and save the centroids as a point shapefile.

    NOTE(review): an earlier cell of this notebook defines a one-argument
    function with this same name; running this cell replaces it.

    Parameters:
    path: str
        Folder whose .tif files (non-recursive) are analysed with
        process_tiff_image.
    d2_predictor: callable
        Predictor forwarded to process_tiff_image.
    sf_path: str
        Folder that receives the shapefile. Defaults to the location this
        notebook has always used.
    layer_name: str
        Base name of the shapefile (without extension).

    The '.prj' file always declares EPSG:4326, whereas the centroids are in
    each raster's own CRS, so the rasters must already be in EPSG:4326.
    """
    detected_objects_all = []
    for filename in os.listdir(path):
        if filename.endswith(".tif"):
            filepath = os.path.join(path, filename)
            detected_objects_all.extend(process_tiff_image(filepath, d2_predictor))

    target = os.path.join(sf_path, layer_name)

    # The context manager closes the writer exactly once, even if adding a
    # record fails part-way through.
    with shapefile.Writer(target) as sf:
        sf.field('location', 'C', size=50)
        sf.field('date', 'C', size=50)
        sf.field('time', 'C', size=50)
        sf.field('score', 'F', size=10, decimal=9)
        sf.autoBalance = 1

        for obj in detected_objects_all:
            sf.point(*obj['centroid'])
            sf.record(obj['location'], obj['date'], obj['time'], obj['score'])

    # Set the projection of the shapefile
    prj_content = pyproj.CRS.from_string('EPSG:4326').to_wkt()
    with open(target + '.prj', 'w') as prj_file:
        prj_file.write(prj_content)

In [None]:
# Whole-image detection over the study-case rasters. This needs the two-argument
# process_folder defined in the cell just above, not the earlier one-argument one.
process_folder('/content/drive/Shareddrives/ssdd_pcarbomestre/SSDD_pcarbomestre_2.0/study_case/sentinel_images', predictor)

## Subsetting analysis

In [None]:
def process_tiff_image_subsetting(path, d2_predictor, tile_size=800):
    """
    Run the detector on a GeoTIFF tile by tile and georeference the detections.

    The image is cut into non-overlapping square tiles (the last row/column of
    tiles may be smaller); detections of each tile are shifted back to
    full-image pixel coordinates before being georeferenced.

    NOTE(review): tiles do not overlap and detections are not merged across
    tiles, so an object lying on a tile border may be reported twice or not at
    all.

    Parameters:
    path: str
        Raster to analyse. Its base name must have the form
        '<location>_<date>_<time>.<extension>'.
    d2_predictor: callable
        Called with an (h, w, 3) uint8 tile; must return a dict whose
        'instances' entry exposes `pred_boxes.tensor` and `scores`.
    tile_size: int
        Side of the square tiles in pixels. Default is 800.

    Returns:
    A list with one dict per detection, holding the keys 'centroid' (an (x, y)
    tuple in the raster's own CRS), 'location', 'date', 'time' and 'score'.
    """
    # Pixel values are clipped to this range before being rescaled to 8 bits.
    new_min = -20
    new_max = 10

    with rasterio.open(path) as src:
        img = src.read()
        img_clipped = np.clip(img, new_min, new_max).astype(src.profile['dtype'])
        # Affine pixel -> CRS transform, read here so the file is opened once.
        transform = src.transform

    # (bands, rows, cols) -> (rows, cols, bands), band order reversed
    img_bgr = img_clipped.transpose(1, 2, 0)[:, :, ::-1]

    # Stretch the whole image's min/max to 0-255 (before tiling, so that all
    # tiles share the same scaling) and convert to uint8
    img_bgr = cv2.normalize(img_bgr, None, 0, 255, cv2.NORM_MINMAX, dtype=cv2.CV_8U)

    # Replicate the single channel so the detector receives a 3-channel image
    img_bgr_new = cv2.cvtColor(img_bgr, cv2.COLOR_GRAY2BGR)

    img_height, img_width, _ = img_bgr_new.shape

    all_boxes = []
    all_scores = []
    for start_h in range(0, img_height, tile_size):
        for start_w in range(0, img_width, tile_size):
            # Slicing past the image edge is clipped automatically
            sub_img = img_bgr_new[start_h:start_h + tile_size,
                                  start_w:start_w + tile_size, :]

            instances = d2_predictor(sub_img)['instances']

            # Cast to float64 before georeferencing: computing coordinates in
            # the detector's float32 would limit them to ~7 significant digits.
            boxes = instances.pred_boxes.tensor.cpu().numpy().astype(np.float64)

            # Tile coordinates -> full-image coordinates (x columns, y rows)
            boxes[:, 0::2] += start_w
            boxes[:, 1::2] += start_h

            all_boxes.append(boxes)
            all_scores.append(instances.scores.cpu().numpy())

    if all_boxes:
        all_boxes = np.concatenate(all_boxes, axis=0)
        all_scores = np.concatenate(all_scores, axis=0)
    else:
        # Image without any pixel: nothing was tiled
        all_boxes = np.zeros((0, 4))
        all_scores = np.zeros((0,))

    # Box centres in pixel coordinates
    centre_x = (all_boxes[:, 0] + all_boxes[:, 2]) / 2
    centre_y = (all_boxes[:, 1] + all_boxes[:, 3]) / 2

    # Full affine mapping; the b and d terms are zero for north-up rasters.
    geo_x = transform.a * centre_x + transform.b * centre_y + transform.c
    geo_y = transform.d * centre_x + transform.e * centre_y + transform.f

    # File name convention: <location>_<date>_<time>.<extension>
    name_parts = os.path.basename(path).split('_')
    location = name_parts[0]
    date_str = name_parts[1]
    time_str = name_parts[2].split('.')[0]

    detected_objects = [
        {
            'centroid': (x, y),
            'location': location,
            'date': date_str,
            'time': time_str,
            'score': score
        }
        for x, y, score in zip(geo_x, geo_y, all_scores)
    ]

    return detected_objects

In [None]:
def process_folder_subsetting(path, d2_predictor,
                              sf_path='/content/drive/MyDrive/SSDD_pcarbomestre_2.0/study_case/shapefiles/',
                              layer_name='centroids_sub'):
    """
    Run tiled detection on every .tif of a folder and save the centroids as a point shapefile.

    Parameters:
    path: str
        Folder whose .tif files (non-recursive) are analysed with
        process_tiff_image_subsetting. Each file name is printed before it is
        processed.
    d2_predictor: callable
        Predictor forwarded to process_tiff_image_subsetting.
    sf_path: str
        Folder that receives the shapefile. Defaults to the location this
        notebook has always used.
    layer_name: str
        Base name of the shapefile (without extension).

    The '.prj' file always declares EPSG:4326, whereas the centroids are in
    each raster's own CRS, so the rasters must already be in EPSG:4326.
    """
    detected_objects_all = []
    for filename in os.listdir(path):
        if filename.endswith(".tif"):
            filepath = os.path.join(path, filename)
            print(filename)
            detected_objects_all.extend(
                process_tiff_image_subsetting(filepath, d2_predictor))

    target = os.path.join(sf_path, layer_name)

    # The context manager closes the writer exactly once, even if adding a
    # record fails part-way through.
    with shapefile.Writer(target) as sf:
        sf.field('location', 'C', size=50)
        sf.field('date', 'C', size=50)
        sf.field('time', 'C', size=50)
        sf.field('score', 'F', size=10, decimal=9)
        sf.autoBalance = 1

        for obj in detected_objects_all:
            sf.point(*obj['centroid'])
            sf.record(obj['location'], obj['date'], obj['time'], obj['score'])

    # Set the projection of the shapefile
    prj_content = pyproj.CRS.from_string('EPSG:4326').to_wkt()
    with open(target + '.prj', 'w') as prj_file:
        prj_file.write(prj_content)

In [None]:
# Tiled detection over the 2020-1 batch; one file name is printed per raster.
input_path = "/content/drive/Shareddrives/ssdd_pcarbomestre/2020-1"

process_folder_subsetting(input_path, predictor)

corcovadogulf_2020-01-04_235745.tif


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


corcovadogulf_2020-01-11_234959.tif
corcovadogulf_2020-01-16_235745.tif
corcovadogulf_2020-01-23_234958.tif
corcovadogulf_2020-01-28_235744.tif
corcovadogulf_2020-02-04_234958.tif
corcovadogulf_2020-02-09_235744.tif
corcovadogulf_2020-02-16_234958.tif
corcovadogulf_2020-02-21_235744.tif
corcovadogulf_2020-02-28_234957.tif
corcovadogulf_2020-03-04_235744.tif
corcovadogulf_2020-03-11_234957.tif
corcovadogulf_2020-03-16_235744.tif
corcovadogulf_2020-03-23_234958.tif
corcovadogulf_2020-03-28_235744.tif
corcovadogulf_2020-04-04_234958.tif
corcovadogulf_2020-04-09_235744.tif
corcovadogulf_2020-04-16_234958.tif
corcovadogulf_2020-04-21_235745.tif
corcovadogulf_2020-04-28_234959.tif
corcovadogulf_2020-01-02_095013.tif
corcovadogulf_2020-01-07_095816.tif
corcovadogulf_2020-01-12_100624.tif
corcovadogulf_2020-01-14_095012.tif


In [None]:
def process_tiff_image_subsetting(path, d2_predictor):
    """
    Reproject a GeoTIFF to EPSG:4326 in memory, run the detector tile by tile
    and return the detections' centroids in EPSG:4326.

    Both the pixels and the geotransform come from the in-memory reprojected
    dataset returned by reproject_tiff, so the file on disk is left untouched
    and the returned coordinates are in EPSG:4326 whatever the source CRS.

    NOTE(review): this replaces the earlier definition of the same name when
    the notebook is run from top to bottom. Tiles do not overlap and detections
    are not merged across tiles.

    Parameters:
    path: str
        Raster to analyse. Its base name must have the form
        '<location>_<date>_<time>.<extension>'.
    d2_predictor: callable
        Called with an (h, w, 3) uint8 tile; must return a dict whose
        'instances' entry exposes `pred_boxes.tensor` and `scores`.

    Returns:
    A list with one dict per detection, holding the keys 'centroid' (an (x, y)
    tuple), 'location', 'date', 'time' and 'score'.
    """
    # Pixel values are clipped to this range before being rescaled to 8 bits.
    new_min = -20
    new_max = 10

    # Side of the square tiles, in pixels
    sub_img_height = 800
    sub_img_width = 800

    # Reproject the image to EPSG:4326 and keep what is needed from the result
    mem_ds = reproject_tiff(path)
    if mem_ds is None:
        raise RuntimeError("Could not reproject raster: {}".format(path))
    img = mem_ds.ReadAsArray()
    # GDAL geotransform: (x origin, x per column, x per row,
    #                     y origin, y per column, y per row)
    gt = mem_ds.GetGeoTransform()
    mem_ds = None  # drop the reference so the in-memory dataset can be released

    # ReadAsArray returns a 2-D array for a single-band raster
    if img.ndim == 2:
        img = img[np.newaxis, :, :]

    img_clipped = np.clip(img, new_min, new_max)

    # (bands, rows, cols) -> (rows, cols, bands), band order reversed
    img_bgr = img_clipped.transpose(1, 2, 0)[:, :, ::-1]

    # Stretch the whole image's min/max to 0-255 and convert to uint8
    img_bgr = cv2.normalize(img_bgr, None, 0, 255, cv2.NORM_MINMAX, dtype=cv2.CV_8U)

    # Replicate the single channel so the detector receives a 3-channel image
    img_bgr_new = cv2.cvtColor(img_bgr, cv2.COLOR_GRAY2BGR)

    img_height, img_width, _ = img_bgr_new.shape

    all_boxes = []
    all_scores = []
    for start_h in range(0, img_height, sub_img_height):
        for start_w in range(0, img_width, sub_img_width):
            # Slicing past the image edge is clipped automatically
            sub_img = img_bgr_new[start_h:start_h + sub_img_height,
                                  start_w:start_w + sub_img_width, :]

            instances = d2_predictor(sub_img)['instances']

            # Cast to float64 before georeferencing: computing coordinates in
            # the detector's float32 would limit them to ~7 significant digits.
            boxes = instances.pred_boxes.tensor.cpu().numpy().astype(np.float64)

            # Tile coordinates -> full-image coordinates (x columns, y rows)
            boxes[:, 0::2] += start_w
            boxes[:, 1::2] += start_h

            all_boxes.append(boxes)
            all_scores.append(instances.scores.cpu().numpy())

    if all_boxes:
        all_boxes = np.concatenate(all_boxes, axis=0)
        all_scores = np.concatenate(all_scores, axis=0)
    else:
        # Image without any pixel: nothing was tiled
        all_boxes = np.zeros((0, 4))
        all_scores = np.zeros((0,))

    # Box centres in pixel coordinates
    centre_x = (all_boxes[:, 0] + all_boxes[:, 2]) / 2
    centre_y = (all_boxes[:, 1] + all_boxes[:, 3]) / 2

    # Full affine mapping; gt[2] and gt[4] are zero for north-up rasters.
    geo_x = gt[0] + centre_x * gt[1] + centre_y * gt[2]
    geo_y = gt[3] + centre_x * gt[4] + centre_y * gt[5]

    # File name convention: <location>_<date>_<time>.<extension>
    name_parts = os.path.basename(path).split('_')
    location = name_parts[0]
    date_str = name_parts[1]
    time_str = name_parts[2].split('.')[0]

    detected_objects = [
        {
            'centroid': (x, y),
            'location': location,
            'date': date_str,
            'time': time_str,
            'score': score
        }
        for x, y, score in zip(geo_x, geo_y, all_scores)
    ]

    return detected_objects

In [None]:
import csv

def process_folder_subsetting(
        path,
        d2_predictor,
        output_dir='/content/drive/Shareddrives/ssdd_pcarbomestre/SSDD_pcarbomestre_2.0/study_case/csv/',
        output_name='centroids_sub.csv'):
    """
    Run detection on every ``.tif`` file in a folder and save the results as CSV.

    Each image is passed to ``process_tiff_image_subsetting``; the detections
    it returns are collected and written to ``output_dir/output_name`` with one
    row per detection.

    Parameters:
    path: str
        Folder containing the ``.tif`` images to process.
    d2_predictor:
        Predictor object forwarded unchanged to
        ``process_tiff_image_subsetting``.
    output_dir: str, optional
        Folder in which the CSV file is written. Defaults to the study-case
        CSV folder that was previously hard-coded.
    output_name: str, optional
        Name of the CSV file. Defaults to ``'centroids_sub.csv'``.

    Returns:
    None
        The results are only written to disk.
    """
    # Collect the detections from all images
    detected_objects_all = []

    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order of the CSV reproducible between runs.
    for filename in sorted(os.listdir(path)):
        if not filename.endswith(".tif"):
            continue
        filepath = os.path.join(path, filename)
        # Show which image is being processed
        print(filename)
        detected_objects_all.extend(
            process_tiff_image_subsetting(filepath, d2_predictor))

    # Save the data in a CSV file
    fieldnames = ['centroid_x', 'centroid_y', 'location', 'date', 'time', 'score']
    with open(os.path.join(output_dir, output_name), 'w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for obj in detected_objects_all:
            centroid = obj['centroid']
            # The centroid tuple is split into two separate columns
            writer.writerow({
                'centroid_x': centroid[0],
                'centroid_y': centroid[1],
                'location': obj['location'],
                'date': obj['date'],
                'time': obj['time'],
                'score': obj['score'],
            })


In [None]:
def reproject_tiff(path):
    """
    Reproject a raster to WGS 84 (EPSG:4326) in memory.

    Parameters:
    path: str
        Path to the source raster file.

    Returns:
        The in-memory (``MEM`` driver) dataset returned by ``gdal.Warp``.

    Raises:
    RuntimeError
        If the source raster cannot be opened.
    """
    # Load source image
    src_ds = gdal.Open(path)
    if src_ds is None:
        # Without this check a failed open only surfaces later as an
        # attribute error on None.
        raise RuntimeError('Could not open raster: {}'.format(path))

    # Reproject in memory; the empty destination name is used together with
    # the MEM driver so nothing is written to disk.
    mem_ds = gdal.Warp('', src_ds, dstSRS='EPSG:4326', format='MEM')

    return mem_ds


In [None]:
# Run detection on every .tif in the test folder. process_folder_subsetting
# prints each file name and writes the centroids to a CSV file; it returns nothing.
process_folder_subsetting('/content/drive/Shareddrives/ssdd_pcarbomestre/SSDD_pcarbomestre_2.0/study_case/sentinel_images/2018-1/test', predictor)

corcovadogulf_2018-01-03_234904.tif
corcovadogulf_2018-02-17_094941.tif


In [None]:
# Run detection on a single image and display the returned list of detections.
# The 'location' field is taken from the file-name prefix before the first '_'.
process_tiff_image_subsetting('/content/drive/Shareddrives/ssdd_pcarbomestre/SSDD_pcarbomestre_2.0/study_case/sentinel_images/2018-1/test/Copyofcorcovadogulf_2018-02-17_094941.tif', predictor)

[{'centroid': (-73.01604260633118, -43.60991769047326),
  'location': 'Copyofcorcovadogulf',
  'date': '2018-02-17',
  'time': '094941',
  'score': 0.6000059}]

In [None]:
def reproject_to_wgs84(input_path, output_path=None):
    """
    Reproject a raster file to WGS84 (EPSG:4326).

    By default the result overwrites the input file. Pass ``output_path`` to
    write the reprojected raster elsewhere and keep the input untouched.

    Parameters:
    input_path: str
        Path to the input raster file.
    output_path: str, optional
        Path of the raster to write. Defaults to ``input_path`` (overwrite).
    """
    dst_crs = 'EPSG:4326'  # WGS84
    if output_path is None:
        output_path = input_path

    # Open the input file
    with rasterio.open(input_path) as src:
        transform, width, height = calculate_default_transform(
            src.crs, dst_crs, src.width, src.height, *src.bounds)
        kwargs = src.meta.copy()
        kwargs.update({
            'crs': dst_crs,
            'transform': transform,
            'width': width,
            'height': height
        })

        # Allocate the destination buffer with the dataset's own dtype: a
        # default float64 array can use several times the memory and no longer
        # matches the dtype declared in the output metadata.
        dst_array = np.zeros((src.count, height, width), dtype=kwargs['dtype'])
        reproject(
            source=rasterio.band(src, range(1, src.count + 1)),
            destination=dst_array,
            src_transform=src.transform,
            src_crs=src.crs,
            dst_transform=transform,
            dst_crs=dst_crs,
            resampling=Resampling.nearest)

    # The source is closed at this point, so writing to the same path is safe
    with rasterio.open(output_path, 'w', **kwargs) as dst:
        dst.write(dst_array)


In [None]:
input_path = "/content/drive/Shareddrives/ssdd_pcarbomestre/2018-1/corcovadogulf_2018-01-05_095748.tif"
# NOTE(review): output_path is defined here but is not passed to the call below.
output_path = "/content/drive/Shareddrives/ssdd_pcarbomestre/2018-1/corcovadogulf_2018-01-05_095748_repro.tif"

# Reproject in place: reproject_to_wgs84 writes the result back to input_path.
reproject_to_wgs84(input_path)

In [None]:
import os
import rasterio
from rasterio.warp import calculate_default_transform, reproject, Resampling

def reproject_tif(input_tif_path, output_folder):
    """
    Reproject a raster file to WGS84 (EPSG:4326) and write it to a new file.

    Parameters:
    input_tif_path: str
        Path to the input raster file.
    output_folder: str
        Either an existing directory, in which case the output keeps the
        input's file name inside that directory, or the full path of the
        output file (the historical usage of this argument).

    Raises:
    ValueError
        If the resolved output path is the input file itself, which would
        truncate the source while it is still being read.
    """
    dst_crs = 'EPSG:4326'  # WGS84

    # Construct the output file path
    if os.path.isdir(output_folder):
        output_tif_path = os.path.join(
            output_folder, os.path.basename(input_tif_path))
    else:
        output_tif_path = output_folder

    if os.path.abspath(output_tif_path) == os.path.abspath(input_tif_path):
        raise ValueError(
            'Output path must differ from the input path: {}'.format(input_tif_path))

    # Open the input file
    with rasterio.open(input_tif_path) as src:
        # Calculate the transform and width and height for the new CRS
        transform, width, height = calculate_default_transform(
            src.crs, dst_crs, src.width, src.height, *src.bounds)

        # Define the metadata for the output file
        kwargs = src.meta.copy()
        kwargs.update({
            'crs': dst_crs,
            'transform': transform,
            'width': width,
            'height': height
        })

        # Open the output file and reproject the input data into it band by band
        with rasterio.open(output_tif_path, 'w', **kwargs) as dst:
            for i in range(1, src.count + 1):
                reproject(
                    source=rasterio.band(src, i),
                    destination=rasterio.band(dst, i),
                    src_transform=src.transform,
                    src_crs=src.crs,
                    dst_transform=transform,
                    dst_crs=dst_crs,
                    resampling=Resampling.nearest)



In [None]:
input_path = "/content/drive/Shareddrives/ssdd_pcarbomestre/2018-1/corcovadogulf_2018-01-05_095748.tif"
output_path = "/content/drive/Shareddrives/ssdd_pcarbomestre/2018-1/corcovadogulf_2018-01-05_095748_repro.tif"

# Write a reprojected copy next to the original. Although the second parameter
# of reproject_tif is named output_folder, it is given the full output file path here.
reproject_tif(input_path, output_path)

In [None]:
import os

def reproject_all_to_wgs84(input_dir):
    """
    Reproject all raster files in a directory to WGS84 (EPSG:4326) and overwrite them.

    Every file whose name ends in ``.tif`` is handed to ``reproject_to_wgs84``,
    which performs the reprojection and overwrites the file. Other files are
    left untouched.

    Parameters:
    input_dir: str
        Path to the directory containing raster files.
    """
    # Sorted so files are processed in a reproducible order; os.listdir
    # returns entries in arbitrary order.
    for filename in sorted(os.listdir(input_dir)):
        # Process only files with .tif extension
        if filename.endswith('.tif'):
            # Reuse the single-file routine instead of duplicating its body
            reproject_to_wgs84(os.path.join(input_dir, filename))


In [None]:
# Here input_path is rebound to a directory rather than a single file.
input_path = "/content/drive/Shareddrives/ssdd_pcarbomestre/2018-1"

# Reproject every .tif in the directory; each file is overwritten in place.
reproject_all_to_wgs84(input_path)

KeyboardInterrupt: ignored