### OCI Data Science - Useful Tips
<details>
<summary><font size="2">Check for Public Internet Access</font></summary>

```python
import requests
response = requests.get("https://oracle.com")
assert response.status_code==200, "Internet connection failed"
```
</details>
<details>
<summary><font size="2">Helpful Documentation </font></summary>
<ul><li><a href="https://docs.cloud.oracle.com/en-us/iaas/data-science/using/data-science.htm">Data Science Service Documentation</a></li>
<li><a href="https://docs.cloud.oracle.com/iaas/tools/ads-sdk/latest/index.html">ADS documentation</a></li>
</ul>
</details>
<details>
<summary><font size="2">Typical Cell Imports and Settings for ADS</font></summary>

```python
%load_ext autoreload
%autoreload 2
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

import logging
logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.ERROR)

import ads
from ads.dataset.factory import DatasetFactory
from ads.automl.provider import OracleAutoMLProvider
from ads.automl.driver import AutoML
from ads.evaluations.evaluator import ADSEvaluator
from ads.common.data import ADSData
from ads.explanations.explainer import ADSExplainer
from ads.explanations.mlx_global_explainer import MLXGlobalExplainer
from ads.explanations.mlx_local_explainer import MLXLocalExplainer
from ads.catalog.model import ModelCatalog
from ads.common.model_artifact import ModelArtifact
```
</details>
<details>
<summary><font size="2">Useful Environment Variables</font></summary>

```python
import os
print(os.environ["NB_SESSION_COMPARTMENT_OCID"])
print(os.environ["PROJECT_OCID"])
print(os.environ["USER_OCID"])
print(os.environ["TENANCY_OCID"])
print(os.environ["NB_REGION"])
```
</details>

# 1 - Set up the environment (required only the first time the environment is set up)

In [9]:
#!git clone https://github.com/ultralytics/yolov5  # clone
#cd yolov5
#pip install -r requirements.txt  # install

In [None]:
# Check if gdal, geopandas, rasterio, cv2 are installed (if not then install them with the following commands)

In [9]:
#conda list

In [2]:
# if gdal is not installed then run this
#%conda install -c conda-forge gdal=3.4.0

In [471]:
#!pip install pandas fiona shapely pyproj rtree

In [133]:
#pip uninstall rtree


In [134]:
#!pip install geopandas

In [7]:
#!pip install geopandas==0.10.2
#!pip install rasterio==1.2.10

# 2 - Load libraries

In [1]:
import os, glob,shutil
from osgeo import gdal, ogr, osr
from pathlib import Path
import geopandas as gpd
from shapely.geometry import Polygon
import matplotlib.pyplot as plt
import rasterio as rio
import numpy as np
from cv2 import cv2
# load my own functions
# NOTE: os.chdir changes the kernel's working directory for every later cell;
# here it is what makes `tools` importable (presumably tools.py lives in this
# folder -- confirm). Later cells should therefore not rely on relative paths.
os.chdir("/home/datascience/utils")
from tools import tile_ortho, yolo2xy, cleanUp_boudingBoxes, mosaic_yoloPred_shp

In [2]:
import pandas as pd

In [3]:
import warnings
warnings.filterwarnings('ignore')

# 3 - Split large orthomosaic into small tiles (32 meters side)

### Define parameters for splitting orthomosaic into tiles

In [4]:
# Orthomosaic (GeoTIFF) that will be split into tiles
ortho_path = "/home/datascience/yolov5_snowdamage/data/7661_ortho.tif"
# Side length of each tile and size of the buffer around each tile, both in meters
tile_size_m, buffer_size_m = 32, 3

Get orthomosaic metadata

In [5]:
## Name of the orthomosaic/drone project and the folder where it is stored
ortho_name = Path(ortho_path).stem
ortho_folder_path = os.path.dirname(ortho_path)

## Open the raster. Unless GDAL exceptions are enabled, gdal.Open returns None
## on failure instead of raising, so check explicitly and fail with a clear message.
src_ds = gdal.Open(ortho_path)
if src_ds is None:
    raise FileNotFoundError("Could not open orthomosaic: " + ortho_path)

## Pixel resolution (in CRS units, i.e. meters for a projected CRS) and tile size in pixels.
## yres is typically negative for north-up rasters.
_, xres, _, _, _, yres = src_ds.GetGeoTransform()
tile_size_px = round(tile_size_m / abs(xres))

## EPSG code of the orthomosaic CRS (returned as a string by GDAL)
proj = osr.SpatialReference(wkt=src_ds.GetProjection())
EPSG_code = proj.GetAttrValue('AUTHORITY', 1)
if EPSG_code is None:
    # Fail here rather than after tiling/inference, when the CRS string is built.
    raise ValueError("No EPSG authority code found in the projection of " + ortho_path)

### Run tiling

In [4]:
# Split the orthomosaic into GeoTIFF tiles using the helper from tools.py.
# NOTE(review): later cells expect the tiles and "<ortho_name>_tile_index.shp"
# in "<ortho folder>/tiles_dir" -- confirm that this is where tile_ortho writes.
tile_ortho(
    ortho_path,
    tile_size_m,
    buffer_size_m,
    "GTiff",
)

KeyboardInterrupt: 

# 4 - Inference on the tiled images (GeoTIFF tiles)

Define confidence threshold

In [6]:
# Confidence threshold used for inference
conf_thres = 0.4

Create output folder

In [7]:
# Folder holding the tiles, and the sub-folder that will receive the predictions
tiles_dir = os.path.dirname(ortho_path) + "/tiles_dir"
output_dir = tiles_dir + '/predictions'
# Create the folder (and any missing parents); exist_ok avoids the
# check-then-create race and makes the cell safe to re-run.
os.makedirs(output_dir, exist_ok=True)

### Predict

In [None]:
/home/datascience/yolov5/detect.py --source $tiles_dir --weights /home/datascience/yolov5_snowdamage/model/best.pt --img 640 --name $output_dir --save-txt --save-conf --nosave --conf-thres=0.4

# 5 - Convert all predictions from image to map coords and merge

Define where YOLO predictions are stored

In [12]:
# Folder containing the YOLO prediction files (.txt)
labels_dir = "/".join([output_dir, "labels"])
# intile: distance by which each tile polygon is shrunk (negative buffer) when
#         deciding which boxes to keep
# iou_thresh: threshold passed to cleanUp_boudingBoxes
intile, iou_thresh = 1, 0.75

### Parse and mosaic YOLO prediction

In [13]:
#all_bounding_boxes_aoi= mosaic_yoloPred_shp(tiles_dir, labels_dir, ortho_name, xres, yres, tile_size_m, EPSG_code, intile=1, iou_thresh=0.75)

def _tile_boxes_to_geo(label_file, gtiff_file, one_tile, crs, xres, yres, tile_size_m, intile):
    """Convert the YOLO predictions of one tile into map-coordinate bounding boxes.

    Parameters
    ----------
    label_file : str
        Path to the YOLO prediction file (.txt) of the tile.
    gtiff_file : str
        Path to the GeoTIFF tile the predictions belong to.
    one_tile : geopandas.GeoDataFrame
        Row(s) of the tile index describing the footprint of this tile.
    crs : str
        Coordinate reference system assigned to the returned boxes.
    xres, yres : float
        Pixel size of the orthomosaic along x and y (from its geotransform).
    tile_size_m : float
        Tile side length, used as the vertical offset from the tile's minimum Y.
    intile : float
        Distance by which the tile footprint is shrunk; boxes whose centroid
        falls outside the shrunken footprint are dropped.

    Returns
    -------
    geopandas.GeoDataFrame or None
        One row per retained box with columns 'class', 'prob' and 'geometry',
        or None when no box is retained for this tile.

    Raises
    ------
    FileNotFoundError
        If the GeoTIFF tile cannot be opened.
    """
    # Image width and height in pixels
    raster = gdal.Open(gtiff_file)
    if raster is None:
        raise FileNotFoundError("Could not open tile: " + gtiff_file)
    img_width = raster.RasterXSize
    img_height = raster.RasterYSize

    # Convert from YOLO coordinates to pixel corners
    coords = yolo2xy(label_file, img_width, img_height) # class, x1, y1, x2, y2, probability

    # Tile footprint, and the footprint shrunk by `intile` (computed on a copy of
    # the geometry so the tile index itself is never modified).
    tile_xmin, tile_ymin, _, _ = one_tile.total_bounds
    inner_xmin, inner_ymin, inner_xmax, inner_ymax = one_tile.geometry.buffer(-intile).total_bounds

    classes, probs, polygons = [], [], []
    for box in coords:
        # Pixel -> map coordinates.
        # NOTE(review): the Y offset assumes the tile footprint is exactly
        # tile_size_m tall -- confirm against how tile_ortho builds the tile index.
        x1 = (box[1] * xres) + tile_xmin
        y1 = (box[2] * yres) + tile_ymin + tile_size_m
        x2 = (box[3] * xres) + tile_xmin
        y2 = (box[4] * yres) + tile_ymin + tile_size_m

        # Skip the box if its centroid is NOT within the inner tile (removes the overlap)
        cx = (x1 + x2) / 2
        cy = (y1 + y2) / 2
        if cx < inner_xmin or cx > inner_xmax or cy < inner_ymin or cy > inner_ymax:
            continue

        classes.append(box[0])
        probs.append(box[5])
        polygons.append(Polygon([(x1, y1), (x2, y1), (x2, y2), (x1, y2), (x1, y1)]))

    if not polygons:
        return None
    # Build the frame once from lists instead of appending one-row frames.
    return gpd.GeoDataFrame({'class': classes, 'prob': probs}, crs=crs, geometry=polygons)


# YOLO prediction files (.txt) and GeoTIFF tiles (.tif), listed without changing
# the working directory; sorted so the processing order is reproducible.
labels = sorted(glob.glob(os.path.join(labels_dir, "*.txt")))
gtiff_by_stem = {Path(p).stem: p for p in glob.glob(os.path.join(tiles_dir, "*.tif"))}

# The tile index and CRS are the same for every tile, so load them once.
tile_index_path = tiles_dir+"/"+ortho_name+"_tile_index.shp"
tile_index = gpd.read_file(tile_index_path)
crs = "EPSG:" + EPSG_code  # authority string; the {'init': ...} form is deprecated

# Convert each prediction file to map coordinates and collect the cleaned boxes
tile_frames = []
last_pct = None
for lab, label_file in enumerate(labels):
    pct = round(lab/len(labels)*100)
    if pct != last_pct:  # report each percentage only once
        print(str(pct)+" % done!")
        last_pct = pct

    # Match the label file with its GeoTIFF tile and its footprint in the tile index
    label_file_name = Path(label_file).stem
    gtiff_file = gtiff_by_stem.get(label_file_name)
    if gtiff_file is None:
        print("Skipping "+label_file_name+": no matching GeoTIFF tile found")
        continue
    one_tile = tile_index[tile_index['ID']==label_file_name+".tif"]
    if one_tile.empty:
        print("Skipping "+label_file_name+": tile not found in tile index")
        continue

    bboxes_tile = _tile_boxes_to_geo(label_file, gtiff_file, one_tile, crs, xres, yres, tile_size_m, intile)
    if bboxes_tile is None:
        # No box retained for this tile: nothing to clean up or add.
        continue

    # Clean up boxes (removing overlapping ones) and keep them for the final merge
    tile_frames.append(cleanUp_boudingBoxes(bboxes_tile, iou_thresh))

# Merge the boxes of all tiles in one step
if tile_frames:
    all_bboxes = pd.concat(tile_frames, ignore_index=True)
else:
    print("No bounding boxes were retained for any tile")
    all_bboxes = gpd.GeoDataFrame({'class': [], 'prob': []}, geometry=[], crs=crs)

0 % done!
0 % done!
1 % done!
1 % done!
2 % done!
2 % done!
3 % done!
3 % done!
4 % done!
4 % done!
4 % done!
5 % done!
5 % done!
6 % done!
6 % done!
7 % done!
7 % done!
8 % done!
8 % done!
8 % done!
9 % done!
9 % done!
10 % done!
10 % done!
11 % done!
11 % done!
12 % done!
12 % done!
12 % done!
13 % done!
13 % done!
14 % done!
14 % done!
15 % done!
15 % done!
15 % done!
16 % done!
16 % done!
17 % done!
17 % done!
18 % done!
18 % done!
19 % done!
19 % done!
19 % done!
20 % done!
20 % done!
21 % done!
21 % done!
22 % done!
22 % done!
23 % done!
23 % done!
23 % done!
24 % done!
24 % done!
25 % done!
25 % done!
26 % done!
26 % done!
27 % done!
27 % done!
27 % done!
28 % done!
28 % done!
29 % done!
29 % done!
30 % done!
30 % done!
31 % done!
31 % done!
31 % done!
32 % done!
32 % done!
33 % done!
33 % done!
34 % done!
34 % done!
35 % done!
35 % done!
35 % done!
36 % done!
36 % done!
37 % done!
37 % done!
38 % done!
38 % done!
38 % done!
39 % done!
39 % done!
40 % done!
40 % done!
41 % done!

Export final shapefile

In [14]:
# Write the merged bounding boxes next to the orthomosaic as an ESRI Shapefile
predictions_shp = f"{ortho_folder_path}/predictions.shp"
all_bboxes.to_file(predictions_shp, driver='ESRI Shapefile')

# 6 - Cleanup environment 

In [15]:
# Delete the whole tiles folder: the tiles, the tile index and the predictions
# sub-folder all live inside tiles_dir.
# If the kernel's working directory is inside tiles_dir, step out first so the
# kernel is not left in a directory that no longer exists.
_tiles_root = os.path.realpath(tiles_dir)
if os.path.commonpath([os.path.realpath(os.getcwd()), _tiles_root]) == _tiles_root:
    os.chdir(os.path.dirname(_tiles_root))
shutil.rmtree(tiles_dir)

# End! (unless one wants to summarize the results beyond the bounding box predictions)

# 7 - Create PDF report (?)