<a href="https://colab.research.google.com/github/mbatiasonic/Bike-Sharing-Trends/blob/main/Copy_of_DeepForest_Training_Tutorial.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# DeepForest Training Walkthrough (CPU/GPU)
  - For GPU implementation:
      1. Select **Runtime** > **Change runtime type** and select **GPU** as the hardware accelerator.

In [1]:
#install the package, on colab make sure to upgrade existing packages. This is not needed in a clean env.
! pip install --upgrade deepforest

Collecting deepforest
  Downloading deepforest-1.3.3-py3-none-any.whl (20.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m20.0/20.0 MB[0m [31m64.1 MB/s[0m eta [36m0:00:00[0m
Collecting imagecodecs (from deepforest)
  Downloading imagecodecs-2024.6.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (39.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m39.5/39.5 MB[0m [31m13.8 MB/s[0m eta [36m0:00:00[0m
Collecting pytorch-lightning>=1.5.8 (from deepforest)
  Downloading pytorch_lightning-2.2.5-py3-none-any.whl (802 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m802.3/802.3 kB[0m [31m57.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting rasterio (from deepforest)
  Downloading rasterio-1.3.10-cp310-cp310-manylinux2014_x86_64.whl (21.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m21.5/21.5 MB[0m [31m48.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting recommonmark (from deepforest)
  

In [2]:
pip install git+https://github.com/weecology/DeepForest.git

Collecting git+https://github.com/weecology/DeepForest.git
  Cloning https://github.com/weecology/DeepForest.git to /tmp/pip-req-build-7_auzvql
  Running command git clone --filter=blob:none --quiet https://github.com/weecology/DeepForest.git /tmp/pip-req-build-7_auzvql
  Resolved https://github.com/weecology/DeepForest.git to commit 670afd57b40e15f06fe175864ff54fbeb91f9feb
  Preparing metadata (setup.py) ... [?25l[?25hdone


In [4]:
#load the modules
import os
import time
import torch
import numpy as np
from deepforest import main
from deepforest import get_data
from deepforest import utilities
from deepforest import preprocess


In [5]:
# Mount Google Drive at /content/drive; the later cells read and write files
# under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [6]:
# Convert the hand annotations from XML into the tabular retinanet format.
# `get_data` is only needed when fetching sample data bundled with the package,
# so the annotation file on Drive is referenced by its path directly.
YELL_xml = "/content/drive/MyDrive/overlap/DJI_0175.xml"
annotation = utilities.xml_to_annotations(YELL_xml)
# Preview the first rows (image_path, xmin, ymin, xmax, ymax, label)
annotation.head()

Unnamed: 0,image_path,xmin,ymin,xmax,ymax,label
0,DJI_0175.JPG,259,91,345,173,Tree
1,DJI_0175.JPG,62,412,179,512,Tree
2,DJI_0175.JPG,246,304,317,365,Tree
3,DJI_0175.JPG,833,784,941,881,Tree
4,DJI_0175.JPG,1202,845,1320,952,Tree


In [7]:
def check_and_fix_bboxes(df, min_size=0.01):
    """
    Detect degenerate bounding boxes and repair them in place.

    A box is invalid when ``xmax <= xmin`` or ``ymax <= ymin``. For every
    invalid row, ``xmax`` is raised to at least ``xmin + min_size`` and ``ymax``
    to at least ``ymin + min_size``; coordinates that already satisfy that
    bound are left untouched. The number of invalid rows is printed.

    Args:
        df: DataFrame with numeric ``xmin``, ``ymin``, ``xmax`` and ``ymax``
            columns. It is modified in place.
        min_size: Minimum width/height given to a repaired box. Must be > 0.

    Returns:
        The same DataFrame object that was passed in. A column that had to be
        repaired is converted to float so the fractional offset is stored
        without relying on pandas' implicit dtype upcasting.

    Raises:
        ValueError: If ``min_size`` is not positive.
        KeyError: If a coordinate column is missing from a non-empty frame.
    """
    if min_size <= 0:
        raise ValueError("min_size must be positive")

    # Nothing to inspect: keep the original behaviour of reporting zero boxes.
    if df.empty:
        print("Found 0 invalid bounding boxes.")
        return df

    # Validity is decided once, from the coordinates as they were passed in.
    invalid = (df["xmax"] <= df["xmin"]) | (df["ymax"] <= df["ymin"])

    for low_col, high_col in (("xmin", "xmax"), ("ymin", "ymax")):
        lower_bound = df[low_col] + min_size
        # Only rows flagged invalid are touched, and only when the upper
        # coordinate is actually below the bound (same result as max()).
        needs_fix = invalid & (df[high_col] < lower_bound)
        if needs_fix.any():
            df[high_col] = df[high_col].astype(float).mask(needs_fix, lower_bound)

    print(f"Found {int(invalid.sum())} invalid bounding boxes.")
    return df

In [8]:
# Re-read the annotations from the XML file so the bounding-box fix in the
# next cell (which edits the frame in place) starts from a fresh DataFrame.
annotation = utilities.xml_to_annotations(YELL_xml)

In [9]:
# Check and fix the bounding boxes.
# check_and_fix_bboxes modifies the frame it receives and returns that same
# object, so `annotations` and `annotation` both refer to the corrected data.
annotations= check_and_fix_bboxes(annotation)

Found 1 invalid bounding boxes.


In [10]:
# Save the corrected annotations to a new XML file on Drive (the index is not written).
# NOTE(review): DataFrame.to_xml emits pandas' generic row-based XML; confirm that
# layout is what is wanted if this file is meant to be read by xml_to_annotations.
annotations.to_xml("/content/drive/MyDrive/overlap/DJI_0175_1.xml", index=False)

In [11]:
# Path of the image file corresponding to the annotation file.
# `get_data` only resolves sample files bundled with the package, so the Drive
# path is used directly.
YELL_train = "/content/drive/MyDrive/overlap/DJI_0175.JPG"
image_path = os.path.dirname(YELL_train)
# Write the converted dataframe to file, saved alongside the image
annotation.to_csv(os.path.join(image_path, "train_example.csv"), index=False)

## Prepare Training and Validation Data
  - 75% Training Data
  - 25% Validation Data

In [18]:
# Path of the annotation CSV written next to the image
annotation_path = os.path.join(image_path, "train_example.csv")
# Cropped images are saved in a dedicated directory under the working directory.
# Create it explicitly (no error if it already exists) so this cell does not
# depend on the library creating it.
crop_dir = os.path.join(os.getcwd(), 'train_data_folder')
os.makedirs(crop_dir, exist_ok=True)
# Cut the raster into 800-pixel patches with 5% overlap; the returned frame
# holds the annotations for the generated crops.
train_annotations = preprocess.split_raster(path_to_raster=YELL_train,
                                            annotations_file=annotation_path,
                                            base_dir=crop_dir,
                                            patch_size=800,
                                            patch_overlap=0.05)

  dataset = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)


In [19]:
# Split image crops into training and validation sets. Normally these would be
# different tiles! This is just an example.
image_paths = train_annotations.image_path.unique()
# Hold out 25% of the crops for validation. Sampling is done WITHOUT replacement
# (the default samples with replacement, so duplicates would shrink the hold-out
# set) and with a fixed seed so the split is reproducible.
n_valid = int(len(image_paths) * 0.25)
split_rng = np.random.default_rng(42)
valid_paths = split_rng.choice(image_paths, size=n_valid, replace=False)
is_valid = train_annotations.image_path.isin(valid_paths)
valid_annotations = train_annotations.loc[is_valid]
train_annotations = train_annotations.loc[~is_valid]

In [20]:
# Report how many annotations ended up in each split
print("There are {} training crown annotations".format(train_annotations.shape[0]))
print("There are {} test crown annotations".format(valid_annotations.shape[0]))

# Both CSV files are saved in the crop directory, next to the cropped images
annotations_file = os.path.join(crop_dir, "train.csv")
validation_file = os.path.join(crop_dir, "valid.csv")
# Write the window annotations; to_csv keeps the header row by default and the
# index column is omitted.
train_annotations.to_csv(annotations_file, index=False)
valid_annotations.to_csv(validation_file, index=False)

There are 384 training crown annotations
There are 139 test crown annotations


In [21]:
# Display the path of the training CSV written in the previous cell
annotations_file

'/content/train_data_folder/train.csv'

## Training & Evaluating Using CPU

In [12]:
# Initialise the model and adjust its config for this run
m = main.deepforest()
# This section is the CPU run: pin the accelerator explicitly, otherwise the
# trainer picks up a GPU whenever one is available.
m.config["accelerator"] = "cpu"
m.config["train"]["csv_file"] = annotations_file
m.config["train"]["root_dir"] = os.path.dirname(annotations_file)
# Since this is a demo example and we aren't training for long, only show the higher quality boxes
# NOTE(review): the evaluation output recorded in this notebook still contains
# scores below 0.4 — confirm this key is honoured by the installed version.
m.config["score_thresh"] = 0.4
m.config["train"]['epochs'] = 2
m.config["validation"]["csv_file"] = validation_file
m.config["validation"]["root_dir"] = os.path.dirname(validation_file)
# Create the PyTorch Lightning trainer used for training, after the config
# values above have been set
m.create_trainer()
# Load the latest release model as the starting weights
m.use_release()

Reading config file: /usr/local/lib/python3.10/dist-packages/deepforest/data/deepforest_config.yml


Downloading: "https://download.pytorch.org/models/retinanet_resnet50_fpn_coco-eeacb38b.pth" to /root/.cache/torch/hub/checkpoints/retinanet_resnet50_fpn_coco-eeacb38b.pth
100%|██████████| 130M/130M [00:01<00:00, 131MB/s]


No validation file provided. Turning off validation loop


INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


NameError: name 'annotations_file' is not defined

In [None]:
# Fit the model with the trainer created above and report the wall-clock
# training time in seconds.
start_time = time.time()
m.trainer.fit(m)
print(f"--- Training on CPU: {(time.time() - start_time):.2f} seconds ---")

In [None]:
# NOTE(review): bare attribute reference — it only displays `m.get_transform`
# and is not used by any later cell; looks like leftover exploration.
m.get_transform

In [None]:
# Create a directory to hold the evaluation output (no error if it already exists)
save_dir = os.path.join(os.getcwd(), 'pred_result')
os.makedirs(save_dir, exist_ok=True)
# Evaluate at an IoU threshold of 0.4.
# NOTE(review): this scores the model on the training CSV (annotations_file),
# not on validation_file — confirm that is intended for this demo.
results = m.evaluate(annotations_file, os.path.dirname(annotations_file), iou_threshold=0.4, savedir=save_dir)

## Training & Evaluating Using GPU

In [23]:
# Initialise the model and adjust its config for this run
m = main.deepforest()
# Request GPU training explicitly so this cell fails fast on a runtime without
# a GPU instead of silently training on CPU. -1 asks for every visible GPU.
m.config["accelerator"] = "gpu"
m.config["devices"] = -1
# Legacy key kept for backward compatibility.
# NOTE(review): presumably only read by older deepforest releases — confirm.
m.config['gpus'] = '-1'
m.config["train"]["csv_file"] = annotations_file
m.config["train"]["root_dir"] = os.path.dirname(annotations_file)
m.config["score_thresh"] = 0.4
m.config["train"]['epochs'] = 3
m.config["validation"]["csv_file"] = validation_file
m.config["validation"]["root_dir"] = os.path.dirname(validation_file)
# Create the PyTorch Lightning trainer used for training, after the config
# values above have been set
m.create_trainer()
# Load the latest release model as the starting weights
m.use_release()

Reading config file: /usr/local/lib/python3.10/dist-packages/deepforest/data/deepforest_config.yml


INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.utilities.rank_zero:`Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used..


No validation file provided. Turning off validation loop
Model from DeepForest release https://github.com/weecology/DeepForest/releases/tag/1.0.0 was already downloaded. Loading model from file.
Loading pre-built model: https://github.com/weecology/DeepForest/releases/tag/1.0.0


In [24]:
# Fit the model and report the wall-clock training time in seconds
start_time = time.time()
m.trainer.fit(m)
print(f"--- Training on GPU: {(time.time() - start_time):.2f} seconds ---")

# Save the model weights to Drive. The target folder is created first so a
# missing directory cannot make the save fail after training has finished.
# The state dict is taken from `m` itself; the loading cell strips the
# 'model.' prefix from its keys.
model_path = '/content/drive/MyDrive/overlap/Trainedmodel/model.pth'
os.makedirs(os.path.dirname(model_path), exist_ok=True)
torch.save(m.state_dict(), model_path)
print("Model saved successfully.")

INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name       | Type                  | Params
-----------------------------------------------------
0 | model      | RetinaNet             | 32.1 M
1 | iou_metric | IntersectionOverUnion | 0     
2 | mAP_metric | MeanAveragePrecision  | 0     
-----------------------------------------------------
31.9 M    Trainable params
222 K     Non-trainable params
32.1 M    Total params
128.592   Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

  self.pid = os.fork()
/usr/local/lib/python3.10/dist-packages/pytorch_lightning/utilities/data.py:77: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 3. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.
  self.pid = os.fork()
/usr/local/lib/python3.10/dist-packages/pytorch_lightning/loops/fit_loop.py:298: The number of training batches (30) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

  self.pid = os.fork()


Validation: |          | 0/? [00:00<?, ?it/s]

  self.pid = os.fork()
  self.pid = os.fork()


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=3` reached.
  self.pid = os.fork()


--- Training on GPU: 34.88 seconds ---
Model saved successfully.


In [25]:
# Save the evaluation output to a prediction folder (no error if it already exists)
save_dir = os.path.join(os.getcwd(), 'pred_result')
os.makedirs(save_dir, exist_ok=True)
# Evaluate at an IoU threshold of 0.4.
# NOTE(review): this scores the model on the training CSV (annotations_file),
# not on validation_file — confirm that is intended for this demo.
results = m.evaluate(annotations_file, os.path.dirname(annotations_file), iou_threshold=0.4, savedir=save_dir)

INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  self.pid = os.fork()


Predicting: |          | 0/? [00:00<?, ?it/s]

In [26]:
# Display the full evaluation output; the following cells look at its
# 'results', 'box_precision', 'box_recall' and 'class_recall' entries.
results

{'results':     prediction_id  truth_id       IoU     score  xmin  xmax  ymin   ymax  \
 0               2         0  0.894841  0.313452   259   345    91  173.0   
 1               7         1  0.739404  0.200498    62   179   412  512.0   
 2               9         2  0.400796  0.180802   246   317   304  365.0   
 3              10         3  0.321714  0.180559     0    89   591  800.0   
 0               5         0  0.947755  0.609846     0    72    89  160.0   
 ..            ...       ...       ...       ...   ...   ...   ...    ...   
 4               4         4  0.755244  0.605225   683   800     0   68.0   
 5               5         5  0.706389  0.522825   507   606   646  722.0   
 6              14         6  0.605334  0.331703   495   584   725  796.0   
 7               0         7  0.825723  0.661273   413   494    28   93.0   
 8               7         8  0.000000  0.468739   532   604   579  643.0   
 
    predicted_label true_label       image_path  match  
 0    

In [27]:
# Box precision reported by the evaluation
results['box_precision']

0.47229334376121096

In [28]:
# Box recall reported by the evaluation
results["box_recall"]

0.9705678012695557

In [29]:
# Per-box table: prediction/truth ids, IoU, score, box coordinates, labels,
# image path and whether the pair counted as a match
results["results"]

Unnamed: 0,prediction_id,truth_id,IoU,score,xmin,xmax,ymin,ymax,predicted_label,true_label,image_path,match
0,2,0,0.894841,0.313452,259,345,91,173.0,Tree,Tree,DJI_0175_0.png,True
1,7,1,0.739404,0.200498,62,179,412,512.0,Tree,Tree,DJI_0175_0.png,True
2,9,2,0.400796,0.180802,246,317,304,365.0,Tree,Tree,DJI_0175_0.png,True
3,10,3,0.321714,0.180559,0,89,591,800.0,Tree,Tree,DJI_0175_0.png,False
0,5,0,0.947755,0.609846,0,72,89,160.0,Tree,Tree,DJI_0175_10.png,True
...,...,...,...,...,...,...,...,...,...,...,...,...
4,4,4,0.755244,0.605225,683,800,0,68.0,Tree,Tree,DJI_0175_9.png,True
5,5,5,0.706389,0.522825,507,606,646,722.0,Tree,Tree,DJI_0175_9.png,True
6,14,6,0.605334,0.331703,495,584,725,796.0,Tree,Tree,DJI_0175_9.png,True
7,0,7,0.825723,0.661273,413,494,28,93.0,Tree,Tree,DJI_0175_9.png,True


In [30]:
# Per-label recall, precision and sample size
results["class_recall"]

Unnamed: 0,label,recall,precision,size
0,0,1.0,1.0,374


In [31]:
# import torch
# # Save the trained model
# torch.save(m.model.state_dict(), "/content/drive/MyDrive/overlap/Trainedmodel/model.pth")

In [32]:
import collections

# Build a fresh deepforest object and load the release weights, which are then
# replaced by the fine-tuned weights below.
loaded_model = main.deepforest()
loaded_model.use_release()
# map_location="cpu" lets a checkpoint saved from a GPU run be read on a
# runtime without a GPU; load_state_dict then copies the values into the
# model's own tensors.
state_dict = torch.load("/content/drive/MyDrive/overlap/Trainedmodel/model.pth", map_location="cpu")

# Remove the 'model.' prefix from the state_dict keys so they match the keys of
# `loaded_model.model`; keys without the prefix are kept unchanged.
prefix = 'model.'
new_state_dict = collections.OrderedDict(
    (key[len(prefix):] if key.startswith(prefix) else key, value)
    for key, value in state_dict.items()
)

# Load the modified state_dict; the returned key-matching report is displayed
loaded_model.model.load_state_dict(new_state_dict)

Reading config file: /usr/local/lib/python3.10/dist-packages/deepforest/data/deepforest_config.yml


INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


No validation file provided. Turning off validation loop
Model from DeepForest release https://github.com/weecology/DeepForest/releases/tag/1.0.0 was already downloaded. Loading model from file.
Loading pre-built model: https://github.com/weecology/DeepForest/releases/tag/1.0.0


<All keys matched successfully>

In [33]:
# # Load the trained model
# loaded_model = main.deepforest()
# loaded_model.use_release()
# state_dict = torch.load("/content/drive/MyDrive/overlap/Trainedmodel/model.pth")
# loaded_model.model.load_state_dict(state_dict)

In [36]:
# Conduct tests on new images from your surroundings.
# `get_data` only resolves sample files bundled with the package, so the Drive
# path is used directly.
test_image = "/content/drive/MyDrive/overlap/DJI_0176.JPG"
# Predict over the tile in 1000-pixel windows with 25% overlap and return the
# detections as a table rather than a plot.
detections = loaded_model.predict_tile(test_image, return_plot=False, patch_size=1000, patch_overlap=0.25)
print(detections)

  dataset = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  self.pid = os.fork()


Predicting: |          | 0/? [00:00<?, ?it/s]

  self.pid = os.fork()


1090 predictions in overlapping windows, applying non-max supression
636 predictions kept after non-max suppression
       xmin    ymin    xmax    ymax label     score    image_path
0    4863.0  1083.0  4981.0  1178.0  Tree  0.872770  DJI_0176.JPG
1    4791.0  2333.0  4932.0  2459.0  Tree  0.868810  DJI_0176.JPG
2    1257.0   654.0  1359.0   736.0  Tree  0.857766  DJI_0176.JPG
3    3792.0  1717.0  3891.0  1805.0  Tree  0.848839  DJI_0176.JPG
4    2428.0   810.0  2532.0   898.0  Tree  0.845845  DJI_0176.JPG
..      ...     ...     ...     ...   ...       ...           ...
631  4655.0  2798.0  4706.0  2843.0  Tree  0.115980  DJI_0176.JPG
632  4506.0  2030.0  4573.0  2085.0  Tree  0.114912  DJI_0176.JPG
633   661.0  1304.0   714.0  1350.0  Tree  0.112825  DJI_0176.JPG
634  1597.0  2820.0  1659.0  2874.0  Tree  0.111513  DJI_0176.JPG
635  5065.0  2943.0  5134.0  3007.0  Tree  0.104547  DJI_0176.JPG

[636 rows x 7 columns]
