<a href="https://colab.research.google.com/github/karl-gardner/droplet_detection/blob/master/yolov5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

<a align="left" href="https://ultralytics.com/yolov5" target="_blank">
<img width="1024" src="https://user-images.githubusercontent.com/26833433/125273437-35b3fc00-e30d-11eb-9079-46f313325424.png"></a>

This is the **official YOLOv5 🚀 notebook** by **Ultralytics**, and is freely available for redistribution under the [GPL-3.0 license](https://choosealicense.com/licenses/gpl-3.0/). 
For more information please visit https://github.com/ultralytics/yolov5 and https://ultralytics.com. Thank you!

# 0. Setup

Clone repo, install dependencies and check PyTorch and GPU.

In [1]:
!git clone https://github.com/karl-gardner/droplet_detection  # clone repo
%cd /content/droplet_detection/yolov5
%pip install -qr requirements.txt  # install dependencies

import torch
from IPython.display import Image, clear_output  # to display images
import os
import shutil
import cv2
import matplotlib.pyplot as plt
import numpy as np
import math
from google.colab import files
import random
import csv

%cd /content/droplet_detection
import funcs


clear_output()
print(f"Setup complete. Using torch {torch.__version__} ({torch.cuda.get_device_properties(0).name if torch.cuda.is_available() else 'CPU'})")

Setup complete. Using torch 1.12.1+cu113 (Tesla P100-PCIE-16GB)


In [2]:
# Mount Google Drive (Colab only) so that the weight and data files referenced
# under /content/drive/MyDrive by the cells below are accessible.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# 1.1 Droplet model dataset from roboflow (PC3DropletDetection2)

Data With Augmentation for Training (final_dataset)

In [3]:
%cd /content/droplet_detection
!curl -L "[ROBOFLOW-API-KEY]" > roboflow.zip; unzip roboflow.zip; rm roboflow.zip
clear_output()

Data with No Augmentation (No_Augmentation)

In [4]:
# %cd /content/droplet_detection
# !curl -L "[ROBOFLOW-API-KEY]" > roboflow.zip; unzip roboflow.zip; rm roboflow.zip
# clear_output()

# 1.2 Cell model dataset from roboflow (Cropped_Drops2)

Data With Augmentation for Training (final_dataset)

In [5]:
# %cd /content/droplet_detection
# !curl -L "[ROBOFLOW-API-KEY]" > roboflow.zip; unzip roboflow.zip; rm roboflow.zip
# clear_output()

Data with No Augmentation (No_Augmentation)

In [6]:
# %cd /content/droplet_detection
# !curl -L "[ROBOFLOW-API-KEY]" > roboflow.zip; unzip roboflow.zip; rm roboflow.zip
# clear_output()

# 2.1 Get information for annotations: Table 2.

For droplet model

In [7]:
# %cd /content/droplet_detection

# # drop_labels() returns a list of the counted labels: [drop_0cell, drop_1cell, drop_2cell, drop_3cell, images]
# totals = []

# totals.append(funcs.drop_labels('train/labels','Train'))

# totals.append(funcs.drop_labels('valid/labels','Validation'))

# totals.append(funcs.drop_labels('test/labels','Test'))

# totals = np.array(totals)

# print("Total Count")
# print("drop_0cell: " + str(np.sum(totals[:,0])))
# print("drop_1cell: " + str(np.sum(totals[:,1])))
# print("drop_2cell: " + str(np.sum(totals[:,2])))
# print("drop_3cell: " + str(np.sum(totals[:,3])))
# print("combined: " + str(np.sum(totals[:,0:4])))
# print("images: " + str(np.sum(totals[:,4])))

For cell model

In [8]:
# %cd /content/droplet_detection

# # cell_labels() returns a list of the counted totals: [cell, images]
# totals = []

# totals.append(funcs.cell_labels('train/labels','Train'))

# totals.append(funcs.cell_labels('valid/labels','Validation'))

# totals.append(funcs.cell_labels('test/labels','Test'))

# totals = np.array(totals)

# print("Total Count")
# print("combined: " + str(np.sum(totals[:,0])))
# print("images: " + str(np.sum(totals[:,1])))

# 2.2 Save Images for Annotation Examples: Figure ann

For droplet model

In [9]:
# %cd /content/droplet_detection
# funcs.save_labels('train/images', 'droplet', gt_colors=[(0,0,255), (0,255,255), (0,255, 0), (255,0,255)])

# gts = sorted(os.listdir('/label_results/gts'))
# files.download('/label_results/gts/{}'.format(gts[1]))

For cell model

In [10]:
# %cd /content/droplet_detection
# funcs.save_labels('train/images', 'cell', gt_colors=[(0, 255, 0)])

# gts = sorted(os.listdir('/label_results/gts'))
# files.download('/label_results/gts/{}'.format(gts[3]))
# files.download('/label_results/gts/{}'.format(gts[7]))
# files.download('/label_results/gts/{}'.format(gts[9]))

# 3. Save cropped droplets with one or more cells and upload this to roboflow

In [11]:
# # Now convert ground truth labels and boxes
# %cd /content/droplet_detection/yolov3

# # Using the un-augmented dataset save around 2300 images from training, validation, and test droplets with 1 or more cells in them
# # I am sorry this isn't reproducible, I can't remember what I did here. Apparently I only uploaded 2,063 images but when I run this code now it saves 2,269 images.
# # This is what the code was supposed to be like though: 

# tot_saved = funcs.save_cropped(datasets = ["train", "valid", "test"], counter_tot = 0)

# !rm /cropped_drops.zip
# !zip -r /cropped_drops.zip /cropped_drops
# clear_output()

# print("Number of total images saved from train, validation, and test sets: ",tot_saved)
# files.download("/cropped_drops.zip")

# 4.1 Train

<p align=""><a href="https://roboflow.com/?ref=ultralytics"><img width="1000" src="https://uploads-ssl.webflow.com/5f6bc60e665f54545a1e52a5/615338ba77195c71bd2c5ab1_computer-vision-flow.png"/></a></p>
Close the active learning loop by sampling images from your inference conditions with the `roboflow` pip package
<br><br>

Train a YOLOv5s model on the [COCO128](https://www.kaggle.com/ultralytics/coco128) dataset with `--data coco128.yaml`, starting from pretrained `--weights yolov5s.pt`, or from randomly initialized `--weights '' --cfg yolov5s.yaml`.

- **Pretrained [Models](https://github.com/ultralytics/yolov5/tree/master/models)** are downloaded
automatically from the [latest YOLOv5 release](https://github.com/ultralytics/yolov5/releases)
- **[Datasets](https://github.com/ultralytics/yolov5/tree/master/data)** available for autodownload include: [COCO](https://github.com/ultralytics/yolov5/blob/master/data/coco.yaml), [COCO128](https://github.com/ultralytics/yolov5/blob/master/data/coco128.yaml), [VOC](https://github.com/ultralytics/yolov5/blob/master/data/VOC.yaml), [Argoverse](https://github.com/ultralytics/yolov5/blob/master/data/Argoverse.yaml), [VisDrone](https://github.com/ultralytics/yolov5/blob/master/data/VisDrone.yaml), [GlobalWheat](https://github.com/ultralytics/yolov5/blob/master/data/GlobalWheat2020.yaml), [xView](https://github.com/ultralytics/yolov5/blob/master/data/xView.yaml), [Objects365](https://github.com/ultralytics/yolov5/blob/master/data/Objects365.yaml), [SKU-110K](https://github.com/ultralytics/yolov5/blob/master/data/SKU-110K.yaml).
- **Training Results** are saved to `runs/train/` with incrementing run directories, i.e. `runs/train/exp2`, `runs/train/exp3` etc.
<br><br>

## Train on Custom Data with Roboflow 🌟 NEW

[Roboflow](https://roboflow.com/?ref=ultralytics) enables you to easily **organize, label, and prepare** a high quality dataset with your own custom data. Roboflow also makes it easy to establish an active learning pipeline, collaborate with your team on dataset improvement, and integrate directly into your model building workflow with the `roboflow` pip package.

- Custom Training Example: [https://blog.roboflow.com/how-to-train-yolov5-on-a-custom-dataset/](https://blog.roboflow.com/how-to-train-yolov5-on-a-custom-dataset/?ref=ultralytics)
- Custom Training Notebook: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/roboflow-ai/yolov5-custom-training-tutorial/blob/main/yolov5-custom-training.ipynb)
<br>

<p align=""><a href="https://roboflow.com/?ref=ultralytics"><img width="480" src="https://uploads-ssl.webflow.com/5f6bc60e665f54545a1e52a5/6152a275ad4b4ac20cd2e21a_roboflow-annotate.gif"/></a></p>Label images lightning fast (including with model-assisted labeling)

In [12]:
# # # Train YOLOv5 on custom dataset. This is currently not working, will have to use ultralytics repository for now: https://github.com/ultralytics/yolov5
# # # I have made an issue on yolov5 repository to hopefully fix the issue
# %cd /content/droplet_detection/yolov5
# !python train.py --img 544 --batch 32 --epochs 1 --data '../yaml_files/droplet_model.yaml' --weights '' --cfg ./models/yolov5m.yaml --cache

In [13]:
# %cp /content/droplet_detection/yolov5/runs/train/exp/weights/best.pt /content/drive/MyDrive/droplet_classification/data_files/yolov5_weights_365_best.pt

# 4.2 High quality mAP plots for validation set

In [14]:
# funcs.save_map(results_path = "/content/drive/MyDrive/droplet_classification/data_files/yolov5_cell_train.csv", title="YOLOv5", epoch=38)
# files.download("/mAP_yolov5.png")

# 5.1 mAP calculation for droplet model test set: Table drop

In [15]:
# Evaluate the droplet-model weights on the test split (--task test) of the
# dataset described by ../yaml_files/droplet_model.yaml, at image size 544.
%cd /content/droplet_detection/yolov5
!python val.py --weights /content/drive/MyDrive/droplet_classification/data_files/yolov5_weights_droplet_374_best.pt --img 544 --data ../yaml_files/droplet_model.yaml --task test

# # Download Figures
# files.download('runs/val/exp/confusion_matrix.tif')
# files.download("runs/val/exp/PR_curve.png")

/content/droplet_detection/yolov5
[34m[1mval: [0mdata=../yaml_files/droplet_model.yaml, weights=['/content/drive/MyDrive/droplet_classification/data_files/yolov5_weights_droplet_374_best.pt'], batch_size=32, imgsz=544, conf_thres=0.001, iou_thres=0.6, task=test, device=, workers=8, single_cls=False, augment=False, verbose=False, save_txt=False, save_hybrid=False, save_conf=False, save_json=False, project=runs/val, name=exp, exist_ok=False, half=False, dnn=False
YOLOv5 🚀 2022-8-23 torch 1.12.1+cu113 CUDA:0 (Tesla P100-PCIE-16GB, 16281MiB)

Fusing layers... 
YOLOv5m summary: 290 layers, 20865057 parameters, 0 gradients, 47.9 GFLOPs
Downloading https://ultralytics.com/assets/Arial.ttf to /root/.config/Ultralytics/Arial.ttf...
100% 755k/755k [00:00<00:00, 47.6MB/s]
[34m[1mtest: [0mScanning '/content/droplet_detection/yolov5/../test/labels' images and labels...128 found, 0 missing, 0 empty, 0 corrupt: 100% 128/128 [00:00<00:00, 1427.96it/s]
[34m[1mtest: [0mNew cache created: /conte

# 5.2 mAP calculation for cell model test set: Table cell

In [16]:
# # Evaluate the YOLOv5 cell model on the test split (--task test) of the dataset in cell_model.yaml
# %cd /content/droplet_detection/yolov5
# !rm -r runs/val/exp
# !python val.py --weights /content/drive/MyDrive/droplet_classification/data_files/yolov5_weights_cell_38_best.pt --img 544 --data ../yaml_files/cell_model.yaml --task test
# # files.download('runs/val/exp/confusion_matrix.tif')
# # files.download("runs/val/exp/PR_curve.png")

# 6.1 Ground truth labels vs predictions for droplet model test set: Figure gvpdrop

In [17]:
# %cd /content/droplet_detection/yolov5
# !rm -r runs/detect/exp
# !python detect.py --weights /content/drive/MyDrive/droplet_classification/data_files/yolov5_weights_droplet_374_best.pt --img 544 --conf-thres 0.6 --source ../test/images --save-txt --save-conf

# funcs.save_labels('../test/images', 'droplet', gt_colors=[(0,0,255), (0,255,255), (0,255, 0), (255,0,255)])

# !rm -r /supplemental
# os.mkdir('/supplemental')
# image_files = sorted(os.listdir("/label_results/gts"))

# random.seed(5)
# # random.seed(5) may have changed over time. This was the output of random.sample(image_files, 6) at the time of writing:

# for i, image_file in enumerate(random.sample(image_files, 6)):
#   shutil.copy('/label_results/inputs/' + image_file, '/supplemental/im_{}_input.png'.format(i))
#   shutil.copy('/label_results/gts/' + image_file, '/supplemental/im_{}_gt.png'.format(i))
#   shutil.copy('/label_results/preds/' + image_file, '/supplemental/im_{}_pred.png'.format(i))

# !rm /supplemental.zip
# !zip -r /supplemental.zip /supplemental
# clear_output()

# files.download('/supplemental.zip')
# files.download("/label_results/gts/" + image_files[109])
# files.download("/label_results/preds/" + image_files[109])

# 6.2 Ground truth labels vs predictions for cell model test set: Figure gvpdrop

In [18]:
# %cd /content/droplet_detection/yolov5
# !rm -r runs/detect/exp
# !python detect.py --weights /content/drive/MyDrive/droplet_classification/data_files/yolov5_weights_cell_38_best.pt --img 544 --source ../test/images --save-txt --save-conf

# funcs.save_labels('../test/images', 'cell', gt_colors=[(0,255,0)], pred_colors=[(0,0,255)])

# !rm -r /supplemental
# os.mkdir('/supplemental')
# image_files = sorted(os.listdir("/label_results/gt_preds"))

# random.seed(5)
# # random.seed(5) may have changed over time. This was the output of random.sample(image_files, 9) at the time of writing:

# for image_file in random.sample(image_files, 9):
#   shutil.copy('/label_results/gt_preds/' + image_file, '/supplemental')

# !rm /supplemental.zip
# !zip -r /supplemental.zip /supplemental
# clear_output()

# files.download('/supplemental.zip')
# files.download("/label_results/gt_preds/" + image_files[1])
# files.download("/label_results/gt_preds/" + image_files[11])
# files.download("/label_results/gt_preds/" + image_files[68])
# files.download("/label_results/gt_preds/" + image_files[96])

# 7.1 Conduct average run times across test set: Table FPS

In [19]:
# %cd /content/droplet_detection/yolov5
# !python detect.py --weights /content/drive/MyDrive/droplet_classification/data_files/yolov5_weights_cell_38_best.pt --img 544 --conf-thres 0.6 --source=../test/images

# 7.2 Computer Statistics

In [20]:
# !nvidia-smi -L
# !nvidia-smi

In [21]:
# !lscpu |grep 'Model name'

# #no.of sockets i.e available slots for physical processors
# !lscpu | grep 'Socket(s):'

# #no.of cores each processor is having 
# !lscpu | grep 'Core(s) per socket:'

# #no.of threads each core is having
# !lscpu | grep 'Thread(s) per core'

# !lscpu | grep "L3 cache" 

# #if it had turbo boost it would've shown Min and Max MHz also but it is only showing current frequency this means it always operates at shown frequency
# !lscpu | grep "MHz"

# #memory that we can use
# !free -h --si | awk  '/Mem:/{print $2}'

# #hard disk space that we can use
# !df -h / | awk '{print $4}'

# 8.1 Compare Production Set with Hand Counted Percentages: Figure hcomp

Run 500 images from second trial and compare droplet ratios with hand counting (Trial 1)

In [22]:
# %cd /content/droplet_detection/yolov5
# production_images_all = sorted(os.listdir("/content/drive/MyDrive/droplet_classification/data_files/production_set/data_11-9-2021_t02_0-6"))

# # random.seed(5)
# # rand_int = random.randint(0,9)
# # # rand_int for seed(5) was 9
# # production_images = production_images_all[1000+50*rand_int : 1000+50*rand_int+50]
# production_images = production_images_all[1000:1500]

# !rm -r data/production_images
# !mkdir data/production_images
# for image in production_images:
#   shutil.copy('/content/drive/MyDrive/droplet_classification/data_files/production_set/data_11-9-2021_t02_0-6/' + image, 'data/production_images')

# # !zip -r /hand_count.zip /content/droplet_detection/yolov5/data/production_images
# # files.download("/hand_count.zip")

# !rm -r runs/detect/exp
# !python detect.py --weights /content/drive/MyDrive/droplet_classification/data_files/yolov5_weights_droplet_374_best.pt --img 544 --conf-thres 0.6 --source data/production_images --save-txt
# clear_output()

# # print('random integer: ',rand_int)
# _ = funcs.drop_labels('runs/detect/exp/labels', 'Production')

Run 500 images from third trial and compare droplet ratios with hand counting (Trial 2)

In [23]:
# %cd /content/droplet_detection/yolov5
# production_images_all = sorted(os.listdir("/content/drive/MyDrive/droplet_classification/data_files/production_set/data_11-15-2021_t03_0-3"))

# # random.seed(5)
# # rand_int = random.randint(0,9)
# # # rand_int for seed(5) was 9
# # production_images = production_images_all[1000+50*rand_int : 1000+50*rand_int+50]
# production_images = production_images_all[1000:1500]

# !rm -r data/production_images
# !mkdir data/production_images
# for image in production_images:
#   shutil.copy('/content/drive/MyDrive/droplet_classification/data_files/production_set/data_11-15-2021_t03_0-3/' + image, 'data/production_images')

# # !zip -r /hand_count.zip /content/droplet_detection/yolov5/data/production_images
# # files.download("/hand_count.zip")

# !rm -r runs/detect/exp
# !python detect.py --weights /content/drive/MyDrive/droplet_classification/data_files/yolov5_weights_droplet_374_best.pt --img 544 --conf-thres 0.6 --source data/production_images --save-txt
# clear_output()

# # print('random integer: ',rand_int)
# _ = funcs.drop_labels('runs/detect/exp/labels', 'Production')

Plot ML vs Hand Distribution Curves

In [24]:
# # Grab fractions from .csv file, row is trials and column is droplets... my brain hurts after this one
# totals = np.zeros((20,5))
# with open('/content/drive/MyDrive/droplet_classification/data_files/yolov5_drop_fractions.csv', newline='') as csvfile:
#   spamreader = csv.reader(csvfile, quotechar='|')
#   for i, row in enumerate(spamreader):
#     if i % 6 == 0:
#       continue
#     totals[int((i-1)/6), int((i-1)%6)] = int(row[1])
# ML_fractions = totals[:, 0:-1] / totals[:, -1, None]

# # trial = [1,3]
# trial = [0,2]
# for i in range(2):
#   # Grab fractions from .csv file, row is images and column is droplets
#   totals = np.zeros((500,4))
#   with open(f'/content/drive/MyDrive/droplet_classification/data_files/hand_count_trial{i+1}.csv', newline='') as csvfile:
#     spamreader = csv.reader(csvfile, quotechar='|')
#     next(spamreader)
#     for j, row in enumerate(spamreader):
#       totals[j, :] = [int(row[1]), int(row[2]), int(row[3]), int(row[4])]

#   # rand_int for seed(5) was 9
#   rand_int = 9
#   # BH_fractions = np.sum(totals[50*rand_int : 50*rand_int+50], axis=0) / np.sum(totals[50*rand_int : 50*rand_int+50])
#   BH_fractions = np.sum(totals, axis=0) / np.sum(totals)

#   K = [0,1,2,3]
#   plt.plot(K,[ML_fractions[trial[i],0],ML_fractions[trial[i],1],ML_fractions[trial[i],2],ML_fractions[trial[i],3]], marker='o', color='r', label="YOLOv5 Model")
#   plt.plot(K,[BH_fractions[0],BH_fractions[1],BH_fractions[2],BH_fractions[3]], marker='o', color='g', label='Hand Counted')

#   plt.xticks(K,("0","1","2",r'$\geq3$'))
#   plt.tick_params(labelsize=16)
#   plt.xlabel("k", fontsize=18)
#   plt.ylabel("% of Droplets",fontsize=18)
#   plt.title(f'Trial {i+1}: ' u'\u2248' ' 0-0.5 seconds (50 images)',fontsize=18)
#   # plt.title(f'Trial {i+1}: ' u'\u2248' ' 0-5 seconds (500 images)',fontsize=18)
#   plt.legend(loc='lower right',fontsize=15)

#   plt.savefig(f'/trial_{i+1}.png',dpi=500,bbox_inches='tight')
#   files.download(f'/trial_{i+1}.png')
#   plt.clf()

# 8.2 Compare Production Set with Poisson Distribution: Figure pcomp

For second trial of experimental images between 0-6 minutes (11/9/2021)

In [25]:
# %cd /content/droplet_detection/yolov5
# production_images_all = sorted(os.listdir("/content/drive/MyDrive/droplet_classification/data_files/production_set/data_11-9-2021_t02_0-6"))
# production_images = production_images_all[9000:15400]

# !rm -r data/production_images
# !mkdir data/production_images
# for image in production_images:
#   shutil.copy('/content/drive/MyDrive/droplet_classification/data_files/production_set/data_11-9-2021_t02_0-6/' + image, 'data/production_images')

# !rm -r runs/detect/exp
# !python detect.py --weights /content/drive/MyDrive/droplet_classification/data_files/yolov5_weights_droplet_374_best.pt --img 544 --conf-thres 0.6 --source data/production_images --save-txt
# clear_output()

# _ = funcs.drop_labels('runs/detect/exp/labels', 'Production')

For second trial of images between 87-89 minutes (11/9/2021)

In [26]:
# %cd /content/droplet_detection/yolov5
# production_images_all = sorted(os.listdir("/content/drive/MyDrive/droplet_classification/data_files/production_set/data_11-9-2021_t02_87-89"))
# production_images = production_images_all[0:6400]

# !rm -r data/production_images
# !mkdir data/production_images
# for image in production_images:
#   shutil.copy('/content/drive/MyDrive/droplet_classification/data_files/production_set/data_11-9-2021_t02_87-89/' + image, 'data/production_images')

# !rm -r runs/detect/exp
# !python detect.py --weights /content/drive/MyDrive/droplet_classification/data_files/yolov5_weights_droplet_374_best.pt --img 544 --conf-thres 0.6 --source data/production_images --save-txt
# clear_output()

# _ = funcs.drop_labels('runs/detect/exp/labels', 'Production')

For third trial of images between 0-3 minutes (11/15/2021)

In [27]:
# %cd /content/droplet_detection/yolov5
# production_images_all = sorted(os.listdir("/content/drive/MyDrive/droplet_classification/data_files/production_set/data_11-15-2021_t03_0-3"))
# production_images = production_images_all[3000:9400]

# !rm -r data/production_images
# !mkdir data/production_images
# for image in production_images:
#   shutil.copy('/content/drive/MyDrive/droplet_classification/data_files/production_set/data_11-15-2021_t03_0-3/' + image, 'data/production_images')

# !rm -r runs/detect/exp
# !python detect.py --weights /content/drive/MyDrive/droplet_classification/data_files/yolov5_weights_droplet_374_best.pt --img 544 --conf-thres 0.6 --source data/production_images --save-txt
# clear_output()

# _ = funcs.drop_labels('runs/detect/exp/labels', 'Production')

For third trial of images between 59-60 minutes (11/15/2021)

In [28]:
# %cd /content/droplet_detection/yolov5
# production_images_all = sorted(os.listdir("/content/drive/MyDrive/droplet_classification/data_files/production_set/data_11-15-2021_t03_59-60"))
# production_images = production_images_all[0:6400]

# !rm -r data/production_images
# !mkdir data/production_images
# for image in production_images:
#   shutil.copy('/content/drive/MyDrive/droplet_classification/data_files/production_set/data_11-15-2021_t03_59-60/' + image, 'data/production_images')

# !rm -r runs/detect/exp
# !python detect.py --weights /content/drive/MyDrive/droplet_classification/data_files/yolov5_weights_droplet_374_best.pt --img 544 --conf-thres 0.6 --source data/production_images --save-txt
# clear_output()

# _ = funcs.drop_labels('runs/detect/exp/labels', 'Production')

Plot Poisson Distribution Curves

In [29]:
# # Calculate concentration and droplet volume
# c_0 = 7e6
# d = 74e-6
# v_0 = (4/3)*math.pi*(d/2)**3
# v_0 = v_0*10**6

# # Calculate lambda as the expected value or the average number of cells per nanoliter drop
# lam = c_0*v_0


# Pr = []
# K = [0,1,2]
# for k in K:
#   Pr.append((lam**k)*math.exp(-1*lam)/math.factorial(k))
# K.append(3)
# Pr.append(1-sum(Pr))

# # Grab fractions from .csv file, row is trials and column is droplets... my brain hurts after this one
# totals = np.zeros((20,5))
# with open('/content/drive/MyDrive/droplet_classification/data_files/yolov5_drop_fractions.csv', newline='') as csvfile:
#   spamreader = csv.reader(csvfile, quotechar='|')
#   for i, row in enumerate(spamreader):
#     if i % 6 == 0:
#       continue
#     totals[int((i-1)/6), int((i-1)%6)] = float(row[1])
# fractions = totals[:, 0:-1] / totals[:, -1, None]

# # Which row (section) in csv file to grab from
# # t_1 = 4
# # t_2 = 8
# t_1 = 12
# t_2 = 16
# plt.errorbar(K,[fractions[t_1,0],fractions[t_1,1],fractions[t_1,2],fractions[t_1,3]], yerr=np.std(fractions[t_1+1:t_1+4], 0), capsize=3,
#               marker='o', markersize=5, color='r', label="YOLOv5 " u"\u2248" " 0-64 seconds")
# plt.errorbar(K,[fractions[t_2,0],fractions[t_2,1],fractions[t_2,2],fractions[t_2,3]], yerr=np.std(fractions[t_2+1:t_2+4], 0), capsize=3,
#               marker='o', markersize=5, color='maroon', label="YOLOv5 " u"\u2248" " 5220-5284 seconds")
# plt.plot(K,Pr, marker='o', markersize=5, color='b', label='Poisson Distribution')

# plt.xticks(K,('0','1','2',r'$\geq3$'))
# plt.tick_params(labelsize=16)
# plt.xlabel('k', fontsize=18)
# plt.ylabel('P(x=k)', fontsize=18)
# plt.title("Trial 1: 12800 images", fontsize=18)

# plt.legend(fontsize=12)

# plt.savefig("/trial_1.png",dpi=500,bbox_inches='tight')
# files.download("/trial_1.png")

# 9.1 Conduct inference on production set with both models: Figure ework

In [30]:
# %cd /content/droplet_detection/yolov5
# from utils.general import xywhn2xyxy
# !rm -r data/production
# !rm -r runs/detect/exp

# prod_image = "test_03002_Cam_V710_Cine1.png"
# os.mkdir("data/production")
# shutil.copy('/content/drive/MyDrive/droplet_classification/data_files/production_set/data_11-15-2021_t03_0-3/'+prod_image,'data/production')
# !python detect.py --weights /content/drive/MyDrive/droplet_classification/data_files/yolov5_weights_droplet_374_best.pt --img 544 --conf-thres 0.6 --source data/production --save-txt --save-conf 

# funcs.save_labels('data/production', 'droplet', gt_colors=[(0,0,255), (0,255,255), (0,255, 0), (255,0,255)])
# shutil.copy('/label_results/preds/' + prod_image, '/')

# !rm -r runs/cropped_drops
# os.mkdir('runs/cropped_drops')
# with open('runs/detect/exp/labels/' + prod_image[:-4] + '.txt') as f:
#   boxes = []
#   for i, line in enumerate(f.readlines()):
#     line = line.split()
#     x = float(line[1])
#     y = float(line[2])
#     mean_wh = (float(line[3])+float(line[4]))/2
#     if x + mean_wh/2 > 1:
#       x = 1 - mean_wh/2
#     if y + mean_wh/2 > 1:
#       y = 1 - mean_wh/2
#     if x-mean_wh/2 < 0:
#       x = mean_wh/2
#     if y-mean_wh/2 < 0:
#       y = mean_wh/2
#     boxes.append([x,y,mean_wh,mean_wh])
#   boxes = xywhn2xyxy(np.array(boxes), w=544, h=544)
#   im = cv2.imread('data/production/'+prod_image)
#   for i in range(boxes.shape[0]):
#     # May not be square by one pixel... make square
#     if int(boxes[i,3])-int(boxes[i,1]) < int(boxes[i,2])-int(boxes[i,0]):
#       boxes[i,3] += 1
#     if int(boxes[i,3])-int(boxes[i,1]) > int(boxes[i,2])-int(boxes[i,0]):
#       boxes[i,2] += 1
#     cropped_resized = cv2.resize(im[int(boxes[i,1]):int(boxes[i,3]),int(boxes[i,0]):int(boxes[i,2])],(544, 544))
#     cv2.imwrite("runs/cropped_drops/cropped_drop_"+str(i)+".png",cropped_resized)

# !rm -r runs/detect/exp
# !python detect.py --weights /content/drive/MyDrive/droplet_classification/data_files/yolov5_weights_cell_38_best.pt --img 544 --conf-thres 0.6 --source runs/cropped_drops --save-txt --save-conf
# clear_output()

# funcs.save_labels('runs/cropped_drops', 'cell', pred_labels = 'cell ')
# files.download('/' + prod_image)
# files.download("/label_results/inputs/cropped_drop_1.png")
# files.download("/label_results/preds/cropped_drop_1.png")

# 9.2 Alternative inference on production set with both models

In [31]:
# """
# The paper was already written and model was trained, however this would probably have been the better option
# to train and predict the cropped droplets... shoot maybe next round I will do this but this would have been cool
# to add in the paper!  The idea is to pad the extra height or width with the average pixel value from the image.
# This way I am not taking some of another droplet with a half droplet that is cropped on the edge of the image.
# Still works for detecting though! 
# """

# %cd /content/droplet_detection/yolov5
# !rm -r runs/cropped_drops
# !rm -r runs/detect/exp
# !rm -r runs/detect/exp2


# prod_image = "test_03002_Cam_V710_Cine1.png"
# !python detect.py --weights /content/drive/MyDrive/droplet_classification/data_files/yolov5_weights_droplet_374_best.pt --img 544 --conf-thres 0.6 --source /content/drive/MyDrive/droplet_classification/data_files/production_set/data_11-15-2021_t03_0-3/{prod_image} --line-thickness 2 --hide-labels --hide-conf --save-crop
# files.download("runs/detect/exp/" + prod_image)

# count = 0
# !mkdir runs/cropped_drops
# for i in range(4):
#   for im_file in os.listdir('runs/detect/exp/crops/drop_{}cell'.format(i)):
#     image = cv2.imread('runs/detect/exp/crops/drop_{}cell/{}'.format(i,im_file))
#     mean = np.mean(image)
#     h = image.shape[0]
#     w = image.shape[1]
#     if h > w:
#       padded = np.zeros((h, h, 3))
#       l = int((h-w)/2)
#       padded[:,0:l,:] = mean
#       padded[:,l:l+w,:] = image
#       padded[:,l+w:,:] = mean
#     elif w > h:
#       padded = np.zeros((w, w, 3))
#       l = int((w-h)/2)
#       padded[0:l,:,:] = mean
#       padded[l:l+h,:,:] = image
#       padded[l+h:,:,:] = mean
#     else:
#       padded = image.copy()

#     cv2.imwrite('runs/cropped_drops/im_{}.png'.format(count), cv2.resize(padded,(544,544)))
#     count += 1

# !python detect.py --weights /content/drive/MyDrive/droplet_classification/data_files/yolov5_weights_cell_38_best.pt --img 544 --conf-thres 0.6 --source runs/cropped_drops --hide-labels --hide-conf


# 9.3 Construct Poisson Curve for multiple lambdas: Figure ework

In [32]:
# Prs = []
# lambdas = [1,3,6,9]
# colors = ["r","g","b","black",]
# K = list(range(0,21,1))


# for lam,c in zip(lambdas,colors):
#   Pr = []
#   for k in K:
#     Pr.append((lam**k)*math.exp(-1*lam)/math.factorial(k))

#   plt.plot(K,Pr, markersize=3.5,marker='o', color=c, label="\u03BB = " + str(lam))


# plt.xticks([0,5,10,15,20],["0","5","10","15","20"])
# plt.yticks([0.0,0.1,0.2,0.3,0.4],["0.0","0.1","0.2","0.3","0.4"])
# plt.xlabel("k", fontsize=30, fontname="Arial")
# plt.ylabel("$p_{k}$", fontsize=30, fontname="Arial")
# plt.tick_params(axis='both', which='major', labelsize=25)

# plt.legend(fontsize=20)
# # plt.subplots_adjust(right=0.1)
# plt.savefig("/poisson_distribution.png",dpi=500,bbox_inches='tight')
# files.download("/poisson_distribution.png")