diff --git a/ObjectDetection/ObjectDetection.xcodeproj/project.pbxproj b/ObjectDetection/ObjectDetection.xcodeproj/project.pbxproj index a21134b..ffe52dd 100644 --- a/ObjectDetection/ObjectDetection.xcodeproj/project.pbxproj +++ b/ObjectDetection/ObjectDetection.xcodeproj/project.pbxproj @@ -7,6 +7,10 @@ objects = { /* Begin PBXBuildFile section */ + 2669BE16270FA65200806A63 /* aicook2.jpg in Resources */ = {isa = PBXBuildFile; fileRef = 2669BE11270FA61000806A63 /* aicook2.jpg */; }; + 2669BE18270FA65200806A63 /* aicook.txt in Resources */ = {isa = PBXBuildFile; fileRef = 2669BE13270FA62F00806A63 /* aicook.txt */; }; + 2669BE19270FA65200806A63 /* aicook1.jpg in Resources */ = {isa = PBXBuildFile; fileRef = 2669BE14270FA63D00806A63 /* aicook1.jpg */; }; + 2669BE1A270FA65200806A63 /* aicook3.jpg in Resources */ = {isa = PBXBuildFile; fileRef = 2669BE15270FA65200806A63 /* aicook3.jpg */; }; 266E87232563120D00CF5151 /* classes.txt in Resources */ = {isa = PBXBuildFile; fileRef = 266E87222563120D00CF5151 /* classes.txt */; }; 266E8746256350C000CF5151 /* CameraController.swift in Sources */ = {isa = PBXBuildFile; fileRef = 266E8742256350C000CF5151 /* CameraController.swift */; }; 266E8747256350C000CF5151 /* CVPixelBuffer+Helper.swift in Sources */ = {isa = PBXBuildFile; fileRef = 266E8743256350C000CF5151 /* CVPixelBuffer+Helper.swift */; }; @@ -29,6 +33,10 @@ /* End PBXBuildFile section */ /* Begin PBXFileReference section */ + 2669BE11270FA61000806A63 /* aicook2.jpg */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; path = aicook2.jpg; sourceTree = ""; }; + 2669BE13270FA62F00806A63 /* aicook.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = aicook.txt; sourceTree = ""; }; + 2669BE14270FA63D00806A63 /* aicook1.jpg */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; path = aicook1.jpg; sourceTree = ""; }; + 2669BE15270FA65200806A63 /* aicook3.jpg */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; path = aicook3.jpg; sourceTree = ""; }; 266E87222563120D00CF5151 /* classes.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = classes.txt; sourceTree = ""; }; 266E8742256350C000CF5151 /* CameraController.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = CameraController.swift; sourceTree = ""; }; 266E8743256350C000CF5151 /* CVPixelBuffer+Helper.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = "CVPixelBuffer+Helper.swift"; sourceTree = ""; }; @@ -109,6 +117,10 @@ 269E7487255CC69400B1D6CA /* test1.png */, 269E748B255CC6D100B1D6CA /* test2.jpg */, 269E748A255CC6D100B1D6CA /* test3.png */, + 2669BE13270FA62F00806A63 /* aicook.txt */, + 2669BE14270FA63D00806A63 /* aicook1.jpg */, + 2669BE11270FA61000806A63 /* aicook2.jpg */, + 2669BE15270FA65200806A63 /* aicook3.jpg */, 266E87222563120D00CF5151 /* classes.txt */, 26A8C11326E17F8100F4A58D /* yolov5s.torchscript.ptl */, ); @@ -201,12 +213,16 @@ buildActionMask = 2147483647; files = ( 269E747E255CC56400B1D6CA /* LaunchScreen.storyboard in Resources */, + 2669BE18270FA65200806A63 /* aicook.txt in Resources */, 26A8C11426E17F8100F4A58D /* yolov5s.torchscript.ptl in Resources */, 266E87232563120D00CF5151 /* classes.txt in Resources */, 269E747B255CC56400B1D6CA /* Assets.xcassets in Resources */, + 2669BE19270FA65200806A63 /* aicook1.jpg in Resources */, 269E748D255CC6D100B1D6CA /* test2.jpg in Resources */, + 2669BE16270FA65200806A63 /* aicook2.jpg in Resources */, 
269E7488255CC69400B1D6CA /* test1.png in Resources */, 269E7479255CC56200B1D6CA /* Main.storyboard in Resources */, + 2669BE1A270FA65200806A63 /* aicook3.jpg in Resources */, 269E748C255CC6D100B1D6CA /* test3.png in Resources */, ); runOnlyForDeploymentPostprocessing = 0; diff --git a/ObjectDetection/ObjectDetection/Utils/PrePostProcessor.swift b/ObjectDetection/ObjectDetection/Utils/PrePostProcessor.swift index 881c242..9e2cba5 100644 --- a/ObjectDetection/ObjectDetection/Utils/PrePostProcessor.swift +++ b/ObjectDetection/ObjectDetection/Utils/PrePostProcessor.swift @@ -17,7 +17,7 @@ class PrePostProcessor : NSObject { static let inputWidth = 640 static let inputHeight = 640 - // model output is of size 25200*85 + // model output is of size 25200*(num_of_class+5) static let outputRow = 25200 // as decided by the YOLOv5 model for input image of size 640*640 static let outputColumn = 85 // left, top, right, bottom, score and 80 class probability static let threshold : Float = 0.35 // score above which a detection is generated @@ -111,7 +111,7 @@ class PrePostProcessor : NSObject { let rect = CGRect(x: startX+ivScaleX*left, y: startY+top*ivScaleY, width: ivScaleX*(right-left), height: ivScaleY*(bottom-top)) - let prediction = Prediction(classIndex: cls, score: Float(truncating: outputs[i*85+4]), rect: rect) + let prediction = Prediction(classIndex: cls, score: Float(truncating: outputs[i*outputColumn+4]), rect: rect) predictions.append(prediction) } } diff --git a/ObjectDetection/ObjectDetection/aicook.txt b/ObjectDetection/ObjectDetection/aicook.txt new file mode 100644 index 0000000..2cf5818 --- /dev/null +++ b/ObjectDetection/ObjectDetection/aicook.txt @@ -0,0 +1,30 @@ +apple +banana +beef +blueberries +bread +butter +carrot +cheese +chicken +chicken_breast +chocolate +corn +eggs +flour +goat_cheese +green_beans +ground_beef +ham +heavy_cream +lime +milk +mushrooms +onion +potato +shrimp +spinach +strawberries +sugar +sweet_potato +tomato diff --git a/ObjectDetection/ObjectDetection/aicook1.jpg b/ObjectDetection/ObjectDetection/aicook1.jpg new file mode 100644 index 0000000..3b42a38 Binary files /dev/null and b/ObjectDetection/ObjectDetection/aicook1.jpg differ diff --git a/ObjectDetection/ObjectDetection/aicook2.jpg b/ObjectDetection/ObjectDetection/aicook2.jpg new file mode 100644 index 0000000..c8e42e3 Binary files /dev/null and b/ObjectDetection/ObjectDetection/aicook2.jpg differ diff --git a/ObjectDetection/ObjectDetection/aicook3.jpg b/ObjectDetection/ObjectDetection/aicook3.jpg new file mode 100644 index 0000000..6e8c8b8 Binary files /dev/null and b/ObjectDetection/ObjectDetection/aicook3.jpg differ diff --git a/ObjectDetection/README.md b/ObjectDetection/README.md index 14d32d6..e67fd77 100644 --- a/ObjectDetection/README.md +++ b/ObjectDetection/README.md @@ -4,6 +4,8 @@ [YOLO](https://pjreddie.com/darknet/yolo/) (You Only Look Once) is one of the fastest and most popular object detection models. [YOLOv5](https://github.com/ultralytics/yolov5) is an open-source implementation of the latest version of YOLO (for a quick test of loading YOLOv5 from PyTorch hub for inference, see [here](https://pytorch.org/hub/ultralytics_yolov5/#load-from-pytorch-hub)). This Object Detection with YOLOv5 iOS sample app uses the PyTorch scripted YOLOv5 model to detect objects of the [80 classes](https://github.com/ultralytics/yolov5/blob/master/data/coco.yaml) trained with the model. 
+**Update 10-07-2021**: Added a new section on using a custom dataset to fine-tune the YOLOv5 model (a.k.a. transfer learning), with steps to update the iOS demo app to use the custom model.
+
 ## Prerequisites

 * PyTorch 1.9 and torchvision 0.10 (Optional)
@@ -15,24 +17,32 @@
 To Test Run the Object Detection iOS App, follow the steps below:

-### 1. Prepare the Model
+### 1. Prepare the model

 If you don't have the PyTorch environment set up to run the script, you can download the model file [here](https://pytorch-mobile-demo-apps.s3.us-east-2.amazonaws.com/yolov5s.torchscript.ptl) to the `ios-demo-app/ObjectDetection/ObjectDetection` folder, then skip the rest of this step and go to step 2 directly.

 The Python script `export.py` in the `models` folder of the [YOLOv5 repo](https://github.com/ultralytics/yolov5) is used to generate a TorchScript-formatted YOLOv5 model named `yolov5s.torchscript.ptl` for mobile apps.

-Open a Mac/Linux/Windows Terminal, run the following commands (note that we use the fork of the original YOLOv5 repo to make sure the code changes work, but feel free to use the original repo):
+Open a Mac/Linux/Windows Terminal and run the following commands:

 ```
-git clone https://github.com/jeffxtang/yolov5
+git clone https://github.com/ultralytics/yolov5
 cd yolov5
-pip install -r requirements.txt
+pip install -r requirements.txt wandb
 ```

-Finally, run the script below to generate the optimized TorchScript Lite Interpreter model and copy the generated model file `yolov5s.torchscript.ptl` to the `ios-demo-app/ObjectDetection/ObjectDetection` folder:
+Note that the steps below have been tested with commit `cd35a009ba964331abccd30f6fa0614224105d39`; if you run into any issue with running the script or using the model, try `git reset --hard cd35a009ba964331abccd30f6fa0614224105d39`.
+
+Edit `export.py` to make the following two changes:
+
+* After `f = file.with_suffix('.torchscript.pt')`, add a line `fl = file.with_suffix('.torchscript.ptl')`
+
+* After `(optimize_for_mobile(ts) if optimize else ts).save(f)`, add `(optimize_for_mobile(ts) if optimize else ts)._save_for_lite_interpreter(str(fl))`
+
+Finally, run the script below to generate the optimized TorchScript Lite Interpreter model and copy the generated model file `yolov5s.torchscript.ptl` to the `ios-demo-app/ObjectDetection/ObjectDetection` folder (the original full JIT model `yolov5s.torchscript.pt` is also generated, for comparison):

 ```
-python models/export.py
+python export.py --weights yolov5s.pt --include torchscript
 ```

 Note that the small version of the YOLOv5 model, which runs faster but with less accuracy, is generated by default when running `export.py`. You can also change the value of the `weights` parameter in `export.py` to generate the medium, large, or extra-large version of the model.
@@ -56,3 +66,66 @@
 Some example images and the detection results are as follows:
 ![](screenshot3.png)
 ![](screenshot4.png)
+
+## Transfer Learning
+
+In this section, you'll see how to fine-tune the YOLOv5 model with an example dataset called [aicook](https://universe.roboflow.com/karel-cornelis-q2qqg/aicook-lcv4d/4), which is used to detect ingredients in your fridge. For more info on YOLOv5 transfer learning, see [here](https://github.com/ultralytics/yolov5/issues/1314). If you use the default YOLOv5 model to detect what's inside your fridge, you likely won't get good results; that's why you need a custom model trained on a dataset like aicook.
+
+### 1. Download the custom dataset
+
+Go [here](https://universe.roboflow.com/karel-cornelis-q2qqg/aicook-lcv4d/4) to download the aicook dataset as a zip file. Unzip the file into your `yolov5` repo directory, then run `cd yolov5; mv train ..; mv valid ..;`, as the aicook `data.yaml` specifies that the `train` and `val` folders are one level up.
+
+### 2. Retrain the YOLOv5 model with the custom dataset
+
+Run the script below to train a custom model; the best weights are saved as `best.pt` in `runs/train/exp/weights`:
+
+```
+python train.py --img 640 --batch 16 --epochs 3 --data data.yaml --weights yolov5s.pt
+```
+
+The precision of the model trained with only 3 epochs is very low (less than 0.01, actually). With a tool such as [Weights and Biases](https://wandb.ai), which can be set up in a few minutes and is integrated with YOLOv5, you'll find that with `--epochs` set to 80 the precision reaches 0.95. But on a CPU machine, you can quickly train a custom model using the command above and then test it in the iOS demo app. Below are sample wandb metrics from 3, 30, and 100 epochs of training:
+
+![](metrics.png)
+
+### 3. Convert the custom model to the lite version
+
+With `export.py` modified as described in step 1 (`Prepare the model`) of the `Quick Start` section, you can convert the new custom model to its TorchScript Lite Interpreter version:
+
+```
+python export.py --weights runs/train/exp/weights/best.pt --include torchscript
+```
+
+The resulting `best.torchscript.ptl` is located in `runs/train/exp/weights`; it needs to be added to the iOS ObjectDetection demo app project.
+
+### 4. Update the demo app
+
+In Xcode, first change the line `private let testImages = ["test1.png", "test2.jpg", "test3.png"]` in `ViewController.swift` to `private let testImages = ["aicook1.jpg", "aicook2.jpg", "aicook3.jpg", "test1.png", "test2.jpg", "test3.png"]`.
+(The three aicook test images have been added to the repo.)
+
+Then in `ObjectDetector.swift`, change:
+```
+if let filePath = Bundle.main.path(forResource: "yolov5s.torchscript", ofType: "ptl"),
+```
+to:
+```
+if let filePath = Bundle.main.path(forResource: "best.torchscript", ofType: "ptl"),
+```
+and
+```
+if let filePath = Bundle.main.path(forResource: "classes", ofType: "txt"),
+```
+to:
+```
+if let filePath = Bundle.main.path(forResource: "aicook", ofType: "txt"),
+```
+(`aicook.txt` defines the 30 custom class names, copied from `data.yaml` in the custom dataset downloaded in step 1 of this section.)
+
+Finally, in `PrePostProcessor.swift`, change the line `static let outputColumn = 85` to `static let outputColumn = 35`, which is 5 (left, top, right, bottom, score) + 30 (the number of custom classes).
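To see why `outputColumn` is 5 plus the number of classes, here is a minimal sketch of the row layout. It is a simplified, hypothetical decoder, not the app's actual `PrePostProcessor` code, and it assumes the model output has already been flattened into a `[Float]` array; it walks each 35-value row, reads the objectness score at offset 4, and picks the best of the 30 class scores starting at offset 5:

```
import Foundation

// Hypothetical, simplified decoder: each output row is
// [centerX, centerY, width, height, objectness, class scores...],
// i.e. 5 + numberOfClasses values per row (35 for the 30 aicook classes).
struct Detection {
    let classIndex: Int
    let score: Float
}

func decodeOutputs(_ outputs: [Float],
                   outputRow: Int = 25200,
                   outputColumn: Int = 35,
                   threshold: Float = 0.35) -> [Detection] {
    var detections = [Detection]()
    for i in 0..<outputRow {
        let base = i * outputColumn
        let score = outputs[base + 4]          // objectness score at offset 4
        guard score > threshold else { continue }
        // Class scores occupy offsets 5..<outputColumn; keep the highest one.
        var bestClass = 0
        var bestClassScore = outputs[base + 5]
        for c in 1..<(outputColumn - 5) {
            if outputs[base + 5 + c] > bestClassScore {
                bestClassScore = outputs[base + 5 + c]
                bestClass = c
            }
        }
        detections.append(Detection(classIndex: bestClass, score: score))
    }
    return detections
}
```

The app's `PrePostProcessor.swift` does the same bookkeeping on `[NSNumber]` outputs (hence `Float(truncating:)`), which is why switching to the 30-class aicook model only requires changing the `outputColumn` constant in that file.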
+ +Run the app in Xcode and you should see the custom model working on the first three aicook test images: + +![](aicook1.png) +![](aicook2.png) + +![](aicook3.png) +![](aicook4.png) diff --git a/ObjectDetection/aicook1.png b/ObjectDetection/aicook1.png new file mode 100644 index 0000000..03623ae Binary files /dev/null and b/ObjectDetection/aicook1.png differ diff --git a/ObjectDetection/aicook2.png b/ObjectDetection/aicook2.png new file mode 100644 index 0000000..b579e14 Binary files /dev/null and b/ObjectDetection/aicook2.png differ diff --git a/ObjectDetection/aicook3.png b/ObjectDetection/aicook3.png new file mode 100644 index 0000000..c23a90b Binary files /dev/null and b/ObjectDetection/aicook3.png differ diff --git a/ObjectDetection/aicook4.png b/ObjectDetection/aicook4.png new file mode 100644 index 0000000..8073fa2 Binary files /dev/null and b/ObjectDetection/aicook4.png differ diff --git a/ObjectDetection/metrics.png b/ObjectDetection/metrics.png new file mode 100644 index 0000000..e0f99da Binary files /dev/null and b/ObjectDetection/metrics.png differ