Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Hand Detection #2868

Draft
wants to merge 6 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,6 @@

// // Example for a Swift Frame Processor plugin automatic registration
VISION_EXPORT_SWIFT_FRAME_PROCESSOR(ExampleSwiftFrameProcessorPlugin, example_kotlin_swift_plugin)
VISION_EXPORT_SWIFT_FRAME_PROCESSOR(ObjectDetectorPlugin, object_detector_plugin)

#endif
Original file line number Diff line number Diff line change
Expand Up @@ -7,42 +7,58 @@

#if VISION_CAMERA_ENABLE_FRAME_PROCESSORS
import VisionCamera
import MediaPipeTasksVision

// Example for a Swift Frame Processor plugin

/// Frame Processor plugin that runs a MediaPipe `HandLandmarker` on every
/// camera frame and returns the detected hand landmarks.
///
/// The landmarker is created once in `init` (loading the bundled
/// `hand_landmarker.task` model) and reused for each `callback` invocation.
@objc(ExampleSwiftFrameProcessorPlugin)
public class ExampleSwiftFrameProcessorPlugin: FrameProcessorPlugin {
  /// Landmarker configured for `.video` running mode; created once per plugin instance.
  private let handLandmarker: HandLandmarker

  /// Creates the plugin and its hand landmarker.
  ///
  /// Terminates the process via `fatalError` when the `hand_landmarker.task`
  /// resource is missing from the main bundle or the landmarker cannot be
  /// constructed, because the plugin cannot do anything useful without it.
  ///
  /// - Parameters:
  ///   - proxy: Forwarded unchanged to `FrameProcessorPlugin`.
  ///   - options: Forwarded unchanged to `FrameProcessorPlugin`; not read here.
  public override init(proxy: VisionCameraProxyHolder, options: [AnyHashable: Any]! = [:]) {
    guard let modelPath = Bundle.main.path(forResource: "hand_landmarker", ofType: "task") else {
      fatalError("Model not found!")
    }

    let landmarkerOptions = HandLandmarkerOptions()
    landmarkerOptions.baseOptions.modelAssetPath = modelPath
    landmarkerOptions.runningMode = .video
    landmarkerOptions.minHandDetectionConfidence = 0.6
    landmarkerOptions.minHandPresenceConfidence = 0.6
    landmarkerOptions.minTrackingConfidence = 0.6
    landmarkerOptions.numHands = 2

    // Use do/catch instead of `try?` so the underlying reason is part of the crash message.
    let landmarker: HandLandmarker
    do {
      landmarker = try HandLandmarker(options: landmarkerOptions)
    } catch {
      fatalError("Failed to init Hand Landmarker! \(error.localizedDescription)")
    }

    // Stored properties must be set before calling the superclass initializer.
    handLandmarker = landmarker
    super.init(proxy: proxy, options: options)
  }

  /// Detects hands in `frame`.
  ///
  /// - Parameters:
  ///   - frame: The camera frame; its sample buffer and timestamp are passed to the landmarker.
  ///   - arguments: Unused.
  /// - Returns: An array with one dictionary per detected hand. Each dictionary
  ///   has a `"landmarks"` key holding an array of `x`/`y`/`z`/`visibility`
  ///   dictionaries. An empty array is returned when detection throws.
  public override func callback(_ frame: Frame, withArguments arguments: [AnyHashable: Any]?) -> Any? {
    do {
      let image = try MPImage(sampleBuffer: frame.buffer)
      let results = try handLandmarker.detect(
        videoFrame: image,
        timestampInMilliseconds: Int(frame.timestamp)
      )

      // Iterate the landmark lists themselves rather than indexing them by
      // `handedness.count`, so a length mismatch can never index out of range.
      let hands: [[String: Any]] = results.landmarks.map { handLandmarks in
        let points: [[String: Any]] = handLandmarks.map { landmark in
          [
            "x": NSNumber(value: landmark.x),
            "y": NSNumber(value: landmark.y),
            "z": NSNumber(value: landmark.z),
            // Explicit `as Any`: the value may be absent and should not be
            // implicitly coerced from an Optional.
            "visibility": landmark.visibility as Any
          ]
        }
        return ["landmarks": points]
      }
      return hands
    } catch {
      print("Error: \(error.localizedDescription)")
      // Typed empty array: a bare `[]` has no element type in an `Any?` context.
      return [[String: Any]]()
    }
  }
}
#endif
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
//
// FaceDetector.swift
// VisionCameraExample
//
// Created by Marc Rousavy on 10.05.24.
//

import Foundation
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
//
// ObjectDetector.swift
// VisionCameraExample
//
// Created by Marc Rousavy on 10.05.24.
//

import Foundation
import VisionCamera
import MLKitObjectDetection
import MLKitCommon
import MLKitVision

/// Frame Processor plugin that runs ML Kit object detection on camera frames
/// and returns the bounding box of every detected object.
@objc(ObjectDetectorPlugin)
public class ObjectDetectorPlugin: FrameProcessorPlugin {
  /// Detector created once in `init` and reused for every frame.
  private let detector: ObjectDetector

  /// Creates the plugin and its ML Kit object detector.
  ///
  /// - Parameters:
  ///   - proxy: Forwarded unchanged to `FrameProcessorPlugin`.
  ///   - options: Optional configuration. When it contains a `Bool` under the
  ///     key `"multiOutput"`, that value is applied to
  ///     `shouldEnableMultipleObjects`; otherwise the ML Kit default is kept.
  public override init(proxy: VisionCameraProxyHolder, options: [AnyHashable: Any]! = [:]) {
    let detectorOptions = ObjectDetectorOptions()
    // `options` is implicitly unwrapped; optional chaining avoids a crash
    // when the caller passes nil instead of a dictionary.
    if let multi = options?["multiOutput"] as? Bool {
      detectorOptions.shouldEnableMultipleObjects = multi
    }
    detector = ObjectDetector.objectDetector(options: detectorOptions)

    super.init(proxy: proxy, options: options)
  }

  /// Detects objects in `frame`.
  ///
  /// - Parameters:
  ///   - frame: The camera frame whose sample buffer is analysed.
  ///   - arguments: Unused.
  /// - Returns: An array of dictionaries with the keys `x`, `y`, `width` and
  ///   `height`, taken from each detected object's `frame` rectangle. An empty
  ///   array is returned when detection throws.
  public override func callback(_ frame: Frame, withArguments arguments: [AnyHashable: Any]?) -> Any? {
    do {
      let image = VisionImage(buffer: frame.buffer)
      // Pass the frame's orientation on to ML Kit; without it the image is
      // treated as `.up` regardless of how the buffer is actually rotated.
      image.orientation = frame.orientation

      let detectedObjects = try detector.results(in: image)
      return detectedObjects.map { object -> [String: CGFloat] in
        let box = object.frame
        return [
          "x": box.origin.x,
          "y": box.origin.y,
          "width": box.size.width,
          "height": box.size.height
        ]
      }
    } catch {
      print("Error: \(error.localizedDescription)")
      // Typed empty array: a bare `[]` has no element type in an `Any?` context.
      return [[String: CGFloat]]()
    }
  }
}
3 changes: 3 additions & 0 deletions package/example/ios/Podfile
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ target 'VisionCameraExample' do
:app_path => "#{Pod::Config.instance.installation_root}/.."
)

pod 'MediaPipeTasksVision'
pod 'GoogleMLKit/ObjectDetection'
pod 'GoogleMLKit/FaceDetection'
pod 'VisionCamera', :path => '../..'
require_relative './VisionCameraExampleCocoaPodUtils.rb'

Expand Down
115 changes: 113 additions & 2 deletions package/example/ios/Podfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,85 @@ PODS:
- ReactCommon/turbomodule/core (= 0.72.7)
- fmt (6.2.1)
- glog (0.3.5)
- GoogleDataTransport (9.4.1):
- GoogleUtilities/Environment (~> 7.7)
- nanopb (< 2.30911.0, >= 2.30908.0)
- PromisesObjC (< 3.0, >= 1.2)
- GoogleMLKit/FaceDetection (6.0.0):
- GoogleMLKit/MLKitCore
- MLKitFaceDetection (~> 5.0.0)
- GoogleMLKit/MLKitCore (6.0.0):
- MLKitCommon (~> 11.0.0)
- GoogleMLKit/ObjectDetection (6.0.0):
- GoogleMLKit/MLKitCore
- MLKitObjectDetection (~> 5.0.0)
- GoogleToolboxForMac/Defines (4.2.1)
- GoogleToolboxForMac/Logger (4.2.1):
- GoogleToolboxForMac/Defines (= 4.2.1)
- "GoogleToolboxForMac/NSData+zlib (4.2.1)":
- GoogleToolboxForMac/Defines (= 4.2.1)
- GoogleUtilities/Environment (7.13.2):
- GoogleUtilities/Privacy
- PromisesObjC (< 3.0, >= 1.2)
- GoogleUtilities/Logger (7.13.2):
- GoogleUtilities/Environment
- GoogleUtilities/Privacy
- GoogleUtilities/Privacy (7.13.2)
- GoogleUtilities/UserDefaults (7.13.2):
- GoogleUtilities/Logger
- GoogleUtilities/Privacy
- GoogleUtilitiesComponents (1.1.0):
- GoogleUtilities/Logger
- GTMSessionFetcher/Core (3.4.1)
- hermes-engine (0.72.7):
- hermes-engine/Pre-built (= 0.72.7)
- hermes-engine/Pre-built (0.72.7)
- libevent (2.1.12)
- MediaPipeTasksCommon (0.10.13)
- MediaPipeTasksVision (0.10.13):
- MediaPipeTasksCommon (= 0.10.13)
- MLImage (1.0.0-beta5)
- MLKitCommon (11.0.0):
- GoogleDataTransport (< 10.0, >= 9.4.1)
- GoogleToolboxForMac/Logger (< 5.0, >= 4.2.1)
- "GoogleToolboxForMac/NSData+zlib (< 5.0, >= 4.2.1)"
- GoogleUtilities/UserDefaults (< 8.0, >= 7.13.0)
- GoogleUtilitiesComponents (~> 1.0)
- GTMSessionFetcher/Core (< 4.0, >= 3.3.2)
- MLKitFaceDetection (5.0.0):
- MLKitCommon (~> 11.0)
- MLKitVision (~> 7.0)
- MLKitImageLabelingCommon (7.0.0):
- MLKitCommon (~> 11.0)
- MLKitVision (~> 7.0)
- MLKitObjectDetection (5.0.0):
- MLKitCommon (~> 11.0)
- MLKitObjectDetectionCommon (~> 7.0)
- MLKitVision (~> 7.0)
- MLKitVisionKit (~> 8.0)
- MLKitObjectDetectionCommon (7.0.0):
- MLKitCommon (~> 11.0)
- MLKitVision (~> 7.0)
- MLKitVision (7.0.0):
- GoogleToolboxForMac/Logger (< 5.0, >= 4.2.1)
- "GoogleToolboxForMac/NSData+zlib (< 5.0, >= 4.2.1)"
- GTMSessionFetcher/Core (< 4.0, >= 3.3.2)
- MLImage (= 1.0.0-beta5)
- MLKitCommon (~> 11.0)
- MLKitVisionKit (8.0.0):
- MLKitCommon (~> 11.0)
- MLKitImageLabelingCommon (~> 7.0)
- MLKitObjectDetectionCommon (~> 7.0)
- MLKitVision (~> 7.0)
- MMKV (1.3.4):
- MMKVCore (~> 1.3.4)
- MMKVCore (1.3.4)
- nanopb (2.30910.0):
- nanopb/decode (= 2.30910.0)
- nanopb/encode (= 2.30910.0)
- nanopb/decode (2.30910.0)
- nanopb/encode (2.30910.0)
- PromisesObjC (2.4.0)
- RCT-Folly (2021.07.22.00):
- boost
- DoubleConversion
Expand Down Expand Up @@ -487,8 +559,11 @@ DEPENDENCIES:
- FBLazyVector (from `../node_modules/react-native/Libraries/FBLazyVector`)
- FBReactNativeSpec (from `../node_modules/react-native/React/FBReactNativeSpec`)
- glog (from `../node_modules/react-native/third-party-podspecs/glog.podspec`)
- GoogleMLKit/FaceDetection
- GoogleMLKit/ObjectDetection
- hermes-engine (from `../node_modules/react-native/sdks/hermes-engine/hermes-engine.podspec`)
- libevent (~> 2.1.12)
- MediaPipeTasksVision
- RCT-Folly (from `../node_modules/react-native/third-party-podspecs/RCT-Folly.podspec`)
- RCTRequired (from `../node_modules/react-native/Libraries/RCTRequired`)
- RCTTypeSafety (from `../node_modules/react-native/Libraries/TypeSafety`)
Expand Down Expand Up @@ -540,9 +615,27 @@ DEPENDENCIES:
SPEC REPOS:
trunk:
- fmt
- GoogleDataTransport
- GoogleMLKit
- GoogleToolboxForMac
- GoogleUtilities
- GoogleUtilitiesComponents
- GTMSessionFetcher
- libevent
- MediaPipeTasksCommon
- MediaPipeTasksVision
- MLImage
- MLKitCommon
- MLKitFaceDetection
- MLKitImageLabelingCommon
- MLKitObjectDetection
- MLKitObjectDetectionCommon
- MLKitVision
- MLKitVisionKit
- MMKV
- MMKVCore
- nanopb
- PromisesObjC
- SocketRocket

EXTERNAL SOURCES:
Expand Down Expand Up @@ -659,10 +752,28 @@ SPEC CHECKSUMS:
FBReactNativeSpec: 638095fe8a01506634d77b260ef8a322019ac671
fmt: ff9d55029c625d3757ed641535fd4a75fedc7ce9
glog: 04b94705f318337d7ead9e6d17c019bd9b1f6b1b
GoogleDataTransport: 6c09b596d841063d76d4288cc2d2f42cc36e1e2a
GoogleMLKit: 97ac7af399057e99182ee8edfa8249e3226a4065
GoogleToolboxForMac: d1a2cbf009c453f4d6ded37c105e2f67a32206d8
GoogleUtilities: c56430aef51a1aa57b25da78c3f8397e522c67b7
GoogleUtilitiesComponents: 679b2c881db3b615a2777504623df6122dd20afe
GTMSessionFetcher: 8000756fc1c19d2e5697b90311f7832d2e33f6cd
hermes-engine: 9180d43df05c1ed658a87cc733dc3044cf90c00a
libevent: 4049cae6c81cdb3654a443be001fb9bdceff7913
MediaPipeTasksCommon: 5c86b477b18fa034db290aead83d1009be3dbcff
MediaPipeTasksVision: 4782191be198e124756e76e66bb3a8917850b1cb
MLImage: 1824212150da33ef225fbd3dc49f184cf611046c
MLKitCommon: afec63980417d29ffbb4790529a1b0a2291699e1
MLKitFaceDetection: 7c0e8bf09ddd27105da32d088fca978a99fc30cc
MLKitImageLabelingCommon: fa802f14c3a321121409f6cbe1df89ba9cf1a00c
MLKitObjectDetection: 9b40359730baceef23322c6d41070da7639ff100
MLKitObjectDetectionCommon: f3ca8581a0741be6ce609920367316571a280efa
MLKitVision: e858c5f125ecc288e4a31127928301eaba9ae0c1
MLKitVisionKit: 6d2ea9741b262ec0fed2a5a3de521cfa671c6e83
MMKV: ed58ad794b3f88c24d604a5b74f3fba17fcbaf74
MMKVCore: a67a1cede26175c413176f404a7cedec43f96a0b
nanopb: 438bc412db1928dac798aa6fd75726007be04262
PromisesObjC: f5707f49cb48b9636751c5b2e7d227e43fba9f47
RCT-Folly: 424b8c9a7a0b9ab2886ffe9c3b041ef628fd4fb1
RCTRequired: 83bca1c184feb4d2e51c72c8369b83d641443f95
RCTTypeSafety: 13c4a87a16d7db6cd66006ce9759f073402ef85b
Expand Down Expand Up @@ -711,6 +822,6 @@ SPEC CHECKSUMS:
VisionCamera: b633f90960feab2669b7a1c51f8a201dd0a5bfc3
Yoga: 4c3aa327e4a6a23eeacd71f61c81df1bcdf677d5

PODFILE CHECKSUM: 66976ac26c778d788a06e6c1bab624e6a1233cdd
PODFILE CHECKSUM: ed5841958979d8eedbe1edba1d1ed35b89429590

COCOAPODS: 1.11.3
COCOAPODS: 1.14.3