Commit
Use vilib library to realize image classification, object detection, gesture detection and pose detection
dino committed Oct 20, 2021
1 parent c1061b6 commit e67d990
Showing 21 changed files with 200 additions and 161 deletions.
9 changes: 9 additions & 0 deletions examples/gesture_detection.py
@@ -0,0 +1,9 @@
from vilib import Vilib

def main():
    Vilib.camera_start(inverted_flag=True)
    Vilib.display(imshow=True, web=True)
    Vilib.gesture_detect_switch(True)

if __name__ == "__main__":
    main()
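
Note that main() returns as soon as the detection switch is flipped; whether the process then stays alive depends on how vilib runs its camera worker thread. A minimal keep-alive variant, a sketch under that assumption and not part of this commit (the Vilib.camera_close() cleanup name is likewise assumed):

from time import sleep
from vilib import Vilib

def main():
    Vilib.camera_start(inverted_flag=True)
    Vilib.display(imshow=True, web=True)
    Vilib.gesture_detect_switch(True)
    try:
        # keep the main thread alive so the background camera loop keeps running
        while True:
            sleep(0.1)
    except KeyboardInterrupt:
        Vilib.camera_close()  # assumed cleanup counterpart to camera_start()

if __name__ == "__main__":
    main()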
162 changes: 6 additions & 156 deletions examples/image_classification.py
@@ -1,159 +1,9 @@
(156 lines removed: the previous standalone TFLite image-classification script, which now lives in tests/image_classification.py and is shown in full below.)

from vilib import Vilib

def main():
    Vilib.camera_start(inverted_flag=True)
    Vilib.display(imshow=True, web=True)
    Vilib.image_classify_switch(True)

if __name__ == "__main__":
    main()
9 changes: 9 additions & 0 deletions examples/objects_detection.py
@@ -0,0 +1,9 @@
from vilib import Vilib

def main():
    Vilib.camera_start(inverted_flag=True)
    Vilib.display()
    Vilib.object_detect_switch(True)

if __name__ == "__main__":
    main()
12 changes: 12 additions & 0 deletions examples/pose_detection.py
@@ -0,0 +1,12 @@
import os
from vilib import Vilib


def main():

    Vilib.camera_start(inverted_flag=True)
    Vilib.display(imshow=True, web=True)
    Vilib.pose_detect_switch(True)

if __name__ == "__main__":
    main()
6 changes: 3 additions & 3 deletions install.py
@@ -24,9 +24,9 @@ def run_command(cmd=""):
run_command('sudo python3 setup.py install')


-# install mideaPipe
-print("install mideaPipe ...")
-run_command('sudo pip3 install mediapipe-rpi3')
+# # install mediapipe-rpi3
+# print("install mediapipe-rpi3 ...")
+# run_command('sudo pip3 install mediapipe-rpi3')


#install sunfounder_io
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
159 changes: 159 additions & 0 deletions tests/image_classification.py
@@ -0,0 +1,159 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import time
import numpy as np

import cv2

from PIL import Image
from tflite_runtime.interpreter import Interpreter
import threading

CAMERA_WIDTH = 640
CAMERA_HEIGHT = 480

model_path = '../models/mobilenet_v1_0.25_224_quant.tflite'
labels_path = '../models/labels_mobilenet_quant_v1_224.txt'

def load_labels(path):
    with open(path, 'r') as f:
        return {i: line.strip() for i, line in enumerate(f.readlines())}

def set_input_tensor(interpreter, image):
    tensor_index = interpreter.get_input_details()[0]['index']
    input_tensor = interpreter.tensor(tensor_index)()[0]
    input_tensor[:, :] = image


def classify_image(interpreter, image, labels_map):
    """Returns the top classification result as [(label_id, score)]."""
    set_input_tensor(interpreter, image)
    interpreter.invoke()
    output_details = interpreter.get_output_details()[0]
    output = np.squeeze(interpreter.get_tensor(output_details['index']))

    # If the model is quantized (uint8 data), then dequantize the results
    if output_details['dtype'] == np.uint8:
        scale, zero_point = output_details['quantization']
        output = scale * (output - zero_point)

    # for i, out in enumerate(output):
    #     print(labels_map[i], round(out, 3))
    # print('> ', end=' ')

    # Partially sort so that the index of the largest score comes first
    ordered = np.argpartition(-output, 1)
    # Return the label with the highest score
    return [(i, output[i]) for i in ordered[:1]]
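
# A quick sanity check of the argpartition trick above, with made-up scores:
#   np.argpartition(-np.array([0.1, 0.7, 0.2]), 1)[:1]  ->  array([1])
# i.e. index 1 (score 0.7) is the top-1 class, found without fully sorting the output.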


results = []
image = []
elapsed_ms = 0
run_flag = False

def imgshow_fuc(input_height, input_width, labels):

    # shared with main() via module-level globals
    global results
    global elapsed_ms
    global image
    global run_flag

    run_flag = True

    counter, fps = 0, 0
    start_time = time.time()
    fps_avg_frame_count = 10

    # open camera (3 = CAP_PROP_FRAME_WIDTH, 4 = CAP_PROP_FRAME_HEIGHT)
    cap = cv2.VideoCapture(0)
    cap.set(3, CAMERA_WIDTH)
    cap.set(4, CAMERA_HEIGHT)
    print('start...')

    while cap.isOpened():

        success, frame = cap.read()
        if not success:
            print("Ignoring empty camera frame.")
            # If loading a video, use 'break' instead of 'continue'.
            continue

        # frame = cv2.flip(frame, -1)  # Flip camera vertically
        image = cv2.resize(frame, (input_width, input_height))

        # update the fps estimate every 10 frames,
        # e.g. 10 frames in 0.4 s -> fps = 10 / 0.4 = 25.0
        counter += 1
        if counter % fps_avg_frame_count == 0:
            end_time = time.time()
            fps = fps_avg_frame_count / (end_time - start_time)
            start_time = time.time()

        if len(results) != 0:
            label_id, prob = results[0]
            cv2.putText(frame, labels[label_id] + " " + str(round(prob, 3)), (5, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 1, cv2.LINE_AA)
        cv2.putText(frame, '%.1fms' % (elapsed_ms), (CAMERA_WIDTH-120, 40), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 225), 1)
        cv2.putText(frame, 'fps %s' % round(fps, 1), (CAMERA_WIDTH-120, 20), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 225), 1)
        cv2.imshow('Detecting...', frame)

        key = cv2.waitKey(1) & 0xFF
        if key == ord('q') or key == 27:  # press 'q' or ESC to quit
            break
        if cv2.getWindowProperty('Detecting...', 1) < 0:  # window was closed
            break

    run_flag = False
    cap.release()
    cv2.destroyAllWindows()


def main():
    # parse the model path and the corresponding labels path
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        '--model',
        help='File path of .tflite file.',
        required=False,
        default=model_path)
    parser.add_argument(
        '--labels',
        help='File path of labels file.',
        required=False,
        default=labels_path)
    args = parser.parse_args()

    # load the model and the corresponding labels
    labels = load_labels(args.labels)
    interpreter = Interpreter(args.model)
    interpreter.allocate_tensors()
    _, input_height, input_width, _ = interpreter.get_input_details()[0]['shape']

    global results
    global elapsed_ms
    global run_flag

    # set run_flag before the thread starts so the loop below
    # cannot see a stale False and exit prematurely
    run_flag = True

    # show the camera feed (with overlaid results) in a background thread
    imgshow_t = threading.Thread(target=imgshow_fuc, args=(input_height, input_width, labels))
    imgshow_t.start()

    while True:
        # classify the most recent frame captured by the display thread
        if len(image) != 0:
            start_time = time.monotonic()
            results = classify_image(interpreter, image, labels)
            elapsed_ms = (time.monotonic() - start_time) * 1000
            label_id, prob = results[0]
            print(labels[label_id], prob)
            print(' ')

        # the display thread clears run_flag when its window is closed
        if not run_flag:
            print('\nend...')
            break

        time.sleep(0.01)


if __name__ == '__main__':
    main()
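
For reference, the uint8 dequantization step in classify_image() maps raw output bytes back to real-valued scores. A small worked example with assumed quantization parameters (in practice, scale and zero point come from the model file):

import numpy as np

scale, zero_point = 1 / 256, 0                 # assumed values, for illustration only
raw = np.array([181, 23, 52], dtype=np.uint8)  # hypothetical raw model outputs
scores = scale * (raw.astype(np.int32) - zero_point)
print(scores)                                  # [0.70703125 0.08984375 0.203125]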
4 changes: 2 additions & 2 deletions examples/object_detection.py → tests/object_detection.py
@@ -16,8 +16,8 @@
CAMERA_WIDTH = 640
CAMERA_HEIGHT = 480

-model_path = './models/detect.tflite'
-labels_path = './models/coco_labels.txt'
+model_path = '../models/detect.tflite'
+labels_path = '../models/coco_labels.txt'

def load_labels(path):
    """Loads the labels file. Supports files with or without index numbers."""
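
The relative '../models/' defaults above only resolve when the script is launched from inside tests/. A more robust pattern, a sketch and not part of this commit, anchors the paths to the script's own location:

from pathlib import Path

BASE_DIR = Path(__file__).resolve().parent.parent       # repo root, one level above tests/
model_path = str(BASE_DIR / 'models' / 'detect.tflite')
labels_path = str(BASE_DIR / 'models' / 'coco_labels.txt')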
File renamed without changes.
