Commit
Use vilib library to realize image classification, object detection, gesture detection and pose detection
dino committed Oct 20, 2021
1 parent c1061b6 commit e67d990
Showing 21 changed files with 200 additions and 161 deletions.
9 changes: 9 additions & 0 deletions examples/gesture_detection.py
@@ -0,0 +1,9 @@
from vilib import Vilib

def main():
    Vilib.camera_start(inverted_flag=True)
    Vilib.display(imshow=True, web=True)
    Vilib.gesture_detect_switch(True)

if __name__ == "__main__":
    main()
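
Note that main() returns as soon as the detection switch is flipped; whether the process then stays alive depends on how vilib runs its camera worker thread. A minimal keep-alive variant, a sketch under that assumption and not part of this commit (the Vilib.camera_close() cleanup name is likewise assumed):

from time import sleep
from vilib import Vilib

def main():
    Vilib.camera_start(inverted_flag=True)
    Vilib.display(imshow=True, web=True)
    Vilib.gesture_detect_switch(True)
    try:
        # keep the main thread alive so the background camera loop keeps running
        while True:
            sleep(0.1)
    except KeyboardInterrupt:
        Vilib.camera_close()  # assumed cleanup counterpart to camera_start()

if __name__ == "__main__":
    main()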
162 changes: 6 additions & 156 deletions examples/image_classification.py
@@ -1,159 +1,9 @@
(156 lines removed: the previous standalone TFLite image-classification script, which now lives in tests/image_classification.py and is shown in full below.)

from vilib import Vilib

def main():
    Vilib.camera_start(inverted_flag=True)
    Vilib.display(imshow=True, web=True)
    Vilib.image_classify_switch(True)

if __name__ == "__main__":
    main()
9 changes: 9 additions & 0 deletions examples/objects_detection.py
@@ -0,0 +1,9 @@
from vilib import Vilib

def main():
    Vilib.camera_start(inverted_flag=True)
    Vilib.display()
    Vilib.object_detect_switch(True)

if __name__ == "__main__":
    main()
12 changes: 12 additions & 0 deletions examples/pose_detection.py
@@ -0,0 +1,12 @@
import os
from vilib import Vilib


def main():

    Vilib.camera_start(inverted_flag=True)
    Vilib.display(imshow=True, web=True)
    Vilib.pose_detect_switch(True)

if __name__ == "__main__":
    main()
6 changes: 3 additions & 3 deletions install.py
@@ -24,9 +24,9 @@ def run_command(cmd=""):
run_command('sudo python3 setup.py install')


-# install mideaPipe
-print("install mideaPipe ...")
-run_command('sudo pip3 install mediapipe-rpi3')
+# # install mediapipe-rpi3
+# print("install mediapipe-rpi3 ...")
+# run_command('sudo pip3 install mediapipe-rpi3')


#install sunfounder_io
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
159 changes: 159 additions & 0 deletions tests/image_classification.py
@@ -0,0 +1,159 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import time
import numpy as np

import cv2

from PIL import Image
from tflite_runtime.interpreter import Interpreter
import threading

CAMERA_WIDTH = 640
CAMERA_HEIGHT = 480

model_path = '../models/mobilenet_v1_0.25_224_quant.tflite'
labels_path = '../models/labels_mobilenet_quant_v1_224.txt'

def load_labels(path):
    with open(path, 'r') as f:
        return {i: line.strip() for i, line in enumerate(f.readlines())}

def set_input_tensor(interpreter, image):
    tensor_index = interpreter.get_input_details()[0]['index']
    input_tensor = interpreter.tensor(tensor_index)()[0]
    input_tensor[:, :] = image


def classify_image(interpreter, image, labels_map):
    """Returns the top classification result as [(label_id, score)]."""
    set_input_tensor(interpreter, image)
    interpreter.invoke()
    output_details = interpreter.get_output_details()[0]
    output = np.squeeze(interpreter.get_tensor(output_details['index']))

    # If the model is quantized (uint8 data), then dequantize the results
    if output_details['dtype'] == np.uint8:
        scale, zero_point = output_details['quantization']
        output = scale * (output - zero_point)

    # for i, out in enumerate(output):
    #     print(labels_map[i], round(out, 3))
    # print('> ', end=' ')

    # Partially sort so that the index of the largest score comes first
    ordered = np.argpartition(-output, 1)
    # Return the label with the highest score
    return [(i, output[i]) for i in ordered[:1]]
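
# A quick sanity check of the argpartition trick above, with made-up scores:
#   np.argpartition(-np.array([0.1, 0.7, 0.2]), 1)[:1]  ->  array([1])
# i.e. index 1 (score 0.7) is the top-1 class, found without fully sorting the output.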


results = []
image = []
elapsed_ms = 0
run_flag = False

def imgshow_fuc(input_height, input_width, labels):

    # shared with main() via module-level globals
    global results
    global elapsed_ms
    global image
    global run_flag

    run_flag = True

    counter, fps = 0, 0
    start_time = time.time()
    fps_avg_frame_count = 10

    # open camera (3 = CAP_PROP_FRAME_WIDTH, 4 = CAP_PROP_FRAME_HEIGHT)
    cap = cv2.VideoCapture(0)
    cap.set(3, CAMERA_WIDTH)
    cap.set(4, CAMERA_HEIGHT)
    print('start...')

    while cap.isOpened():

        success, frame = cap.read()
        if not success:
            print("Ignoring empty camera frame.")
            # If loading a video, use 'break' instead of 'continue'.
            continue

        # frame = cv2.flip(frame, -1)  # Flip camera vertically
        image = cv2.resize(frame, (input_width, input_height))

        # update the fps estimate every 10 frames,
        # e.g. 10 frames in 0.4 s -> fps = 10 / 0.4 = 25.0
        counter += 1
        if counter % fps_avg_frame_count == 0:
            end_time = time.time()
            fps = fps_avg_frame_count / (end_time - start_time)
            start_time = time.time()

        if len(results) != 0:
            label_id, prob = results[0]
            cv2.putText(frame, labels[label_id] + " " + str(round(prob, 3)), (5, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 1, cv2.LINE_AA)
        cv2.putText(frame, '%.1fms' % (elapsed_ms), (CAMERA_WIDTH-120, 40), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 225), 1)
        cv2.putText(frame, 'fps %s' % round(fps, 1), (CAMERA_WIDTH-120, 20), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 225), 1)
        cv2.imshow('Detecting...', frame)

        key = cv2.waitKey(1) & 0xFF
        if key == ord('q') or key == 27:  # press 'q' or ESC to quit
            break
        if cv2.getWindowProperty('Detecting...', 1) < 0:  # window was closed
            break

    run_flag = False
    cap.release()
    cv2.destroyAllWindows()


def main():
    # parse the model path and the corresponding labels path
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        '--model',
        help='File path of .tflite file.',
        required=False,
        default=model_path)
    parser.add_argument(
        '--labels',
        help='File path of labels file.',
        required=False,
        default=labels_path)
    args = parser.parse_args()

    # load the model and the corresponding labels
    labels = load_labels(args.labels)
    interpreter = Interpreter(args.model)
    interpreter.allocate_tensors()
    _, input_height, input_width, _ = interpreter.get_input_details()[0]['shape']

    global results
    global elapsed_ms
    global run_flag

    # set run_flag before the thread starts so the loop below
    # cannot see a stale False and exit prematurely
    run_flag = True

    # show the camera feed (with overlaid results) in a background thread
    imgshow_t = threading.Thread(target=imgshow_fuc, args=(input_height, input_width, labels))
    imgshow_t.start()

    while True:
        # classify the most recent frame captured by the display thread
        if len(image) != 0:
            start_time = time.monotonic()
            results = classify_image(interpreter, image, labels)
            elapsed_ms = (time.monotonic() - start_time) * 1000
            label_id, prob = results[0]
            print(labels[label_id], prob)
            print(' ')

        # the display thread clears run_flag when its window is closed
        if not run_flag:
            print('\nend...')
            break

        time.sleep(0.01)


if __name__ == '__main__':
    main()
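
For reference, the uint8 dequantization step in classify_image() maps raw output bytes back to real-valued scores. A small worked example with assumed quantization parameters (in practice, scale and zero point come from the model file):

import numpy as np

scale, zero_point = 1 / 256, 0                 # assumed values, for illustration only
raw = np.array([181, 23, 52], dtype=np.uint8)  # hypothetical raw model outputs
scores = scale * (raw.astype(np.int32) - zero_point)
print(scores)                                  # [0.70703125 0.08984375 0.203125]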
4 changes: 2 additions & 2 deletions examples/object_detection.py → tests/object_detection.py
@@ -16,8 +16,8 @@
CAMERA_WIDTH = 640
CAMERA_HEIGHT = 480

-model_path = './models/detect.tflite'
-labels_path = './models/coco_labels.txt'
+model_path = '../models/detect.tflite'
+labels_path = '../models/coco_labels.txt'

def load_labels(path):
    """Loads the labels file. Supports files with or without index numbers."""
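
The relative '../models/' defaults above only resolve when the script is launched from inside tests/. A more robust pattern, a sketch and not part of this commit, anchors the paths to the script's own location:

from pathlib import Path

BASE_DIR = Path(__file__).resolve().parent.parent       # repo root, one level above tests/
model_path = str(BASE_DIR / 'models' / 'detect.tflite')
labels_path = str(BASE_DIR / 'models' / 'coco_labels.txt')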
File renamed without changes.
