In [2]:
import tensorflow as tf
import tensorflow_hub as hub
import numpy as np
import cv2
import os

# Import matplotlib libraries
from matplotlib import pyplot as plt
from matplotlib.collections import LineCollection
import matplotlib.patches as patches

# Some modules to display an animation using imageio.
import imageio
from IPython.display import HTML, display




In [3]:
# Dictionary that maps from joint names to keypoint indices.
KEYPOINT_DICT = {
    'nose': 0,
    'left_eye': 1,
    'right_eye': 2,
    'left_ear': 3,
    'right_ear': 4,
    'left_shoulder': 5,
    'right_shoulder': 6,
    'left_elbow': 7,
    'right_elbow': 8,
    'left_wrist': 9,
    'right_wrist': 10,
    'left_hip': 11,
    'right_hip': 12,
    'left_knee': 13,
    'right_knee': 14,
    'left_ankle': 15,
    'right_ankle': 16
}

# Maps bones to a matplotlib color name.
KEYPOINT_EDGE_INDS_TO_COLOR = {
    (0, 1): 'm',
    (0, 2): 'c',
    (1, 3): 'm',
    (2, 4): 'c',
    (0, 5): 'm',
    (0, 6): 'c',
    (5, 7): 'm',
    (7, 9): 'm',
    (6, 8): 'c',
    (8, 10): 'c',
    (5, 6): 'y',
    (5, 11): 'm',
    (6, 12): 'c',
    (11, 12): 'y',
    (11, 13): 'm',
    (13, 15): 'm',
    (12, 14): 'c',
    (14, 16): 'c'
}

def _keypoints_and_edges_for_display(keypoints_with_scores,
                                     height,
                                     width,
                                     keypoint_threshold=0.11):
  """Returns high confidence keypoints and edges for visualization.

  Args:
    keypoints_with_scores: A numpy array with shape [1, 1, 17, 3] representing
      the keypoint coordinates and scores returned from the MoveNet model.
    height: height of the image in pixels.
    width: width of the image in pixels.
    keypoint_threshold: minimum confidence score for a keypoint to be
      visualized.

  Returns:
    A (keypoints_xy, edges_xy, edge_colors) containing:
      * the coordinates of all keypoints of all detected entities;
      * the coordinates of all skeleton edges of all detected entities;
      * the colors in which the edges should be plotted.
  """
  keypoints_all = []
  keypoint_edges_all = []
  edge_colors = []
  num_instances, _, _, _ = keypoints_with_scores.shape
  for idx in range(num_instances):
    kpts_x = keypoints_with_scores[0, idx, :, 1]
    kpts_y = keypoints_with_scores[0, idx, :, 0]
    kpts_scores = keypoints_with_scores[0, idx, :, 2]
    kpts_absolute_xy = np.stack(
        [width * np.array(kpts_x), height * np.array(kpts_y)], axis=-1)
    kpts_above_thresh_absolute = kpts_absolute_xy[
        kpts_scores > keypoint_threshold, :]
    keypoints_all.append(kpts_above_thresh_absolute)

    for edge_pair, color in KEYPOINT_EDGE_INDS_TO_COLOR.items():
      if (kpts_scores[edge_pair[0]] > keypoint_threshold and
          kpts_scores[edge_pair[1]] > keypoint_threshold):
        x_start = kpts_absolute_xy[edge_pair[0], 0]
        y_start = kpts_absolute_xy[edge_pair[0], 1]
        x_end = kpts_absolute_xy[edge_pair[1], 0]
        y_end = kpts_absolute_xy[edge_pair[1], 1]
        line_seg = np.array([[x_start, y_start], [x_end, y_end]])
        keypoint_edges_all.append(line_seg)
        edge_colors.append(color)
  if keypoints_all:
    keypoints_xy = np.concatenate(keypoints_all, axis=0)
  else:
    keypoints_xy = np.zeros((0, 17, 2))

  if keypoint_edges_all:
    edges_xy = np.stack(keypoint_edges_all, axis=0)
  else:
    edges_xy = np.zeros((0, 2, 2))
  return keypoints_xy, edges_xy, edge_colors


def draw_prediction_on_image(
    image, keypoints_with_scores, crop_region=None, close_figure=False,
    output_image_height=None):
  """Draws the keypoint predictions on image.

  Args:
    image: A numpy array with shape [height, width, channel] representing the
      pixel values of the input image.
    keypoints_with_scores: A numpy array with shape [1, 1, 17, 3] representing
      the keypoint coordinates and scores returned from the MoveNet model.
    crop_region: A dictionary that defines the coordinates of the bounding box
      of the crop region in normalized coordinates (see the init_crop_region
      function below for more detail). If provided, this function will also
      draw the bounding box on the image.
    output_image_height: An integer indicating the height of the output image.
      Note that the image aspect ratio will be the same as the input image.

  Returns:
    A numpy array with shape [out_height, out_width, channel] representing the
    image overlaid with keypoint predictions.
  """
  height, width, channel = image.shape
  aspect_ratio = float(width) / height
  fig, ax = plt.subplots(figsize=(12 * aspect_ratio, 12))
  # To remove the huge white borders
  fig.tight_layout(pad=0)
  ax.margins(0)
  ax.set_yticklabels([])
  ax.set_xticklabels([])
  plt.axis('off')

  im = ax.imshow(image)
  line_segments = LineCollection([], linewidths=(4), linestyle='solid')
  ax.add_collection(line_segments)
  # Turn off tick labels
  scat = ax.scatter([], [], s=60, color='#FF1493', zorder=3)

  (keypoint_locs, keypoint_edges,
   edge_colors) = _keypoints_and_edges_for_display(
       keypoints_with_scores, height, width)

  line_segments.set_segments(keypoint_edges)
  line_segments.set_color(edge_colors)
  if keypoint_edges.shape[0]:
    line_segments.set_segments(keypoint_edges)
    line_segments.set_color(edge_colors)
  if keypoint_locs.shape[0]:
    scat.set_offsets(keypoint_locs)

  if crop_region is not None:
    xmin = max(crop_region['x_min'] * width, 0.0)
    ymin = max(crop_region['y_min'] * height, 0.0)
    rec_width = min(crop_region['x_max'], 0.99) * width - xmin
    rec_height = min(crop_region['y_max'], 0.99) * height - ymin
    rect = patches.Rectangle(
        (xmin,ymin),rec_width,rec_height,
        linewidth=1,edgecolor='b',facecolor='none')
    ax.add_patch(rect)

  fig.canvas.draw()
  image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
  image_from_plot = image_from_plot.reshape(
      fig.canvas.get_width_height()[::-1] + (3,))
  plt.close(fig)
  if output_image_height is not None:
    output_image_width = int(output_image_height / height * width)
    image_from_plot = cv2.resize(
        image_from_plot, dsize=(output_image_width, output_image_height),
         interpolation=cv2.INTER_CUBIC)
  return image_from_plot

def progress(value, max=100):
  return HTML("""
      <progress
          value='{value}'
          max='{max}',
          style='width: 100%'
      >
          {value}
      </progress>
  """.format(value=value, max=max))

In [4]:
# Konversi warna dari matplotlib-style ke BGR-nya OpenCV
COLOR_MAP = {
    'm': (255, 0, 255),    # magenta
    'c': (255, 255, 0),    # cyan
    'y': (0, 255, 255)     # yellow
}

def draw_keypoints_and_skeleton(frame, keypoints, threshold=0.2):
    h, w, _ = frame.shape
    points = []

    # Gambar keypoints
    for i, kp in enumerate(keypoints):
        y, x, confidence = kp
        if confidence > threshold:
            cx, cy = int(x * w), int(y * h)
            points.append((i, (cx, cy)))
            cv2.circle(frame, (cx, cy), 4, (0, 255, 0), -1)  # Titik warna hijau
            # Optional: label angka titik
            cv2.putText(frame, str(i), (cx + 5, cy - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.4, (255, 255, 255), 1)
        else:
            points.append((i, None))  # Keypoint tidak valid

    # Gambar tulang (skeleton)
    for edge, color_code in KEYPOINT_EDGE_INDS_TO_COLOR.items():
        p1_idx, p2_idx = edge
        pt1 = points[p1_idx][1]
        pt2 = points[p2_idx][1]

        if pt1 is not None and pt2 is not None:
            cv2.line(frame, pt1, pt2, COLOR_MAP[color_code], 2)

    return frame

In [5]:
model_name = "movenet_thunder"

if "tflite" in model_name:
  if "movenet_lightning_f16" in model_name:
    !wget -q -O model.tflite https://tfhub.dev/google/lite-model/movenet/singlepose/lightning/tflite/float16/4?lite-format=tflite
    input_size = 192
  elif "movenet_thunder_f16" in model_name:
    !wget -q -O model.tflite https://tfhub.dev/google/lite-model/movenet/singlepose/thunder/tflite/float16/4?lite-format=tflite
    input_size = 256
  elif "movenet_lightning_int8" in model_name:
    !wget -q -O model.tflite https://tfhub.dev/google/lite-model/movenet/singlepose/lightning/tflite/int8/4?lite-format=tflite
    input_size = 192
  elif "movenet_thunder_int8" in model_name:
    !wget -q -O model.tflite https://tfhub.dev/google/lite-model/movenet/singlepose/thunder/tflite/int8/4?lite-format=tflite
    input_size = 256
  else:
    raise ValueError("Unsupported model name: %s" % model_name)

  # Initialize the TFLite interpreter
  interpreter = tf.lite.Interpreter(model_path="model.tflite")
  interpreter.allocate_tensors()

  def movenet(input_image):
    """Runs detection on an input image.

    Args:
      input_image: A [1, height, width, 3] tensor represents the input image
        pixels. Note that the height/width should already be resized and match the
        expected input resolution of the model before passing into this function.

    Returns:
      A [1, 1, 17, 3] float numpy array representing the predicted keypoint
      coordinates and scores.
    """
    # TF Lite format expects tensor type of uint8.
    input_image = tf.cast(input_image, dtype=tf.uint8)
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()
    interpreter.set_tensor(input_details[0]['index'], input_image.numpy())
    # Invoke inference.
    interpreter.invoke()
    # Get the model prediction.
    keypoints_with_scores = interpreter.get_tensor(output_details[0]['index'])
    return keypoints_with_scores

else:
  if "movenet_lightning" in model_name:
    module = hub.load("https://tfhub.dev/google/movenet/singlepose/lightning/4")
    input_size = 192
  elif "movenet_thunder" in model_name:
    module = hub.load("https://tfhub.dev/google/movenet/singlepose/thunder/4")
    input_size = 256
  else:
    raise ValueError("Unsupported model name: %s" % model_name)

  def movenet(input_image):
    """Runs detection on an input image.

    Args:
      input_image: A [1, height, width, 3] tensor represents the input image
        pixels. Note that the height/width should already be resized and match the
        expected input resolution of the model before passing into this function.

    Returns:
      A [1, 1, 17, 3] float numpy array representing the predicted keypoint
      coordinates and scores.
    """
    model = module.signatures['serving_default']

    # SavedModel format expects tensor type of int32.
    input_image = tf.cast(input_image, dtype=tf.int32)
    # Run model inference.
    outputs = model(input_image)
    # Output is a [1, 1, 17, 3] tensor.
    keypoints_with_scores = outputs['output_0'].numpy()
    return keypoints_with_scores













In [6]:
def load_movenet(image_path):
    # Load the input image.
    image = tf.io.read_file(image_path)
    image = tf.image.decode_jpeg(image)

    # Resize and pad the image to keep the aspect ratio and fit the expected size.
    input_image = tf.expand_dims(image, axis=0)
    input_image = tf.image.resize_with_pad(input_image, input_size, input_size)

    # Run model inference.
    keypoints_with_scores = movenet(input_image)
    
    return keypoints_with_scores

def visualize_movenet(keypoints_with_scores, image_path,  output_path):
    
    # Load the input image.
    image = tf.io.read_file(image_path)
    image = tf.image.decode_jpeg(image)
    
    # Visualize the predictions with image.
    display_image = tf.expand_dims(image, axis=0)
    display_image = tf.cast(tf.image.resize_with_pad(
        display_image, 1280, 1280), dtype=tf.int32)
    output_overlay = draw_prediction_on_image(
        np.squeeze(display_image.numpy(), axis=0), keypoints_with_scores)

    # plt.figure(figsize=(5, 5))
    # plt.imshow(output_overlay)
    # _ = plt.axis('off')

    # Save the overlay
    cv2.imwrite(output_path, cv2.cvtColor(output_overlay, cv2.COLOR_RGB2BGR))

In [7]:
def normalize_hip_center(keypoints_with_scores):
    
    # Get all keypoints
    keypoints = keypoints_with_scores[ :, :2]  # shape: (17, 2)
    scores = keypoints_with_scores[ :, 2]      # Confidence scores
    
    confidence_threshold = 0.3
    
    # Get center of body (hip)
    left_hip = keypoints[11]
    right_hip = keypoints[12]
    
    # Get the hip center (if confidence > threshold)
    if scores[11] > confidence_threshold and scores[12] > confidence_threshold :
        hip_center = (left_hip + right_hip) / 2
    else:
        # Fallback: use the mean of all the keypoints
        valid_indices = np.where(scores > confidence_threshold)[0]
        hip_center = np.mean(keypoints[valid_indices], axis=0) if len(valid_indices) > 0 else np.zeros(2)
    
    # Change the keypoints to hip oriented
    keypoints_centered = keypoints - hip_center
    
    # Height estimation from shoulder to hip
    left_shoulder = keypoints[5]
    right_shoulder = keypoints[6]
    
    # Normalization with height estimation (if confidence > threshold)
    if scores[5] > confidence_threshold and scores[6] > confidence_threshold and np.linalg.norm(left_shoulder - right_shoulder) > 0:
        shoulder_center = (left_shoulder + right_shoulder) / 2
        body_height = np.linalg.norm(shoulder_center - hip_center)
        keypoints_normalized = keypoints_centered / body_height
    else:
        # Fallback: use a normal case body_height shoulder-to-hip 0.3 - 0.5
        keypoints_normalized = keypoints_centered / 0.4

    
    return keypoints_normalized

In [None]:
from time import localtime, strftime
from datetime import datetime


format_time = "%H:%M"
time_now = strftime("%H:%M", localtime())
# time_now = "13:20"

salat_wajib = ["shubuh", "dzuhur", "ashar", "maghrib", "isya"]
waktu_salat = ["04:52", "06:15", "12:27", "15:51", "15:51", "18:36", "18:36", "19:50", "19:50", "04:52"]

rakaat_dict = {
    'shubuh' : 2,
    'dzuhur' : 4,
    'ashar' : 4,
    'maghrib' : 3,
    'isya' : 4,
    'Tidak Waktunya' : 0
}

rakaat_ganjil = ["berdiri", "sujud", "duduk", "sujud", "berdiri"]
rakaat_genap = ["berdiri", "sujud", "duduk", "sujud", "duduk", "berdiri"]
rakaat_akhir = ["berdiri", "sujud", "duduk", "sujud", "duduk"]

rukun_salat = {
    0 : [],
    2 : [rakaat_ganjil, rakaat_akhir],
    3 : [rakaat_ganjil, rakaat_genap, rakaat_akhir],
    4 : [rakaat_ganjil, rakaat_genap, rakaat_ganjil, rakaat_akhir]
}

if datetime.strptime(time_now, format_time) >= datetime.strptime(waktu_salat[0], format_time) and datetime.strptime(time_now, format_time) <= datetime.strptime(waktu_salat[1], format_time):
    salat = salat_wajib[0]
elif datetime.strptime(time_now, format_time) >= datetime.strptime(waktu_salat[2], format_time) and datetime.strptime(time_now, format_time) <= datetime.strptime(waktu_salat[3], format_time):
    salat = salat_wajib[1]
elif datetime.strptime(time_now, format_time) >= datetime.strptime(waktu_salat[4], format_time) and datetime.strptime(time_now, format_time) <= datetime.strptime(waktu_salat[5], format_time):
    salat = salat_wajib[2]
elif datetime.strptime(time_now, format_time) >= datetime.strptime(waktu_salat[6], format_time) and datetime.strptime(time_now, format_time) <= datetime.strptime(waktu_salat[7], format_time):
    salat = salat_wajib[3]
elif datetime.strptime(time_now, format_time) >= datetime.strptime(waktu_salat[8], format_time) and datetime.strptime(time_now, format_time) <= datetime.strptime(waktu_salat[9], format_time):
    salat = salat_wajib[4]
else:
    salat = "Tidak Waktunya"

In [41]:
def cek_benar(gerak_now, rakaat_now, runtutan, gerak_curr):
    kebenaran = True
    if gerak_now != gerak_curr:
        rukun_now = rukun_salat.get(rakaat_dict.get(salat))
        gerak_next = rukun_now[rakaat_now][len(runtutan)]

        if len(runtutan) < 0:
            gerak_curr = runtutan[len(runtutan)-1]

        if gerak_now == gerak_next:
            runtutan.append(gerak_now)

            if len(runtutan) >= len(rukun_now[rakaat_now]):
                rakaat_now += 1
                runtutan = []

            gerak_next = rukun_now[rakaat_now][len(runtutan)]
        else:
            kebenaran = False

    return rakaat_now, runtutan, kebenaran, gerak_curr


In [45]:
gerakk = "sujud"

rakaat_now, runtutan, kebenaran, gerak_curr = cek_benar(gerakk, rakaat_now, runtutan, gerak_curr)
print(rakaat_now)
print(runtutan)

0
['berdiri', 'sujud']


In [9]:
# Memuat kembali
from tensorflow.keras.models import load_model
deteksi_gerakan = load_model('Dataset/deteksi_gerakan.h5')



In [69]:
from sklearn.preprocessing import LabelEncoder


runtutan = []

gerak_curr = ""
rakaat_now = 0

# Fungsi untuk preproses input
def process_input(image):
    img = tf.image.resize_with_pad(tf.expand_dims(image, axis=0), 256, 256)
    return tf.cast(img, dtype=tf.int32)

# Buka kamera dari HP via IP Webcam
url = 'http://192.168.137.46:8080/video'
cap = cv2.VideoCapture(2)

encoder = LabelEncoder()
label = ['berdiri', 'duduk', 'sujud']

threshold = 0.7

encoder.fit_transform(label)

cv2.namedWindow('MoveNet Real-Time', cv2.WINDOW_NORMAL)  # Biar bisa resize
cv2.resizeWindow('MoveNet Real-Time', 720, 1280)         # Bisa ganti sesuai ukuran layar


while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    frame = cv2.rotate(frame, cv2.ROTATE_90_CLOCKWISE)

    # Ubah BGR (OpenCV) ke RGB (TensorFlow)
    img_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    input_image = process_input(img_rgb)

    # Deteksi pose
    outputs = movenet(input_image)

    keypoints = outputs[0, 0, :, :]

    if np.mean(keypoints[:, 2]) > 0.4:

        pred = deteksi_gerakan.predict(np.array([normalize_hip_center(keypoints).flatten()]))

        confidence = np.max(pred)

        label_index = np.argmax(pred)
        predicted_label = encoder.inverse_transform([label_index])
        
        frame = draw_keypoints_and_skeleton(frame, keypoints)

        if confidence > threshold:

            rakaat_now, runtutan, kebenaran, gerak_curr = cek_benar(predicted_label, rakaat_now, runtutan, gerak_curr)

            cv2.putText(frame, str(predicted_label), (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 
                    1, (255, 255, 255), 2)
    cv2.putText(frame, str(runtutan), (10, 100), cv2.FONT_HERSHEY_SIMPLEX, 
                    1, (255, 255, 255), 2)
    cv2.putText(frame, str(kebenaran), (10, 170), cv2.FONT_HERSHEY_SIMPLEX, 
                    1, (255, 255, 255), 2)
    
    # Resize frame jika perlu (opsional)
    frame = cv2.resize(frame, (720, 1280))  # width x height, tukar kalau portrait

    cv2.imshow('MoveNet Real-Time', frame)
    if cv2.waitKey(10) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27