In [17]:
import os
import numpy as np
import sounddevice as sd
import librosa
import cv2
from scipy.spatial.distance import cosine

# Konfigurasi
duration = 2
sample_rate = 22050

# Folder sumber data
audio_folder = "Hewan/suara"
image_folder = "Hewan/gambar"

# Hanya hewan tertentu
animal_list = ["kambing", "anjing", "kucing"]

def extract_fft_mel(y, sr):
    # FFT
    fft_spectrum = np.abs(np.fft.fft(y))[:len(y) // 2]
    target_length = (len(fft_spectrum) // 100) * 100
    fft_trimmed = fft_spectrum[:target_length]
    fft_features = np.mean(fft_trimmed.reshape(-1, 100), axis=1)

    # Mel
    mel = librosa.feature.melspectrogram(y=y, sr=sr)
    mel_db = librosa.power_to_db(mel, ref=np.max)
    mel_mean = np.mean(mel_db, axis=1)

    return np.concatenate([fft_features, mel_mean])

# Load fitur audio hewan
animal_features = {}
for name in animal_list:
    path = os.path.join('/Hewan/suara', f"kambing.wav")
    if os.path.exists(path):
        y, sr = librosa.load(path, sr=sample_rate)
        features = extract_fft_mel(y, sr)
        animal_features[name] = features

def record_and_extract_features():
    print("🎙️ Rekam suara tiruan hewan...")
    audio = sd.rec(int(duration * sample_rate), samplerate=sample_rate, channels=1, dtype='float32')
    sd.wait()
    y = audio.flatten()
    return extract_fft_mel(y, sample_rate)

def find_best_match(user_feat):
    min_dist = float("inf")
    best_match = None
    for name, feat in animal_features.items():
        dist = cosine(user_feat, feat)
        if dist < min_dist:
            min_dist = dist
            best_match = name
    print(f"✅ Suara paling mirip: {best_match} (jarak: {min_dist:.4f})")
    return best_match

def show_filter_result(animal_name):
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("🚫 Kamera tidak dapat dibuka.")
        return

    img_path = os.path.join('/Hewan/suara', f"kambing.png")
    if not os.path.exists(img_path):
        print(f"🚫 Gambar anjing.png tidak ditemukan.")
        cap.release()
        return

    overlay = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)
    overlay = cv2.resize(overlay, (250, 250))

    def overlay_image(frame, overlay_img, x, y):
        h, w = overlay_img.shape[:2]
        if overlay_img.shape[2] == 4:
            alpha = overlay_img[:, :, 3] / 255.0
            for c in range(3):
                frame[y:y+h, x:x+w, c] = (
                    alpha * overlay_img[:, :, c] + (1 - alpha) * frame[y:y+h, x:x+w, c]
                )
        else:
            frame[y:y+h, x:x+w] = overlay_img

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        overlay_image(frame, overlay, 140, 230)
        cv2.putText(frame, f"TOP 1 suara {animal_name}", (60, 60),
                    cv2.FONT_HERSHEY_SIMPLEX, 1.2, (255, 255, 255), 3)
        cv2.putText(frame, f"TOP 1 suara {animal_name}", (60, 60),
                    cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0, 0, 0), 1)

        cv2.circle(frame, (100, 430), 40, (0, 255, 255), -1)
        cv2.putText(frame, "?", (80, 450), cv2.FONT_HERSHEY_SIMPLEX,
                    2, (255, 255, 255), 4)

        cv2.imshow("🐾 AR Suara Hewan", frame)
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break

    cap.release()
    cv2.destroyAllWindows()

# Jalankan
if __name__ == "__main__":
    user_features = record_and_extract_features()
    best_animal = find_best_match(user_features)
    if best_animal:
        show_filter_result(best_animal)


🎙️ Rekam suara tiruan hewan...
✅ Suara paling mirip: None (jarak: inf)


In [3]:
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    print("🚫 Kamera tidak dapat dibuka.")
else:
    while True:
        ret, frame = cap.read()
        if not ret:
            print("🚫 Tidak dapat membaca frame dari kamera.")
            break
        cv2.imshow("Kamera", frame)
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
    cap.release()
    cv2.destroyAllWindows()