In [1]:
import os
import numpy as np
import sounddevice as sd
import librosa
import cv2
from scipy.spatial.distance import cosine
from sklearn.preprocessing import StandardScaler

# Konfigurasi
duration = 2
sample_rate = 22050
n_mfcc = 20  # Jumlah koefisien MFCC yang akan digunakan

# Folder sumber data (pastikan sesuai dengan struktur folder Anda)
audio_folder = "Hewan/suara"
image_folder = "Hewan/gambar"

# Hanya hewan tertentu
animal_list = ["kambing", "anjing", "kucing"]

def extract_fft_mfcc(y, sr, n_mfcc=n_mfcc, target_length=None):
    # FFT
    fft_spectrum = np.abs(np.fft.fft(y))[:len(y) // 2]
    target_fft_length = (len(fft_spectrum) // 100) * 100
    fft_trimmed = fft_spectrum[:target_fft_length]
    fft_features = np.mean(fft_trimmed.reshape(-1, 100), axis=1)

    # MFCC
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
    mfccs_mean = np.mean(mfccs, axis=1)

    combined_features = np.concatenate([fft_features, mfccs_mean])

    if target_length is not None:
        if combined_features.shape[0] < target_length:
            padding_length = target_length - combined_features.shape[0]
            combined_features = np.pad(combined_features, (0, padding_length), 'constant')
        elif combined_features.shape[0] > target_length:
            combined_features = combined_features[:target_length]

    return combined_features

# Load fitur audio hewan dan cari panjang maksimum
animal_features = {}
max_feature_length = 0
for name in animal_list:
    path = os.path.join(audio_folder, f"{name}.wav")
    if os.path.exists(path):
        y, sr = librosa.load(path, sr=sample_rate)
        features = extract_fft_mfcc(y, sr)
        animal_features[name] = features
        max_feature_length = max(max_feature_length, features.shape[0])
    else:
        print(f"⚠️ File suara {name}.wav tidak ditemukan di {audio_folder}")

print("✅ Fitur audio hewan yang berhasil dimuat (sebelum padding/truncating):")
for name, features in animal_features.items():
    print(f"  ➡️ Bentuk fitur suara {name}: {features.shape}")

# Pad/truncate fitur hewan agar memiliki panjang yang sama
for name, features in animal_features.items():
    animal_features[name] = extract_fft_mfcc(librosa.load(os.path.join(audio_folder, f"{name}.wav"), sr=sample_rate)[0], sample_rate, target_length=max_feature_length)

print("\n✅ Fitur audio hewan yang berhasil dimuat (setelah padding/truncating):")
for name, features in animal_features.items():
    print(f"  ➡️ Bentuk fitur suara {name}: {features.shape}")

def record_sound(target_length):
    print("🎙️ Merekam suara tiruan hewan...")
    audio = sd.rec(int(duration * sample_rate), samplerate=sample_rate, channels=1, dtype='float32')
    sd.wait()
    y = audio.flatten()
    return extract_fft_mfcc(y, sample_rate, target_length=target_length)

def find_best_match(user_feat):
    min_dist = float("inf")
    best_match = None
    print("➡️ Bentuk fitur suara pengguna:", user_feat.shape)
    for name, feat in animal_features.items():
        print(f"  ➡️ Bentuk fitur suara {name}:", feat.shape)
        dist = cosine(user_feat, feat)
        print(f"     ➡️ Jarak kosinus dengan suara {name}: {dist:.4f}")
        if dist < min_dist:
            min_dist = dist
            best_match = name
    print(f"✅ Suara paling mirip: {best_match} (jarak: {min_dist:.4f})")
    return best_match

def show_filter_result(cap, animal_name):
    if not cap.isOpened():
        print("🚫 Kamera tidak dapat dibuka.")
        return

    img_path = os.path.join(image_folder, f"{animal_name}.png")
    if not os.path.exists(img_path):
        print(f"🚫 Gambar {animal_name}.png tidak ditemukan di {image_folder}")
        return

    overlay = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)
    overlay = cv2.resize(overlay, (250, 250))

    def overlay_image(frame, overlay_img, x, y):
        h, w = overlay_img.shape[:2]
        if overlay_img.shape[2] == 4:
            alpha = overlay_img[:, :, 3] / 255.0
            for c in range(3):
                frame[y:y+h, x:x+w, c] = (
                    alpha * overlay_img[:, :, c] + (1 - alpha) * frame[y:y+h, x:x+w, c]
                )
        else:
            frame[y:y+h, x:x+w] = overlay_img

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        overlay_image(frame, overlay, 140, 230)
        cv2.putText(frame, f"TOP 1 suara {animal_name}", (60, 60),
                    cv2.FONT_HERSHEY_SIMPLEX, 1.2, (255, 255, 255), 3)
        cv2.putText(frame, f"TOP 1 suara {animal_name}", (60, 60),
                    cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0, 0, 0), 1)

        cv2.circle(frame, (100, 430), 40, (0, 255, 255), -1)
        cv2.putText(frame, "?", (80, 450), cv2.FONT_HERSHEY_SIMPLEX,
                    2, (255, 255, 255), 4)

        cv2.imshow("🐾 AR Suara Hewan", frame)
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break

def run_camera_and_get_input(cap):
    if not cap.isOpened():
        print("🚫 Kamera tidak dapat dibuka.")
        return False

    print("🎥 Kamera sedang berjalan. Tekan 's' untuk memulai merekam suara...")
    while True:
        ret, frame = cap.read()
        if not ret:
            print("🚫 Tidak dapat menerima frame dari kamera.")
            return False
        cv2.imshow("Kamera", frame)
        key = cv2.waitKey(1) & 0xFF
        if key == ord('s'):
            return True
        elif key == ord('q'):
            return False
    return False

# Jalankan
if __name__ == "__main__":
    cap = cv2.VideoCapture(0)  # Inisialisasi kamera di awal
    if not cap.isOpened():
        print("🚫 Gagal membuka kamera.")
    else:
        if run_camera_and_get_input(cap):
            user_features = record_sound(target_length=max_feature_length)
            best_animal = find_best_match(user_features)
            if best_animal:
                show_filter_result(cap, best_animal)
        else:
            print("❌ Proses dibatalkan oleh pengguna.")
    if cap.isOpened():
        cap.release()
        cv2.destroyAllWindows()

✅ Fitur audio hewan yang berhasil dimuat (sebelum padding/truncating):
  ➡️ Bentuk fitur suara kambing: (1126,)
  ➡️ Bentuk fitur suara anjing: (1817,)
  ➡️ Bentuk fitur suara kucing: (914,)

✅ Fitur audio hewan yang berhasil dimuat (setelah padding/truncating):
  ➡️ Bentuk fitur suara kambing: (1817,)
  ➡️ Bentuk fitur suara anjing: (1817,)
  ➡️ Bentuk fitur suara kucing: (1817,)
🎥 Kamera sedang berjalan. Tekan 's' untuk memulai merekam suara...
🎙️ Merekam suara tiruan hewan...
➡️ Bentuk fitur suara pengguna: (1817,)
  ➡️ Bentuk fitur suara kambing: (1817,)
     ➡️ Jarak kosinus dengan suara kambing: 1.0107
  ➡️ Bentuk fitur suara anjing: (1817,)
     ➡️ Jarak kosinus dengan suara anjing: 1.0058
  ➡️ Bentuk fitur suara kucing: (1817,)
     ➡️ Jarak kosinus dengan suara kucing: 1.0117
✅ Suara paling mirip: anjing (jarak: 1.0058)
