# In [3]:
import tkinter as tk
from tkinter import filedialog, messagebox
import os
from pydub import AudioSegment
from pydub.playback import play
import numpy as np
import librosa
import joblib
import sounddevice as sd
import soundfile as sf
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier

# Load the pre-trained Decision Tree classifier, the feature scaler and the
# label encoder. The paths are relative, so the three .pkl files must be in
# the current working directory when this script starts.
# NOTE(review): joblib.load is pickle-based and can execute code embedded in
# the file -- only load artifacts from a trusted source.
classifier = joblib.load('decision_tree_classifier.pkl')
scaler = joblib.load('scaler.pkl')
label_encoder = joblib.load('label_encoder.pkl')

# Feature-extraction functions (taken from the code you provided)
def extract_rhythm_features(y, sr):
    """Compute tempo and onset-strength statistics for an audio signal.

    Args:
        y: Audio samples, passed unchanged to librosa.
        sr: Sampling rate of ``y``.

    Returns:
        A ``(tempo, onset_mean, onset_std)`` tuple: the tempo estimated by
        ``librosa.beat.beat_track`` as a plain float, followed by the mean
        and standard deviation of the onset-strength envelope.
    """
    tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
    # Depending on the librosa version, the tempo comes back either as a
    # scalar or as a small array. Callers place it in a list next to other
    # scalars, so always hand back a single float.
    tempo = float(np.asarray(tempo).ravel()[0])

    onset_env = librosa.onset.onset_strength(y=y, sr=sr)
    return tempo, np.mean(onset_env), np.std(onset_env)

def extract_frequency_features(y, sr, n_mfcc=20):
    """Summarise four spectral descriptors of an audio signal.

    MFCCs, chroma, spectral contrast and spectral centroid are computed with
    librosa, and each resulting matrix is averaged across its last axis
    (the frame axis in librosa's documented output layout).

    Args:
        y: Audio samples, passed unchanged to librosa.
        sr: Sampling rate of ``y``.
        n_mfcc: Number of MFCC coefficients to request.

    Returns:
        A 4-tuple ``(mfcc_mean, chroma_mean, contrast_mean, centroid_mean)``
        of averaged arrays, in that order.
    """
    def _average_over_frames(feature_matrix):
        # Transpose first so the reduction runs along axis 0.
        return np.mean(feature_matrix.T, axis=0)

    descriptors = (
        librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc),
        librosa.feature.chroma_stft(y=y, sr=sr),
        librosa.feature.spectral_contrast(y=y, sr=sr),
        librosa.feature.spectral_centroid(y=y, sr=sr),
    )
    return tuple(_average_over_frames(matrix) for matrix in descriptors)

def extract_f0_features(y, sr, f0_min=50, f0_max=400):
    """Estimate the mean and spread of the dominant pitch of a signal.

    Runs ``librosa.core.piptrack`` and, for every frame that has at least
    one non-zero magnitude, keeps the pitch found at the bin with the
    largest magnitude.

    Args:
        y: Audio samples, passed unchanged to librosa.
        sr: Sampling rate of ``y``.
        f0_min: Lower frequency bound handed to ``piptrack`` as ``fmin``.
        f0_max: Upper frequency bound handed to ``piptrack`` as ``fmax``.

    Returns:
        ``(f0_mean, f0_std)`` over the kept pitches, or ``(0, 0)`` when no
        frame contributed a pitch.
    """
    pitches, magnitudes = librosa.core.piptrack(
        y=y, sr=sr, fmin=f0_min, fmax=f0_max, threshold=0.75
    )

    f0_track = []
    for frame_idx in range(magnitudes.shape[1]):
        frame_magnitudes = magnitudes[:, frame_idx]
        if not frame_magnitudes.any():
            # Frame has no detected peak at all; skip it.
            continue
        strongest_bin = frame_magnitudes.argmax()
        f0_track.append(pitches[strongest_bin, frame_idx])

    if not f0_track:
        return 0, 0
    return np.mean(f0_track), np.std(f0_track)

def extract_features(y, sr, num_segments=5, n_mfcc=20, n_fft=2048, hop_length=512):
    """Build one feature vector for a clip by averaging per-segment features.

    The signal is cut into ``num_segments`` equal-length consecutive
    segments (any leftover samples at the end are ignored). For each
    segment the frequency, rhythm and F0 features are concatenated into one
    vector, and the vectors are then averaged element-wise.

    Args:
        y: Audio samples; must support ``len()`` and slicing.
        sr: Sampling rate of ``y``.
        num_segments: Number of segments to split the signal into (>= 1).
        n_mfcc: Number of MFCC coefficients per segment.
        n_fft: Accepted for interface compatibility; not used here.
        hop_length: Accepted for interface compatibility; not used here.

    Returns:
        A 1-D NumPy array: the mean of the per-segment feature vectors.

    Raises:
        ValueError: If ``num_segments`` is less than 1, or if ``y`` has
            fewer samples than ``num_segments`` (segments would be empty).
    """
    if num_segments < 1:
        raise ValueError(f"num_segments must be >= 1, got {num_segments}")

    num_samples_per_segment = len(y) // num_segments
    if num_samples_per_segment == 0:
        raise ValueError(
            f"audio is too short: {len(y)} samples cannot be split into "
            f"{num_segments} non-empty segments"
        )

    all_features = []
    for s in range(num_segments):
        start_sample = s * num_samples_per_segment
        segment = y[start_sample:start_sample + num_samples_per_segment]

        mfcc_mean, chroma_mean, spectral_contrast_mean, spectral_centroid_mean = (
            extract_frequency_features(segment, sr, n_mfcc)
        )
        tempo, onset_env_mean, onset_env_std = extract_rhythm_features(segment, sr)
        f0_mean, f0_std = extract_f0_features(segment, sr)

        # The order of this concatenation defines the feature layout seen by
        # the scaler and classifier, so it must not change.
        all_features.append(np.concatenate([
            mfcc_mean, chroma_mean, spectral_contrast_mean, spectral_centroid_mean,
            [tempo, onset_env_mean, onset_env_std, f0_mean, f0_std],
        ]))

    return np.mean(all_features, axis=0)

# Initialize main window. `root` is the parent of every widget created below
# and must exist before any of them is constructed.
root = tk.Tk()
root.title("Music Genre Classification")

def play_song():
    """Play the listbox's active entry through pydub.

    Does nothing when the active entry is empty (for example when no songs
    have been loaded yet).
    """
    selected_path = song_listbox.get(tk.ACTIVE)
    if not selected_path:
        return
    # NOTE(review): play() is invoked directly from the button callback; the
    # window presumably stays unresponsive until playback ends -- confirm.
    play(AudioSegment.from_file(selected_path))

def predict_genre_from_file():
    """Predict and display the genre of the listbox's active song.

    Does nothing when the active entry is empty. Otherwise the path is
    handed to ``predict_genre_from_recording``, which loads the audio,
    extracts and scales the features, runs the classifier and writes the
    result into the output box. Sharing that routine keeps the file and
    recording code paths identical.
    """
    song_path = song_listbox.get(tk.ACTIVE)
    if song_path:
        predict_genre_from_recording(song_path)

def record_audio():
    """Record from the default input device and predict the clip's genre.

    Shows an information dialog, records ``duration`` seconds of two-channel
    audio at ``fs`` Hz, writes it to ``recorded_audio.wav`` in the current
    working directory and passes that file to
    ``predict_genre_from_recording``.
    """
    duration = 10  # seconds of audio to capture
    fs = 44100     # sampling rate in Hz
    # Build the message from `duration` so the announced length always
    # matches what is actually recorded.
    messagebox.showinfo("Recording", f"Recording for {duration} seconds...")
    recording = sd.rec(int(duration * fs), samplerate=fs, channels=2)
    sd.wait()  # wait for the recording started by sd.rec() to finish
    sf.write("recorded_audio.wav", recording, fs)
    predict_genre_from_recording("recorded_audio.wav")

def predict_genre_from_recording(file_path):
    """Predict the genre of an audio file and show it in the output box.

    Args:
        file_path: Path of the audio file to load with ``librosa.load``.

    The extracted feature vector is scaled with the module-level ``scaler``,
    classified with ``classifier``, decoded with ``label_encoder`` and the
    resulting label replaces the current content of ``genre_output_box``.
    """
    samples, sample_rate = librosa.load(file_path)

    # The scaler receives a list containing the single feature vector.
    feature_row = [extract_features(samples, sample_rate)]
    scaled_row = scaler.transform(feature_row)
    predicted_labels = label_encoder.inverse_transform(classifier.predict(scaled_row))

    genre_output_box.delete("1.0", tk.END)
    genre_output_box.insert(tk.END, f"Predicted Genre: {predicted_labels[0]}")

# Container that holds the song list and its scrollbar side by side.
frame = tk.Frame(root)
frame.pack(pady=20)

# Listbox holding the paths of the loaded songs; the callbacks above read
# its active entry.
song_listbox = tk.Listbox(frame, width=50, height=10)
song_listbox.pack(side=tk.LEFT, padx=10)

# Vertical scrollbar: moving it scrolls the listbox.
scrollbar = tk.Scrollbar(frame, orient="vertical")
scrollbar.config(command=song_listbox.yview)
scrollbar.pack(side=tk.LEFT, fill="y")

# Reverse link: scrolling the listbox updates the scrollbar position.
song_listbox.config(yscrollcommand=scrollbar.set)

def load_dataset_songs():
    """Ask the user for ``*.wav`` files and append them to the song list.

    Leaves the listbox untouched when the file dialog returns nothing.
    """
    chosen = filedialog.askopenfilenames(filetypes=[("Audio Files", "*.wav")])
    if not chosen:
        return
    # A single insert call appends every chosen path, in order, at the end.
    song_listbox.insert(tk.END, *chosen)

# Action buttons, packed into the root window in the order they are created.
# Each one is bound to the callback of the same purpose defined above.
load_dataset_button = tk.Button(root, text="Load Dataset Songs", command=load_dataset_songs)
load_dataset_button.pack(pady=10)

play_button = tk.Button(root, text="Play Song", command=play_song)
play_button.pack(pady=10)

predict_button = tk.Button(root, text="Predict Genre from File", command=predict_genre_from_file)
predict_button.pack(pady=10)

record_button = tk.Button(root, text="Record and Predict Genre", command=record_audio)
record_button.pack(pady=10)

# Text box the prediction callbacks clear and rewrite with the result.
genre_output_box = tk.Text(root, height=2, width=50)
genre_output_box.pack(pady=10)

# Hand control to Tk's event loop.
root.mainloop()

