In [18]:
import numpy as np
import librosa
from tensorflow import keras

# GTZAN genres
GENRES = ['blues', 'classical', 'country', 'disco', 'hiphop',
          'jazz', 'metal', 'pop', 'reggae', 'rock']

def extract_melspectrogram(audio_path, sample_rate=22050, duration=30.0):
    """Extract a standardized log-mel spectrogram from an audio file.

    The clip is loaded at ``sample_rate``, zero-padded or truncated to exactly
    ``duration`` seconds, turned into a 128-band mel spectrogram in dB,
    standardized to zero mean / unit variance, and finally padded or cropped
    to 1292 time frames.

    Args:
        audio_path: Path of the audio file, passed to ``librosa.load``.
        sample_rate: Sampling rate the audio is loaded at.
        duration: Number of seconds (from the start of the file) to use.

    Returns:
        Array of shape ``(128, 1292, 1)`` (mel bands, time frames, channel).
    """
    # Fixed number of time frames in the returned array.
    # NOTE(review): presumably the time dimension the model was trained on
    # (30 s at 22050 Hz with hop 512) -- confirm against the model input shape.
    n_frames = 1292

    # Load audio (the returned sampling rate equals `sample_rate`, so it is unused)
    audio, _ = librosa.load(audio_path, sr=sample_rate, duration=duration)

    # Force the waveform to exactly `duration` seconds
    n_samples = int(sample_rate * duration)
    if len(audio) < n_samples:
        audio = np.pad(audio, (0, n_samples - len(audio)))
    else:
        audio = audio[:n_samples]

    # Create mel-spectrogram
    mel_spec = librosa.feature.melspectrogram(
        y=audio,
        sr=sample_rate,
        n_mels=128,
        n_fft=2048,
        hop_length=512
    )

    # Convert to dB and standardize
    mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)
    centered = mel_spec_db - mel_spec_db.mean()
    spread = mel_spec_db.std()
    # A silent (or empty) clip yields a constant spectrogram whose std is 0;
    # dividing would fill the features with NaN, so keep the centered zeros.
    mel_spec_db = centered / spread if spread > 0 else centered

    # Ensure correct shape (128, n_frames); padding with 0 adds values equal
    # to the mean of the standardized spectrogram.
    if mel_spec_db.shape[1] < n_frames:
        pad_width = n_frames - mel_spec_db.shape[1]
        mel_spec_db = np.pad(mel_spec_db, ((0, 0), (0, pad_width)))
    else:
        mel_spec_db = mel_spec_db[:, :n_frames]

    # Add channel dimension: (128, n_frames, 1)
    return mel_spec_db[..., np.newaxis]


def predict_genre(model, audio_path):
    """Classify one audio file and summarize the model's output.

    Args:
        model: Object exposing ``predict(batch, verbose=0)``; the first row
            of its output is read as one score per entry of ``GENRES``.
        audio_path: Path of the audio file to classify.

    Returns:
        dict with keys ``'genre'`` (best label), ``'confidence'`` (its score),
        ``'top_3'`` (list of ``(label, score)`` pairs, best first) and
        ``'all_probabilities'`` (label -> score for every genre).
    """
    # Features for a batch containing just this one clip
    batch = np.expand_dims(extract_melspectrogram(audio_path), axis=0)

    # Scores for the single item in the batch
    scores = model.predict(batch, verbose=0)[0]

    best = np.argmax(scores)
    # Indices ordered from highest to lowest score, keeping the first three
    ranked = np.argsort(scores)[::-1][:3]

    top_three = []
    for idx in ranked:
        top_three.append((GENRES[idx], float(scores[idx])))

    return {
        'genre': GENRES[best],
        'confidence': float(scores[best]),
        'top_3': top_three,
        'all_probabilities': dict(zip(GENRES, map(float, scores))),
    }


# ============================================================
# USAGE EXAMPLES
# ============================================================

if __name__ == "__main__":
    # Audio file used by every example below; defined once so the printed
    # file name can never drift from the file that is actually analysed.
    audio_path = 'test2.mp3'

    # Load the trained model
    print("Loading model...")
    model = keras.models.load_model('gtzan_model.keras')  # Keras 3 native format
    # For the legacy H5 checkpoint, load 'gtzan_fixed.h5' instead.
    print("✓ Model loaded!\n")

    # Example 1: Single prediction
    print("="*60)
    print("SINGLE PREDICTION")
    print("="*60)

    result = predict_genre(model, audio_path)

    print(f"\n🎵 Predicted Genre: {result['genre'].upper()}")
    print(f"   Confidence: {result['confidence']:.2%}")
    print("\nTop 3 Predictions:")
    for i, (genre, prob) in enumerate(result['top_3'], 1):
        # 40-character bar: filled cells are proportional to the probability
        filled = int(prob * 40)
        bar = '█' * filled + '░' * (40 - filled)
        print(f"  {i}. {genre:12s} {bar} {prob:.2%}")


    # Example 2: Batch prediction
    print("\n\n" + "="*60)
    print("BATCH PREDICTION")
    print("="*60)

    audio_files = [audio_path]

    for audio_file in audio_files:
        # Best-effort: report a failing file and continue with the rest
        try:
            result = predict_genre(model, audio_file)
            print(f"{audio_file:20s} → {result['genre']:12s} ({result['confidence']:.1%})")
        except Exception as e:
            print(f"{audio_file:20s} → Error: {e}")


    # Example 3: Get all probabilities
    print("\n\n" + "="*60)
    print("ALL GENRE PROBABILITIES")
    print("="*60)

    result = predict_genre(model, audio_path)
    print(f"\nFor file: {audio_path}\n")

    # Genres listed from most to least probable, each with a 50-character bar
    for genre, prob in sorted(result['all_probabilities'].items(), key=lambda x: x[1], reverse=True):
        filled = int(prob * 50)
        bar = '█' * filled + '░' * (50 - filled)
        print(f"{genre:12s} {bar} {prob:.2%}")

Loading model...


  saveable.load_own_variables(weights_store.get(inner_path))


✓ Model loaded!

SINGLE PREDICTION

🎵 Predicted Genre: HIPHOP
   Confidence: 35.19%

Top 3 Predictions:
  1. hiphop       ██████████████░░░░░░░░░░░░░░░░░░░░░░░░░░ 35.19%
  2. country      ██████████░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 26.51%
  3. pop          ██████████░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 26.49%


BATCH PREDICTION
test2.mp3            → hiphop       (35.2%)


ALL GENRE PROBABILITIES

For file: test_song.wav

hiphop       █████████████████░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ 35.19%
country      â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘ 2