In [None]:
!pip install gradio fastbook moviepy librosa matplotlib yt-dlp

In [None]:
import fastbook
# Run fastbook's notebook setup hook.
# NOTE(review): presumably prepares the environment (e.g. on Colab) — confirm against the fastbook docs.
fastbook.setup_book()

In [None]:
from pathlib import Path
from fastai.vision.all import *

# Directory that holds the exported learner; Path() is the current working directory.
path = Path()
# List the available exports. A bare expression in the middle of a cell is not
# displayed, so print it explicitly.
print(path.ls(file_exts='.pkl'))

# NOTE: load_learner unpickles the file, which can execute arbitrary code —
# only load exports that come from a trusted source.
learn_inf = load_learner(path/'export.pkl')

In [None]:
import subprocess

def download_audio(youtube_url, dest):
    """Download the best available audio stream of a video with yt-dlp.

    Args:
        youtube_url: URL of the video to fetch.
        dest: Directory (str or Path) the audio file is written to; it is
            created if it does not exist.

    Returns:
        The name of the downloaded file without its extension (the file is
        named after the video title), exactly as it exists on disk, or None
        when yt-dlp exits with an error or reports no output file.

    Raises:
        FileNotFoundError: if the ``yt-dlp`` executable is not on PATH.
    """
    dest = Path(dest)
    dest.mkdir(exist_ok=True, parents=True)

    output_template = str(dest / "%(title)s.%(ext)s")

    # download audio in best format (usually m4a)
    command = [
        "yt-dlp",
        "-f", "bestaudio",
        "-o", output_template,
        "--print", "after_move:filepath",
        # End of options: the URL is user supplied, so a value starting with
        # "-" must never be interpreted as a yt-dlp flag.
        "--",
        youtube_url,
    ]
    try:
        result = subprocess.run(command, capture_output=True, text=True, check=True, encoding="utf-8")
    except subprocess.CalledProcessError as e:
        print("yt-dlp failed:\n", e.stderr if e.stderr else str(e))
        return None

    # One path is printed per downloaded file; keep the last one. Only the line
    # terminator is removed, so the stem matches the file name on disk even
    # when the title starts or ends with whitespace.
    printed_paths = [line for line in result.stdout.split("\n") if line.strip()]
    if not printed_paths:
        print("yt-dlp did not report an output file")
        return None
    return Path(printed_paths[-1]).stem

In [None]:
import moviepy as mp
import librosa, librosa.display
import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path

def _convert_to_wav(audio, audio_path):
  """Write the audio track of ``audio`` to ``audio_path`` as WAV; return True on success."""
  try:
      clip = None
      try:
          clip = mp.VideoFileClip(str(audio))
          # Only moviepy 2.x exposes VideoFileClip at package level, and its
          # write_audiofile has no `verbose` argument; logger=None silences it.
          clip.audio.write_audiofile(str(audio_path), logger=None)
      finally:
          # Release the reader even when the export fails.
          if clip is not None:
              clip.close()
      return True
  except Exception as e:
      print(f"skipping video processing for {audio.name}: {e}")

  try:
      # use ffmpeg directly to convert to wav if moviepy failed;
      # -y overwrites a partial or stale wav instead of refusing to run
      command = ["ffmpeg", "-y", "-i", str(audio), str(audio_path)]
      subprocess.run(command, check=True, capture_output=True)
      return True
  except FileNotFoundError:
      print(f"could not convert {audio.name} to .wav: ffmpeg executable not found")
  except subprocess.CalledProcessError as sub_e:
      print(f"could not convert {audio.name} to .wav using ffmpeg: {sub_e.stderr.decode(errors='replace')}")
  return False


def _save_mel_spectrogram(audio_path, out_img):
  """Render the mel spectrogram of the WAV at ``audio_path`` into the image ``out_img``."""
  y, sr = librosa.load(str(audio_path))
  S = librosa.feature.melspectrogram(y=y, sr=sr)
  S_dB = librosa.power_to_db(S, ref=np.max)

  # Explicit figure/axes instead of the global pyplot state, so concurrent
  # calls cannot draw into each other's figure.
  fig, ax = plt.subplots(figsize=(10, 4))
  try:
      librosa.display.specshow(S_dB, sr=sr, x_axis='time', y_axis='mel', ax=ax)
      ax.axis('off')
      fig.tight_layout()
      fig.savefig(out_img, bbox_inches='tight', pad_inches=0)
  finally:
      # Always free the figure, also when plotting or saving fails.
      plt.close(fig)


def audio_to_spectogram(source, dest):
  """Convert every file in ``source`` to WAV and save a mel spectrogram image of it.

  For each regular file ``<stem>.<ext>`` in ``source`` this writes
  ``dest/wav/<stem>.wav`` and ``dest/sgrams/<stem>.png``. Conversion is tried
  with moviepy first and falls back to the ``ffmpeg`` command line tool.
  Failures are printed and the affected file is skipped; they do not abort
  the run.

  Args:
    source: Directory (str or Path) containing the media files.
    dest: Output directory (str or Path); created together with its ``wav``
      and ``sgrams`` sub-directories if missing.
  """
  source = Path(source)
  dest = Path(dest)
  dest.mkdir(exist_ok=True, parents=True)

  wavdest = dest / "wav"
  wavdest.mkdir(exist_ok=True, parents=True)

  sdest = dest / "sgrams"
  sdest.mkdir(exist_ok=True, parents=True)

  for audio in source.iterdir():
    if not audio.is_file():
      continue
    print(f"processing: {audio.name}")

    base_name = audio.stem
    audio_path = wavdest / f"{base_name}.wav"
    if not _convert_to_wav(audio, audio_path):
      continue

    # create spectrogram
    out_img = sdest / f"{base_name}.png"
    try:
        _save_mel_spectrogram(audio_path, out_img)
        print(f"generated spectogram: {out_img}")
    except Exception as e:
        print(f"could not generate spectrogram for {audio_path.name}: {e}")

In [None]:
# `learn_inf` is already loaded from path/'export.pkl' in the model-loading
# cell (the one that defines `path`); loading the same export a second time
# here only repeated the work.

# Root directory for downloaded audio and generated files (current working directory).
uploads_path = Path()
uploads_path.mkdir(exist_ok=True, parents=True)

def process_yt_url(u):
  """Download a video's audio, render its spectrogram and predict labels for it.

  Note that the spectrogram step walks the whole ``uploads_path/"audio"``
  directory, so audio left over from earlier calls is processed again.

  Args:
    u: URL of the video.

  Returns:
    Tuple ``(title, predicted_labels)``: the downloaded file's base name and
    the space separated labels predicted for its spectrogram.

  Raises:
    ValueError: if no URL is given.
    RuntimeError: if the download fails or no spectrogram was produced.
  """
  url = (u or "").strip()
  if not url:
    raise ValueError("no YouTube URL provided")

  # Download into the same directory the spectrogram step reads from.
  audio_dir = uploads_path / "audio"
  title = download_audio(url, dest=audio_dir)
  if title is None:
    # download_audio signals failure with None; stop here instead of
    # predicting on a non-existent "None.png".
    raise RuntimeError(f"could not download audio from {url!r}")

  audio_to_spectogram(audio_dir, dest=uploads_path)
  print(f"processed {title}")

  image_path = uploads_path / "sgrams" / f"{title}.png"
  if not image_path.is_file():
    raise RuntimeError(f"no spectrogram was generated for {title!r}")

  predicted_labels = predict_labels(image_path)
  return title, predicted_labels

def predict_labels(image_path, learner=None, threshold=0.5, top_k=3):
    """Return the most probable labels for a spectrogram image.

    Args:
        image_path: Path of the image passed to the learner's ``predict``.
        learner: Learner to use; defaults to the notebook-level ``learn_inf``.
        threshold: A label is reported only if its probability is strictly
            greater than this value.
        top_k: Maximum number of labels to report.

    Returns:
        The selected labels, most probable first, joined by single spaces
        (an empty string when no label passes the threshold).
    """
    if learner is None:
        learner = learn_inf

    _, _, probs = learner.predict(image_path)
    # Class indices ordered by descending probability, as plain ints.
    top_idx = probs.argsort(descending=True)[:top_k].tolist()
    vocab = learner.dls.vocab
    # Filter on the probability of each class (not on its index) and look the
    # label up by class index (not by its rank among the top results).
    labels = [vocab[i] for i in top_idx if float(probs[i]) > threshold]
    return " ".join(labels)

In [None]:
import gradio as gr

# Web UI: one text box for the URL in, the title and the predicted labels out.
iface = gr.Interface(
    title="YouTube to Spectogram Genre Inference",
    description="Paste a YouTube link",
    fn=process_yt_url,
    inputs=gr.Textbox(label="Paste YouTube URL here"),
    outputs=[gr.Text(label="Title"), gr.Text(label="Predicted Labels")],
)

# share=True asks Gradio for a public link in addition to the local server.
iface.launch(share=True)