In [None]:
!pip install essentia-tensorflow

In [None]:
!curl -Z -O https://essentia.upf.edu/models/feature-extractors/discogs-effnet/discogs-effnet-bs64-1.pb \
-O https://essentia.upf.edu/models/classification-heads/genre_discogs400/genre_discogs400-discogs-effnet-1.pb \
-O https://essentia.upf.edu/models/classification-heads/genre_discogs400/genre_discogs400-discogs-effnet-1.json

See the [Essentia documentation](https://essentia.upf.edu/models.html) for more information on the models.

In [None]:
import json
from collections import defaultdict
from pathlib import Path
from tqdm import tqdm

# Read the classification head's metadata file. Its "classes" entry is the
# list of label names; a prediction index is turned into a label by indexing
# into this list.
# JSON text is UTF-8, so decode it explicitly instead of relying on the
# platform's default encoding.
with open("genre_discogs400-discogs-effnet-1.json", encoding="utf-8") as fh:
  metadata = json.load(fh)
class_map = metadata["classes"]

In [None]:
# Group class indices by top-level category. Each label is split on "---" and
# only the part before the first separator is used as the category, which is
# the same parsing the batch loop applies to the predicted label.
categories = defaultdict(list)
for i, class_name in enumerate(class_map):
  categories[class_name.split("---")[0]].append(i)

# For every category, the (smallest, largest) class index that belongs to it.
cat_ranges = {
  cat: (min(indices), max(indices))
  for cat, indices in categories.items()
}

In [None]:
# Display the mapping: category -> (min class index, max class index).
cat_ranges

In [None]:
from essentia.standard import MonoLoader, TensorflowPredictEffnetDiscogs, TensorflowPredict2D

# Decode one sample clip through MonoLoader, requesting a 16000 Hz sample rate.
load_clip = MonoLoader(
  filename="musiccaps/-bgHkxwoliw_30.mp3",
  sampleRate=16000,
  resampleQuality=4,
)
audio = load_clip()

# Feature extractor built from the downloaded discogs-effnet graph. The output
# of the graph node "PartitionedCall:1" is what this notebook treats as the
# clip's embeddings.
backend_model = TensorflowPredictEffnetDiscogs(
  graphFilename="discogs-effnet-bs64-1.pb",
  output="PartitionedCall:1",
)
embeddings = backend_model(audio)

In [None]:
# Classification head that maps the embeddings to genre_discogs400 scores.
classifier = TensorflowPredict2D(
  graphFilename="genre_discogs400-discogs-effnet-1.pb",
  input="serving_default_model_Placeholder",
  output="PartitionedCall:0",
)
# Average the classifier output over its first axis (presumably one row per
# analysed audio patch -- confirm), leaving one score per class.
activations = classifier(embeddings)
predictions = activations.mean(0)

In [None]:
# `predictions` is already averaged over axis 0 where it is computed, so it
# holds one score per class. Averaging it a second time would collapse it to a
# single number whose argmax is always 0, i.e. always the first label.
# Take the argmax of the per-class scores directly.
class_map[predictions.argmax()]

In [None]:
# Directory (relative to the working directory) whose entries are iterated and
# classified by the batch loop.
musiccaps_path = Path("musiccaps")

In [None]:
import tensorflow as tf
# Show the GPU devices TensorFlow can see; an empty list means none are visible.
tf.config.list_physical_devices('GPU')

In [None]:
# Classify every clip in `musiccaps_path`.
#   results:         file name -> top-level genre (label text before "---")
#   all_predictions: file name -> list of averaged per-class scores
results = {}
all_predictions = {}

# Materialise the directory listing up front so that:
#   * tqdm shows the real number of clips instead of a hard-coded count,
#   * sub-directories and other non-file entries are not handed to the loader,
#   * clips are processed in a reproducible (sorted) order.
audio_files = sorted(p for p in musiccaps_path.iterdir() if p.is_file())

for file_path in tqdm(audio_files):
  audio = MonoLoader(filename=str(file_path), sampleRate=16000, resampleQuality=4)()
  embeddings = backend_model(audio)
  # Average the classifier output over its first axis to get one score per class.
  predictions = classifier(embeddings).mean(0)
  genre_label = class_map[predictions.argmax()].split("---")[0]
  results[file_path.name] = genre_label
  all_predictions[file_path.name] = predictions.tolist()

In [None]:
# Persist the file name -> genre mapping as a JSON document.
with open("musiccaps_genres.json", mode="w") as genres_file:
  genres_file.write(json.dumps(results))

In [None]:
import csv

# Write the full score table: a header row ("name" followed by every class
# label), then one row per clip holding its file name and per-class scores.
with open("musiccaps_preds.csv", "w", newline="") as csv_file:
  table = csv.writer(csv_file)
  table.writerow(["name", *class_map])
  table.writerows(
    [clip_name, *scores] for clip_name, scores in all_predictions.items()
  )