In [1]:
# Import necessary libraries
import numpy as np
import pandas as pd
import torch # PyTorch + CUDA for GPU acceleration
import torchaudio as ta # PyTorch audio processing library
import music21 as m21
import mido as md # Used for MIDI file visualization and manipulation
import pretty_midi as pm 
import librosa as lb
import random
import glob
import threading
import os
import base64
import requests
import json
import fluidsynth as fs # For MIDI playback as MP3/WAV files
import matplotlib.pyplot as plt
import spotipy
import aiohttp
import asyncio
import concurrent.futures
from queue import Queue
from spotipy.oauth2 import SpotifyClientCredentials
from torch.utils.data import Dataset, DataLoader
from multiprocessing import Pool
# import tensorflow as tf # Exclude tf in favour of PyTorch

In [9]:
# Load the preprocessed file index. The path is assembled with os.path.join so
# the notebook also runs on systems whose path separator is not a backslash.
df = pd.read_csv(os.path.join("Data", "Preprocessed", "file_info_processed.csv"))

# Column views used by the genre-lookup cells below
FilePaths = df["file_paths"]
SongNames = df["song_names"]
Artists = df["artists"]

df

Unnamed: 0,file_paths,song_names,artists
0,Data\Archive\.38 Special\Caught Up In You.mid,Caught Up In You,38 Special
1,Data\Archive\.38 Special\Fantasy Girl.mid,Fantasy Girl,38 Special
2,"Data\Archive\10,000_Maniacs\A_Campfire_Song.mid",A Campfire Song,"10,000 Maniacs"
3,Data\Archive\101_Strings\Theme_From_The_Godfat...,Theme From The Godfather,101 Strings
4,Data\Archive\10cc\Dreadlock_Holiday.1.mid,Dreadlock Holiday 1,10cc
...,...,...,...
17224,Data\Archive\ZZ_Top\Sleeping_Bag.mid,Sleeping Bag,ZZ Top
17225,Data\Archive\ZZ_Top\Tush.1.mid,Tush 1,ZZ Top
17226,Data\Archive\ZZ_Top\Tush.2.mid,Tush 2,ZZ Top
17227,Data\Archive\ZZ_Top\Tush.mid,Tush,ZZ Top


In [2]:
# Define file paths (built with os.path.join so they resolve on any OS, not
# only where "\" is the path separator)
spotify_response_path = os.path.join("Data", "Samples", "sample_spotify_response.txt")
cache_filename = os.path.join("Data", "Samples", "genre_cache.json")

In [3]:
# Create caches, initialize Spotify Web API
Genres = []

# Set up Spotify credentials.
# Secrets must not live in the notebook: they are read from the environment
# (SPOTIPY_CLIENT_ID / SPOTIPY_CLIENT_SECRET are the variable names spotipy
# itself looks for). NOTE(review): any credentials that were previously
# committed in this cell should be treated as leaked and rotated.
client_id = os.environ.get("SPOTIPY_CLIENT_ID")
client_secret = os.environ.get("SPOTIPY_CLIENT_SECRET")

if not client_id or not client_secret:
    # Fail here with a clear message instead of with an opaque auth error on
    # the first API request.
    raise RuntimeError(
        "Spotify credentials not found: set the SPOTIPY_CLIENT_ID and "
        "SPOTIPY_CLIENT_SECRET environment variables before running this cell."
    )

client_credentials_manager = SpotifyClientCredentials(
    client_id=client_id, client_secret=client_secret
)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [4]:
# Smoke-test the Spotify Web API with one known track so the shape of a search
# response can be inspected
test_song, test_artist = "What's Up", "4 Non Blondes"
results = sp.search(
    q=f"track:{test_song} artist:{test_artist}",
    type="track",
    limit=1,
)

# Dump the response's string form to disk for offline examination
with open(spotify_response_path, "w+") as file:
    file.write(str(results))

In [None]:
# Functions to access the cached file
# Serializes writes to the cache file. save_cache() can be reached from several
# worker threads at once (get_genre is run through a thread pool), and
# interleaved writes to the same file would corrupt it.
_cache_write_lock = threading.Lock()


def load_cache(filename):
    """Load the genre cache stored at ``filename``.

    Three on-disk layouts are understood:

    * a JSON list of ``[key, value]`` pairs, as written by ``save_cache``;
      keys stored as JSON lists are turned back into tuples;
    * a plain JSON object, returned unchanged;
    * the legacy Python-literal dump (``str(dict)``) that earlier versions of
      ``save_cache`` produced, parsed safely with ``ast.literal_eval``.

    Args:
        filename: Path of the cache file.

    Returns:
        dict: The cached mapping; ``{}`` if the file is missing or empty.

    Raises:
        json.JSONDecodeError: If the file content is neither valid JSON nor a
            Python dict literal.
    """
    try:
        # Default text encoding on purpose: legacy cache files were written
        # with it, and the JSON written by save_cache is pure ASCII.
        with open(filename, 'r') as file:
            text = file.read()
    except FileNotFoundError:
        return {}

    if not text.strip():
        return {}

    try:
        data = json.loads(text)
    except json.JSONDecodeError:
        # Fall back to the legacy str(dict) format. literal_eval only accepts
        # Python literals, so this does not execute arbitrary code.
        import ast

        try:
            legacy = ast.literal_eval(text)
        except (ValueError, SyntaxError):
            legacy = None
        if isinstance(legacy, dict):
            return legacy
        raise  # re-raise the JSONDecodeError: the file is genuinely corrupt

    if isinstance(data, dict):
        return data

    # Pair-list layout: JSON has no tuple type, so tuple keys were saved as lists.
    return {
        (tuple(key) if isinstance(key, list) else key): value
        for key, value in data
    }


def save_cache(filename, cache):
    """Write ``cache`` to ``filename`` as JSON that ``load_cache`` can read back.

    The mapping is stored as a list of ``[key, value]`` pairs rather than as a
    JSON object, because JSON object keys must be strings while the cache may
    be keyed by tuples such as ``(song_name, artist)``.

    Args:
        filename: Path of the cache file; it is overwritten.
        cache: Mapping to persist. Keys may be tuples or JSON-native scalars;
            values must be JSON-serializable.
    """
    # Snapshot first so an insert from another thread cannot change the dict
    # while it is being iterated.
    snapshot = list(cache.items())
    entries = [
        [list(key) if isinstance(key, tuple) else key, value]
        for key, value in snapshot
    ]
    with _cache_write_lock:
        with open(filename, 'w') as file:
            json.dump(entries, file)

# Populate the in-memory genre cache from whatever is already stored on disk
genre_cache = load_cache(filename=cache_filename)

In [None]:
# Function to get song genre from the Spotify Web API
def get_genre(song_name, artist):
    """Return the Spotify genres for a song, using ``genre_cache`` when possible.

    The song is looked up with a track search; the genres are then read from
    the first artist credited on the top search hit. A successful lookup is
    stored in ``genre_cache`` under the key ``(song_name, artist)`` and the
    cache is written to ``cache_filename`` straight away. Unsuccessful lookups
    are not cached.

    Args:
        song_name: Track title to search for.
        artist: Artist name to search for.

    Returns:
        The artist's list of genres, or ``None`` if the search has no hit, the
        hit lists no artists, or the artist has no genres.
    """
    cache_key = (song_name, artist)
    if cache_key in genre_cache:
        return genre_cache[cache_key]

    results = sp.search(q=f"track:{song_name} artist:{artist}", type="track", limit=1)

    hits = results["tracks"]["items"]
    if not hits:
        return None

    # The search hit already includes the track's artists, so a separate
    # sp.track() request is unnecessary; skipping it saves one API call per song.
    track_artists = hits[0]["artists"]
    if not track_artists:
        return None

    genres = sp.artist(track_artists[0]["id"])["genres"]
    if not genres:
        return None

    genre_cache[cache_key] = genres
    save_cache(cache_filename, genre_cache)
    return genres

In [None]:
# Sequential genre lookup, one song at a time.
# Genres is emptied first so that re-running this cell (or running it after the
# concurrent variant) yields exactly one entry per row of df instead of
# appending a second copy.
Genres.clear()

# Iterate the two columns positionally; the file path itself is not needed here.
for i, (song_name, artist) in enumerate(zip(SongNames, Artists)):
    genre = get_genre(song_name, artist)
    if genre:
        print(i, genre)
    else:
        print(f"{i} Skipped.")
    # Store None for songs without a genre so Genres stays aligned with df
    Genres.append(genre if genre else None)

In [None]:
# List of songs to process. zip() is a one-shot iterator, so it is rebuilt
# every time this cell runs.
songs_to_process = zip(SongNames, Artists)


def process_song(song_name, artist):
    """Look up one song's genres and return ``(song_name, artist, genre)``.

    ``genre`` is whatever ``get_genre`` returns for the pair.
    """
    return (song_name, artist, get_genre(song_name, artist))


# Number of concurrent workers
num_workers = 8

# Process songs concurrently. executor.map yields results in input order, so
# the output stays aligned with the rows of df. Materializing it into a list
# inside the `with` block surfaces any worker exception here and leaves
# `results` re-iterable afterwards.
with concurrent.futures.ThreadPoolExecutor(max_workers=num_workers) as executor:
    results = list(executor.map(lambda pair: process_song(*pair), songs_to_process))

# Rebuild Genres from scratch rather than appending to it, so that running this
# cell after the sequential loop (or running it twice) does not duplicate
# entries. Songs without a genre are stored as None.
Genres.clear()
Genres.extend(genre if genre else None for _, _, genre in results)