In [1]:
# Import necessary libraries
import numpy as np
import pandas as pd
import torch # PyTorch + CUDA for GPU acceleration
import torchaudio as ta # PyTorch audio processing library
import music21 as m21
import mido as md # Used for MIDI file visualization and manipulation
import pretty_midi as pm 
import librosa as lb
import random
import glob
import threading
import os
import base64
import requests
import json
import fluidsynth as fs # For MIDI playback as MP3/WAV files
import matplotlib.pyplot as plt
import spotipy
import aiohttp
import asyncio
import concurrent.futures
from queue import Queue
from spotipy.oauth2 import SpotifyClientCredentials
from torch.utils.data import Dataset, DataLoader
from multiprocessing import Pool
# import tensorflow as tf # Exclude tf in favour of PyTorch

In [3]:
# Load the preprocessed file index (one row per MIDI file).
# The path is assembled with os.path.join so the notebook also runs on
# non-Windows systems; on Windows this yields the same backslash-separated
# path that was previously hard-coded.
# NOTE(review): the values in the "file_paths" column themselves appear to use
# Windows-style separators (see the displayed frame) - confirm before running
# the later cells on another OS.
df = pd.read_csv(os.path.join("Data", "Preprocessed", "file_info_processed.csv"))

# Column views used by the cells below
FilePaths = df["file_paths"]
SongNames = df["song_names"]
Artists = df["artists"]

df

Unnamed: 0,file_paths,song_names,artists
0,Data\Archive\.38 Special\Caught Up In You.mid,Caught Up In You,38 Special
1,Data\Archive\.38 Special\Fantasy Girl.mid,Fantasy Girl,38 Special
2,"Data\Archive\10,000_Maniacs\A_Campfire_Song.mid",A Campfire Song,"10,000 Maniacs"
3,Data\Archive\101_Strings\Theme_From_The_Godfat...,Theme From The Godfather,101 Strings
4,Data\Archive\10cc\Dreadlock_Holiday.1.mid,Dreadlock Holiday 1,10cc
...,...,...,...
17224,Data\Archive\ZZ_Top\Sleeping_Bag.mid,Sleeping Bag,ZZ Top
17225,Data\Archive\ZZ_Top\Tush.1.mid,Tush 1,ZZ Top
17226,Data\Archive\ZZ_Top\Tush.2.mid,Tush 2,ZZ Top
17227,Data\Archive\ZZ_Top\Tush.mid,Tush,ZZ Top


In [None]:
# Functions to parse MIDI files
def get_midi_content(file_path):
    """Read the first tempo, key signature and time signature of a MIDI file.

    The messages of the file are scanned in order. The first ``set_tempo``,
    ``key_signature`` and ``time_signature`` messages encountered are recorded,
    and scanning stops as soon as all three have been seen.

    Parameters
    ----------
    file_path
        Path handed to ``md.MidiFile``.

    Returns
    -------
    tuple
        ``(tempo, key_signature, numerator, denominator, clocks_per_click,
        notated_32nd_notes_per_beat)``. A field is ``None`` when the
        corresponding message never appears in the file.

    Notes
    -----
    ``OSError`` is caught and reported with ``print``; the values gathered up
    to that point (all ``None`` if nothing was read) are returned. Any other
    exception raised while reading the file propagates to the caller.
    """
    tempo = None
    key_signature = None
    numerator = None
    denominator = None
    clocks_per_click = None
    notated_32nd_notes_per_beat = None
    # The four time-signature fields always arrive together on one message,
    # so a single flag tracks whether they have been captured.
    time_signature_found = False

    try:
        midi_file = md.MidiFile(file_path)
        for msg in midi_file:
            # Keep each value once found. A message of a different type must
            # NOT reset it, otherwise the result depends only on the last
            # message of the file and the three values can never coexist.
            if msg.type == "set_tempo" and tempo is None:
                tempo = msg.tempo
            elif msg.type == "key_signature" and key_signature is None:
                key_signature = msg.key
            elif msg.type == "time_signature" and not time_signature_found:
                numerator = msg.numerator
                denominator = msg.denominator
                clocks_per_click = msg.clocks_per_click
                notated_32nd_notes_per_beat = msg.notated_32nd_notes_per_beat
                time_signature_found = True

            # Everything has been collected; no need to scan the rest.
            if tempo is not None and key_signature is not None and time_signature_found:
                break

    except OSError as e:
        print(f"Error processing file {file_path}: {e}")

    return tempo, key_signature, numerator, denominator, clocks_per_click, notated_32nd_notes_per_beat

In [None]:
# One list per metadata column; each receives one entry per path in FilePaths,
# in the same order.
Tempo, KeySignatures, TimeSignatures = [], [], []
CPC, Notated32ndNotes = [], []

for file in FilePaths:
    # get_midi_content returns the 6-tuple
    # (tempo, key_signature, numerator, denominator,
    #  clocks_per_click, notated_32nd_notes_per_beat)
    midi_file = get_midi_content(file)
    (
        tempo,
        key_signature,
        numerator,
        denominator,
        clocks_per_click,
        notated_32nd_notes_per_beat,
    ) = midi_file

    # Numeric fields are stored as returned (possibly None).
    Tempo.append(tempo)
    CPC.append(clocks_per_click)
    Notated32ndNotes.append(notated_32nd_notes_per_beat)

    # Key and time signature are stored as text; missing values therefore
    # end up as the strings "None" and "None/None".
    KeySignatures.append(f"{key_signature}")
    TimeSignatures.append(f"{numerator}/{denominator}")


In [None]:
# Order of the values returned by get_midi_content: tempo, key_signature, numerator, denominator, clocks_per_click, notated_32nd_notes_per_beat