In [1]:
# Import necessary libraries
import numpy as np
import pandas as pd
import torch # PyTorch + CUDA for GPU acceleration
import torchaudio as ta # PyTorch audio processing library
import music21 as m21
import mido as md # Used for MIDI file visualization and manipulation
import pretty_midi as pm 
import librosa as lb
import random
import glob
import threading
import os
import base64
import requests
import json
import fluidsynth as fs # For MIDI playback as MP3/WAV files
import matplotlib.pyplot as plt
import spotipy
import aiohttp
import asyncio
import concurrent.futures
from queue import Queue
from spotipy.oauth2 import SpotifyClientCredentials
from torch.utils.data import Dataset, DataLoader
from multiprocessing import Pool
# import tensorflow as tf # Exclude tf in favour of PyTorch

In [4]:
# Define file paths for sampling
# NOTE: every backslash is escaped ("\\"); a lone "\A" is an invalid escape sequence.
# The sample path is relative to the same working directory as the other paths below.
file_path = "Data\\Archive\\4_Non_Blondes\\Whats_Up.5.mid" # Replace with any file path
midi_content_path = "Data\\Samples\\sample_midi_content.txt"
midi_info_path = "Data\\Samples\\sample_midi_info.txt"
spotify_response_path = "Data\\Samples\\sample_spotify_response.txt"

# Define paths for working directories
DataPath = "Data\\Archive" # Change path as needed: / or \\ depending on OS

# File to store cached genre information
cache_filename = "Data\\Samples\\genre_cache.txt"

# Create an empty DataFrame; later cells add one column per extracted attribute
df = pd.DataFrame()

In [5]:
# Find file paths to MIDI files: every file located one folder level below DataPath
FilePaths = []

subdirectories = [
    os.path.join(DataPath, d)
    for d in os.listdir(DataPath)
    if os.path.isdir(os.path.join(DataPath, d))
]

for subdir in subdirectories:
    files = [f for f in os.listdir(subdir) if os.path.isfile(os.path.join(subdir, f))]

    for file in files:
        # Deliberately NOT named `file_path`: that name holds the sample file chosen
        # in the configuration cell and must survive this loop unchanged.
        midi_path = os.path.join(subdir, file)
        # print(midi_path) # Uncomment to see file paths
        FilePaths.append(midi_path)

# Add file paths to DataFrame, display current DataFrame
df["file_paths"] = FilePaths
df

FileNotFoundError: [WinError 3] The system cannot find the path specified: 'Project\\Data\\Archive'

In [None]:
# Define function to extract MIDI file information
def search_tempo(midi_file):
    """Return the tempo carried by the first "set_tempo" message.

    Args:
        midi_file: Iterable of messages; each message must expose a ``type``
            attribute, and "set_tempo" messages a ``tempo`` attribute.

    Returns:
        The ``tempo`` attribute of the first message whose ``type`` is
        "set_tempo", or ``None`` when no such message is found.
    """
    tempo_values = (
        message.tempo for message in midi_file if message.type == "set_tempo"
    )
    return next(tempo_values, None)

def search_key_signature(midi_file):
    """Return the key carried by the first "key_signature" message.

    Args:
        midi_file: Iterable of messages; each message must expose a ``type``
            attribute, and "key_signature" messages a ``key`` attribute.

    Returns:
        The ``key`` attribute of the first message whose ``type`` is
        "key_signature", or ``None`` when no such message is found.
    """
    first_match = next(
        (message for message in midi_file if message.type == "key_signature"),
        None,
    )
    if first_match is None:
        return None
    return first_match.key

def search_time_signature(midi_file):
    """Return the fields of the first "time_signature" message.

    Args:
        midi_file: Iterable of messages; each message must expose a ``type``
            attribute, and "time_signature" messages the four attributes
            listed below.

    Returns:
        A 4-tuple ``(numerator, denominator, clocks_per_click,
        notated_32nd_notes_per_beat)`` taken from the first message whose
        ``type`` is "time_signature", or ``None`` when no such message is found.
    """
    for message in midi_file:
        if message.type != "time_signature":
            continue
        return (
            message.numerator,
            message.denominator,
            message.clocks_per_click,
            message.notated_32nd_notes_per_beat,
        )
    return None

In [None]:
# Test MIDI file content; change file_path in the configuration cell to test other files
midi_file = md.MidiFile(file_path)

sample_tempo = search_tempo(midi_file)
sample_key_signature = search_key_signature(midi_file)
sample_time_signature = search_time_signature(midi_file)

# search_time_signature returns None when the file has no "time_signature" message.
# Fall back to four Nones so the names below are always defined (and print as None,
# exactly like a missing tempo or key) instead of raising TypeError on indexing.
(
    sample_numerator,
    sample_denominator,
    sample_clocks_per_click,
    sample_notated_32nd_notes_per_beat,
) = sample_time_signature or (None, None, None, None)

print(f"Tempo: {sample_tempo}")
print(f"Key Signature: {sample_key_signature}")
print(f"Time Signature: {sample_numerator}/{sample_denominator}, {sample_clocks_per_click} clocks per click, {sample_notated_32nd_notes_per_beat} 32nd notes per beat")


Tempo: 214285
Key Signature: G
Time Signature: 2/2, 24 clocks per click, 8 32nd notes per beat


In [None]:
# Save sample MIDI content to text file
# The encoding is set explicitly so the write does not depend on the platform's
# default text encoding; plain "w" is enough because the file is never read back here.
with open(midi_content_path, "w", encoding="utf-8") as file:
    file.write(str(midi_file)) # See sample_midi_content.txt for output

# Save sample MIDI info to text file
with open(midi_info_path, "w", encoding="utf-8") as file:
    file.write(f"Tempo: {sample_tempo}\n")
    file.write(f"Key Signature: {sample_key_signature}\n")
    file.write(f"Time Signature: {sample_numerator}/{sample_denominator}\n")
    file.write(f"Clocks Per Click: {sample_clocks_per_click}\n")
    file.write(f"Notated 32nd Notes Per Beat: {sample_notated_32nd_notes_per_beat}\n")

In [None]:
# Extract song names from file paths
SongNames = []

for file in FilePaths:
    # os.path.basename splits on the running OS's path separator(s), so the file
    # name is found without assuming the path is written with backslashes.
    song_name = (
        os.path.basename(file)
        .replace(".mid", "")  # drop the MIDI extension
        .replace("_", " ")    # underscores stand in for spaces
        .replace(" .", " ")   # avoid a double space from " ." in the next step
        .replace(".", " ")    # e.g. "Tush.1" -> "Tush 1"
    )
    SongNames.append(song_name)

# Spot-check the data type of one random entry.
# Skipped when no files were found: random.randint(0, -1) would raise ValueError.
if SongNames:
    x = random.randint(0, len(SongNames) - 1)
    print(f"Entry {x}'s data type:", type(SongNames[x]))

# Add song names to DataFrame, display current DataFrame
df["song_names"] = SongNames
df

Entry 9894's data type: <class 'str'>


Unnamed: 0,file_paths,song_names
0,Data\Archive\.38 Special\Caught Up In You.mid,Caught Up In You
1,Data\Archive\.38 Special\Fantasy Girl.mid,Fantasy Girl
2,"Data\Archive\10,000_Maniacs\A_Campfire_Song.mid",A Campfire Song
3,Data\Archive\101_Strings\Theme_From_The_Godfat...,Theme From The Godfather
4,Data\Archive\10cc\Dreadlock_Holiday.1.mid,Dreadlock Holiday 1
...,...,...
17224,Data\Archive\ZZ_Top\Sleeping_Bag.mid,Sleeping Bag
17225,Data\Archive\ZZ_Top\Tush.1.mid,Tush 1
17226,Data\Archive\ZZ_Top\Tush.2.mid,Tush 2
17227,Data\Archive\ZZ_Top\Tush.mid,Tush


In [None]:
# Extract artist names from file paths (the artist is the folder containing the file)
Artists = []

for file in FilePaths:
    # Name of the parent folder, found with os.path so it does not depend on the
    # path being written with backslashes.
    artist_name = (
        os.path.basename(os.path.dirname(file))
        .replace("_", " ")    # underscores stand in for spaces
        .replace(" .", " ")   # avoid a double space from " ." in the next step
        .replace(".", " ")
        .replace("  ", " ")   # collapse double spaces left by the replacements above
    )
    # Append the fully cleaned name, including the double-space collapse.
    Artists.append(artist_name)

# Spot-check the data type of one random entry.
# Skipped when no files were found: random.randint(0, -1) would raise ValueError.
if Artists:
    x = random.randint(0, len(Artists) - 1)
    print(f"Entry {x}'s data type:", type(Artists[x]))

# Add artist names to DataFrame, display current DataFrame
df["artists"] = Artists
df

Entry 7319's data type: <class 'str'>


Unnamed: 0,file_paths,song_names,artists
0,Data\Archive\.38 Special\Caught Up In You.mid,Caught Up In You,38 Special
1,Data\Archive\.38 Special\Fantasy Girl.mid,Fantasy Girl,38 Special
2,"Data\Archive\10,000_Maniacs\A_Campfire_Song.mid",A Campfire Song,"10,000 Maniacs"
3,Data\Archive\101_Strings\Theme_From_The_Godfat...,Theme From The Godfather,101 Strings
4,Data\Archive\10cc\Dreadlock_Holiday.1.mid,Dreadlock Holiday 1,10cc
...,...,...,...
17224,Data\Archive\ZZ_Top\Sleeping_Bag.mid,Sleeping Bag,ZZ Top
17225,Data\Archive\ZZ_Top\Tush.1.mid,Tush 1,ZZ Top
17226,Data\Archive\ZZ_Top\Tush.2.mid,Tush 2,ZZ Top
17227,Data\Archive\ZZ_Top\Tush.mid,Tush,ZZ Top


In [None]:
# Move this cell anywhere to write DataFrame at that point to CSV file
processed_csv_path = "Data\\Preprocessed\\file_info_processed.csv"

# to_csv does not create missing folders, so make sure the target folder exists.
# The guard skips the call when no folder part can be split off the path.
processed_csv_dir = os.path.dirname(processed_csv_path)
if processed_csv_dir:
    os.makedirs(processed_csv_dir, exist_ok=True)

df.to_csv(processed_csv_path, index=False)