In [2]:
# Import necessary libraries
import numpy as np
import pandas as pd
import music21 as m21 # Used for music data processing, analysis, metadata extraction, etc. 
import mido as md # Used for MIDI file visualization and manipulation
import pretty_midi as pm 
import librosa as lb
import glob
import time
import os
import fluidsynth as fs # For MIDI playback as MP3/WAV files
import matplotlib.pyplot as plt
from multiprocessing import Pool

In [3]:
# Load the preprocessed file index. The path is assembled with os.path.join
# so the directory separator is not hard-coded to the Windows backslash.
# NOTE(review): the "file_paths" values shown in this cell's output still
# contain backslashes, so they may need normalising on other platforms.
df = pd.read_csv(os.path.join("Data", "Preprocessed", "file_info_processed.csv"))

# Column handles used by the cells below.
FilePaths = df["file_paths"]
SongNames = df["song_names"]
Artists = df["artists"]

df

Unnamed: 0,file_paths,song_names,artists
0,Data\Archive\.38 Special\Caught Up In You.mid,Caught Up In You,38 Special
1,Data\Archive\.38 Special\Fantasy Girl.mid,Fantasy Girl,38 Special
2,"Data\Archive\10,000_Maniacs\A_Campfire_Song.mid",A Campfire Song,"10,000 Maniacs"
3,Data\Archive\101_Strings\Theme_From_The_Godfat...,Theme From The Godfather,101 Strings
4,Data\Archive\10cc\Dreadlock_Holiday.1.mid,Dreadlock Holiday 1,10cc
...,...,...,...
17224,Data\Archive\ZZ_Top\Sleeping_Bag.mid,Sleeping Bag,ZZ Top
17225,Data\Archive\ZZ_Top\Tush.1.mid,Tush 1,ZZ Top
17226,Data\Archive\ZZ_Top\Tush.2.mid,Tush 2,ZZ Top
17227,Data\Archive\ZZ_Top\Tush.mid,Tush,ZZ Top


In [28]:
def get_meta(midi_file):
    """Parse a MIDI file with music21 and report its basic metadata.

    The function prints the raw music21 objects together with their types,
    then a compact summary (tonic/mode, ratio string, quarter length, tempo
    number), and finally returns the raw objects.

    Args:
        midi_file: MIDI file location, handed to ``m21.converter.parse``.

    Returns:
        A tuple ``(key, time_signature, duration, tempo)`` where
        ``key`` is the result of ``score.analyze("key")``,
        ``time_signature`` is the first item of ``score.getTimeSignatures()``,
        ``duration`` is ``score.duration`` and ``tempo`` is the element at
        index 2 of the first item of ``score.metronomeMarkBoundaries()``.

    Raises:
        IndexError: if the score yields no time signature or no metronome
            mark boundary (both are indexed with ``[0]``).
    """
    score = m21.converter.parse(midi_file)
    print(type(score), "\n")
    key = score.analyze("key")
    time_signature = score.getTimeSignatures()[0]
    tempo_mark = score.metronomeMarkBoundaries()[0]
    # Use the public ``duration`` property instead of the private
    # ``_getDuration()`` accessor, which is an implementation detail.
    duration = score.duration
    tempo = tempo_mark[2]

    # Raw objects and their types.
    print("Key: ", key, " , Type: ", type(key))
    print("Time Signature: ", time_signature, " , Type: ", type(time_signature))
    print("Duration: ", duration, " , Type: ", type(duration))
    print("Tempo: ", tempo, " , Type: ", type(tempo))

    print()

    # Plain-value summary.
    print("Key: ", key.tonic.name, key.mode)
    print("Time Signature: ", time_signature.ratioString)
    print("Duration: ", duration.quarterLength)
    print("Tempo: ", tempo.number)

    return key, time_signature, duration, tempo

In [35]:
# Test metadata extraction on a single entry of FilePaths (label 2024).
# x keeps the (key, time_signature, duration, tempo) tuple returned by get_meta.
x = get_meta(FilePaths[2024])

<class 'music21.stream.base.Score'> 

Key:  E- major  , Type:  <class 'music21.key.Key'>
Time Signature:  <music21.meter.TimeSignature 3/4>  , Type:  <class 'music21.meter.base.TimeSignature'>
Duration:  <music21.duration.Duration 432.0>  , Type:  <class 'music21.duration.Duration'>
Tempo:  <music21.tempo.MetronomeMark allegretto Quarter=107>  , Type:  <class 'music21.tempo.MetronomeMark'>

Key:  E- major
Time Signature:  3/4
Duration:  432.0
Tempo:  107


In [36]:
# Show the tuple returned by get_meta and its type, one per line.
print(x, type(x), sep="\n")

# Reduce each music21 object in the tuple to a plain Python value.
key = "{} {}".format(x[0].tonic.name, x[0].mode)
time_signature = x[1].ratioString
duration = x[2].quarterLength
tempo = x[3].number

# Echo every reduced value next to its type.
print(key, type(key))
print(time_signature, type(time_signature))
print(duration, type(duration))
print(tempo, type(tempo))

(<music21.key.Key of E- major>, <music21.meter.TimeSignature 3/4>, <music21.duration.Duration 432.0>, <music21.tempo.MetronomeMark allegretto Quarter=107>)
<class 'tuple'>
E- major <class 'str'>
3/4 <class 'str'>
432.0 <class 'float'>
107 <class 'int'>


In [6]:
# Parse a MIDI file so that metadata can be extracted from the result
def parse_midi(midi_file):
    """Return whatever ``m21.converter.parse`` produces for ``midi_file``."""
    return m21.converter.parse(midi_file)

# Helpers meant for multi-processing; each one takes the object returned by parse_midi
def get_key(metadata):
    """Return the result of the "key" analysis of an already-parsed score."""
    return metadata.analyze("key")

def get_ts(metadata):
    """Return the first item of ``metadata.getTimeSignatures()``.

    Raises IndexError when that sequence is empty.
    """
    signatures = metadata.getTimeSignatures()
    return signatures[0]

def get_tempo(metadata):
    """Return the tempo stored in the first metronome mark boundary.

    The first item of ``metadata.metronomeMarkBoundaries()`` is taken and its
    element at index 2 is returned (a ``MetronomeMark`` in this notebook's
    sample output).
    """
    first_boundary = metadata.metronomeMarkBoundaries()[0]
    return first_boundary[2]

def get_duration(metadata):
    """Return the duration object of an already-parsed score.

    Reads the public ``duration`` property rather than calling the private
    ``_getDuration()`` accessor, which is an implementation detail of the
    library and not part of its documented interface.
    """
    return metadata.duration

In [7]:
# Experimental: read the caveats in the docstring before running
def get_meta_parallel(midi_file):
    """Extract key, time signature, duration and tempo using a process pool.

    The file is parsed once with ``parse_midi``; the four extraction helpers
    (``get_key``, ``get_ts``, ``get_tempo``, ``get_duration``) are then
    submitted to a ``multiprocessing.Pool`` with ``apply_async`` so they can
    run concurrently, and their results are collected afterwards.

    Caveats:
        * NOTE(review): multiprocessing requires the worker processes to be
          able to import the submitted functions; helpers defined
          interactively (e.g. in a notebook cell) may not work with ``Pool``
          on every platform. Confirm before relying on this.
        * The parsed score is sent to every worker, which presumably costs
          time for large scores. TODO measure against ``get_meta``.

    Args:
        midi_file: MIDI file location, handed to ``parse_midi``.

    Returns:
        A tuple ``(key, time_signature, duration, tempo)``.
    """
    score = parse_midi(midi_file)
    with Pool() as pool:
        # Submit everything first; Pool.apply would block on each call and
        # make the four tasks run one after another.
        pending_key = pool.apply_async(get_key, args=(score,))
        pending_ts = pool.apply_async(get_ts, args=(score,))
        pending_tempo = pool.apply_async(get_tempo, args=(score,))
        pending_duration = pool.apply_async(get_duration, args=(score,))

        # Collect inside the ``with`` block: leaving it terminates the pool.
        key = pending_key.get()
        time_signature = pending_ts.get()
        tempo = pending_tempo.get()
        duration = pending_duration.get()
    return key, time_signature, duration, tempo

In [None]:
# Check later if needed
def get_notes(midi_file):
    """Parse a MIDI file and return the notes of its flattened score.

    Prints the number of notes found before returning them.

    Args:
        midi_file: MIDI file location, handed to ``m21.converter.parse``.

    Returns:
        The ``notes`` attribute of the flattened score.
    """
    score = m21.converter.parse(midi_file)
    # ``Stream.flat`` is deprecated in recent music21 releases;
    # ``flatten()`` is its replacement.
    notes = score.flatten().notes
    print("Number of notes: ", len(notes))
    return notes

# Test note extraction on a single entry of FilePaths (label 204).
# get_notes prints the note count; y keeps the returned notes.
y = get_notes(FilePaths[204])