In [1]:
# Import necessary libraries
import numpy as np
import pandas as pd
import music21 as m21 # Used for music data processing, analysis, metadata extraction, etc. 
import mido as md # Used for MIDI file visualization and manipulation
import pretty_midi as pm 
import librosa as lb
import fractions
import glob
import time
import os
import fluidsynth as fs # For MIDI playback as MP3/WAV files
import matplotlib.pyplot as plt
from multiprocessing import Pool, Process

In [2]:
df = pd.read_csv("Data\\Preprocessed\\file_info_processed.csv")

FilePaths = df["file_paths"]
SongNames = df["song_names"]
Artists = df["artists"]

df

Unnamed: 0,file_paths,song_names,artists
0,Data\Archive\.38 Special\Caught Up In You.mid,Caught Up In You,38 Special
1,Data\Archive\.38 Special\Fantasy Girl.mid,Fantasy Girl,38 Special
2,"Data\Archive\10,000_Maniacs\A_Campfire_Song.mid",A Campfire Song,"10,000 Maniacs"
3,Data\Archive\101_Strings\Theme_From_The_Godfat...,Theme From The Godfather,101 Strings
4,Data\Archive\10cc\Dreadlock_Holiday.1.mid,Dreadlock Holiday 1,10cc
...,...,...,...
17224,Data\Archive\ZZ_Top\Sleeping_Bag.mid,Sleeping Bag,ZZ Top
17225,Data\Archive\ZZ_Top\Tush.1.mid,Tush 1,ZZ Top
17226,Data\Archive\ZZ_Top\Tush.2.mid,Tush 2,ZZ Top
17227,Data\Archive\ZZ_Top\Tush.mid,Tush,ZZ Top


In [3]:
def get_meta(midi_file):
    score = m21.converter.parse(midi_file)
    print(type(score), "\n")
    key = score.analyze("key")
    time_signature = score.getTimeSignatures()[0]
    tempo_mark = score.metronomeMarkBoundaries()[0]
    duration = score._getDuration()
    tempo = tempo_mark[2]

    print("Key: ", key, " , Type: ", type(key))
    print("Time Signature: ", time_signature, " , Type: ", type(time_signature))
    print("Duration: ", duration, " , Type: ", type(duration))
    print("Tempo: ", tempo, " , Type: ", type(tempo))

    print()

    print("Key: ", key.name)
    print("Time Signature: ", time_signature.ratioString)
    print("Duration: ", duration.quarterLength)
    print("Tempo: ", tempo.number)

    return key, time_signature, duration, tempo

In [4]:
# Test metadata extraction
x = get_meta(FilePaths[204])

<class 'music21.stream.base.Score'> 

Key:  C major  , Type:  <class 'music21.key.Key'>
Time Signature:  <music21.meter.TimeSignature 4/4>  , Type:  <class 'music21.meter.base.TimeSignature'>
Duration:  <music21.duration.Duration 1094/3>  , Type:  <class 'music21.duration.Duration'>
Tempo:  <music21.tempo.MetronomeMark Quarter=115>  , Type:  <class 'music21.tempo.MetronomeMark'>

Key:  C major
Time Signature:  4/4
Duration:  1094/3
Tempo:  115


In [5]:
print(x)
print(type(x))

key = x[0].name
time_signature = x[1].ratioString
duration = x[2].quarterLength
tempo = x[3].number

print(key, type(key))
print(time_signature, type(time_signature))
print(duration, type(duration))
print(tempo, type(tempo))

(<music21.key.Key of C major>, <music21.meter.TimeSignature 4/4>, <music21.duration.Duration 1094/3>, <music21.tempo.MetronomeMark Quarter=115>)
<class 'tuple'>
C major <class 'str'>
4/4 <class 'str'>
1094/3 <class 'fractions.Fraction'>
115 <class 'int'>


In [6]:
duration_in_quarter_notes = 1094 / 3  # 1094 divided by 3
tempo_in_bpm = 115

duration_in_seconds = (duration_in_quarter_notes / tempo_in_bpm) * 60
print("Duration in seconds:", duration_in_seconds)

Duration in seconds: 190.2608695652174


In [7]:
# Define lists to store metadata, which will be used to add to the DataFrame
Keys = []
TimeSignatures = []
Durations = []
Tempos = []
# Notes = []

In [8]:
# Function to extract metadata from MIDI files
def parse_midi(midi_file):
    try:
        score = m21.converter.parse(midi_file)
        return score
    except Exception as e:
        print(f"Error processing {midi_file}: {e}")
        return None

# Functions for multi-processing, dependent on the function above
def get_metadata(midi_file):
    metadata = parse_midi(midi_file)
    if metadata:
        key = metadata.analyze("key").name
        time_signature = metadata.getTimeSignatures()[0].ratioString
        duration = metadata._getDuration().quarterLength
        tempo = metadata.metronomeMarkBoundaries()[0][2].number
    else:
        key = time_signature = duration = tempo = "Unknown"
    return key, time_signature, duration, tempo

In [9]:
# Process each MIDI file and collect metadata
def process_midi_file(midi_file):
    key, time_signature, duration, tempo = get_metadata(midi_file)
    return key, time_signature, duration, tempo

In [10]:
songs = FilePaths
# Initialize the pool of workers
with Pool() as pool:
    metadata_list = pool.map(process_midi_file, songs)

In [None]:
Keys, TimeSignatures, Durations, Tempos = zip(*metadata_list)

In [None]:
print(Keys)

[]


In [None]:
df["keys"] = Keys
df["time_signatures"] = TimeSignatures
df["durations"] = Durations
df["tempos"] = Tempos
# df["notes"] = Notes

df

ValueError: Length of values (0) does not match length of index (17229)