## Load scores

In [1]:
import os
import numpy as np
from music21 import converter, pitch, interval, instrument, note, stream, meter
from matplotlib import pyplot as plt

from xen.visualise import plotPart
from xen.data.SongData import SongDataSet

# Folder holding the MIDI files to analyse. The path is relative, so it is
# resolved against the kernel's current working directory.
data_dir = "../../../ai/trainingdata/music/800000_Drum_Percussion_MIDI_Archive/Rock-Indie"

# Create an empty SongDataSet and fill it from `data_dir`.
# NOTE(review): recursive=True presumably also descends into sub-folders -
# confirm against SongDataSet.loadMidiDir.
dataset = SongDataSet()
dataset.loadMidiDir(data_dir, recursive=True)


Loading 268 files


IntProgress(value=0, max=268)

Label(value='')

Loaded 268 songs


## Filter

In [2]:
# Rebind `dataset` to whatever filterTimeSig('4/4') returns.
# NOTE(review): this reuses the name `dataset`, so the unfiltered collection is
# no longer reachable under that name without re-running the load cell.
dataset = dataset.filterTimeSig('4/4')

# Report how many songs the (re-bound) dataset holds.
print(f'{len(dataset.songs)} songs after filtering')

Filtered to 268 songs
268 songs after filtering


## Parts analysis

In [18]:
# Tallies filled in by the single pass over the dataset below.
partscount = {}        # number of parts -> how many songs have that many parts
partnames = {}         # part name -> occurrences across all songs
scorepartcounts = {}   # song file path -> number of parts in that song

for song in dataset.songs:
    song_parts = song.getParts()
    n_parts = len(song_parts)

    scorepartcounts[song.filePath] = n_parts
    partscount[n_parts] = partscount.get(n_parts, 0) + 1

    for part in song_parts:
        part_name = part.partName
        partnames[part_name] = partnames.get(part_name, 0) + 1

# Distribution: how many scores have each part count
print(partscount)
# Every distinct part name together with its frequency
print(partnames)
# Scores ordered from most parts to fewest (ties keep their original order)
print(sorted(scorepartcounts.items(), key=lambda entry: entry[1], reverse=True))

{1: 268}
{'rock 9': 1, 'rock 15': 1, 'rock 14': 1, 'rock 8': 1, 'rock 13': 1, 'rock 12': 1, 'rock 10': 1, 'rock 11': 1, 'break 9': 1, 'break 8': 1, 'break 1': 1, 'break 3': 1, 'break 2': 1, 'break 6': 1, 'break 7': 1, 'break 5': 1, 'break 4': 1, 'break 10': 1, 'break 11': 1, 'rock 1': 1, 'rock 3': 1, 'break 12': 1, 'rock 2': 1, 'rock 6': 1, 'rock 7': 1, 'rock 5': 1, 'rock 4': 1, 'Kong 1': 241}
[('../../../ai/trainingdata/music/800000_Drum_Percussion_MIDI_Archive/Rock-Indie/rock 9.mid', 1), ('../../../ai/trainingdata/music/800000_Drum_Percussion_MIDI_Archive/Rock-Indie/rock 15.mid', 1), ('../../../ai/trainingdata/music/800000_Drum_Percussion_MIDI_Archive/Rock-Indie/rock 14.mid', 1), ('../../../ai/trainingdata/music/800000_Drum_Percussion_MIDI_Archive/Rock-Indie/rock 8.mid', 1), ('../../../ai/trainingdata/music/800000_Drum_Percussion_MIDI_Archive/Rock-Indie/rock 13.mid', 1), ('../../../ai/trainingdata/music/800000_Drum_Percussion_MIDI_Archive/Rock-Indie/rock 12.mid', 1), ('../../../ai/tr

## Time Signatures

Count the number of scores that use each time signature.

In [19]:
# Split the dataset by time signature. The loop below relies on the result
# being iterable, indexable by the iterated key, and on each value exposing
# a `.songs` collection.
timesigs = dataset.splitByTimeSignature()

# One summary line per time signature: how many songs fall under it.
for timesig in timesigs:
    print(f'{len(timesigs[timesig].songs)} songs in {timesig} time')

# Disabled debug aid: path of the first song in the '3/4' group.
# print(timesigs['3/4'].songs[0].filePath)

268 songs in 4/4 time


In [20]:
# Time signatures are usually stored inside the measure objects and can change
# mid-score, so every TimeSignature element is collected recursively per song.
timesigscount = {}       # 'numerator/denominator' label -> occurrences across all songs
scoretimesigscount = {}  # song file path -> number of TimeSignature elements found

for song in dataset.songs:
    # Deliberately NOT named `timesigs`: that name already holds the result of
    # dataset.splitByTimeSignature() and must not be overwritten by this loop.
    song_timesigs = song.score.recurse().getElementsByClass(meter.TimeSignature)
    scoretimesigscount[song.filePath] = len(song_timesigs)

    for timesig in song_timesigs:
        timesig_label = f'{timesig.numerator}/{timesig.denominator}'
        timesigscount[timesig_label] = timesigscount.get(timesig_label, 0) + 1

# Overall frequency of each time signature
print(timesigscount)
# Scores ordered from most time-signature elements to fewest
print(sorted(scoretimesigscount.items(), key=lambda x: x[1], reverse=True))

{'4/4': 268}
[('../../../ai/trainingdata/music/800000_Drum_Percussion_MIDI_Archive/Rock-Indie/rock 9.mid', 1), ('../../../ai/trainingdata/music/800000_Drum_Percussion_MIDI_Archive/Rock-Indie/rock 15.mid', 1), ('../../../ai/trainingdata/music/800000_Drum_Percussion_MIDI_Archive/Rock-Indie/rock 14.mid', 1), ('../../../ai/trainingdata/music/800000_Drum_Percussion_MIDI_Archive/Rock-Indie/rock 8.mid', 1), ('../../../ai/trainingdata/music/800000_Drum_Percussion_MIDI_Archive/Rock-Indie/rock 13.mid', 1), ('../../../ai/trainingdata/music/800000_Drum_Percussion_MIDI_Archive/Rock-Indie/rock 12.mid', 1), ('../../../ai/trainingdata/music/800000_Drum_Percussion_MIDI_Archive/Rock-Indie/rock 10.mid', 1), ('../../../ai/trainingdata/music/800000_Drum_Percussion_MIDI_Archive/Rock-Indie/rock 11.mid', 1), ('../../../ai/trainingdata/music/800000_Drum_Percussion_MIDI_Archive/Rock-Indie/break 9.mid', 1), ('../../../ai/trainingdata/music/800000_Drum_Percussion_MIDI_Archive/Rock-Indie/break 8.mid', 1), ('../../

## Notes Used

In [22]:
from xen.codecs.NoteSequenceSparseCodec import NoteSequenceSparseCodec

# MIDI pitch number -> occurrences across the whole dataset
notecounts = {}

for song in dataset.songs:
    # Same tally restricted to the current song; only consumed by the
    # disabled debug prints at the end of this loop body.
    songnotecounts = {}
    for part in song.getParts():
        for element in part.recurse().notes:
            if element.isNote:
                midi_values = [element.pitch.midi]
            elif element.isChord:
                # The comprehension variable is `chord_note`, not `note`, so the
                # `note` module imported from music21 is not shadowed.
                midi_values = [chord_note.pitch.midi for chord_note in element.notes]
            else:
                # Neither a note nor a chord: nothing to count.
                continue

            for midi in midi_values:
                notecounts[midi] = notecounts.get(midi, 0) + 1
                songnotecounts[midi] = songnotecounts.get(midi, 0) + 1
    # print(song.filePath)
    # print(songnotecounts)

print(notecounts)


{36: 1969, 42: 147, 38: 1499, 46: 489, 47: 235, 50: 14, 41: 22, 51: 1126, 43: 363, 45: 301, 49: 452, 40: 58, 53: 8}
