## Load scores

In [42]:
import os
import numpy as np
from music21 import converter, pitch, interval, instrument, note, stream, meter
from matplotlib import pyplot as plt

from xen.visualise import plotPart
from xen.data.SongData import SongDataSet

# Choose the MIDI source(s) to analyse. Exactly one `paths` assignment should be
# active; the commented-out lines are alternative inputs kept for quick switching.
# data_dir = "../../../ai/trainingdata/music/800000_Drum_Percussion_MIDI_Archive/Rock-Indie"
# paths = ["../../../ai/trainingdata/music/songsterr/lorna-shore/Lorna Shore - Pain Remains III- In a Sea of Fire (555725).mid"]
# paths = ["../../../ai/trainingdata/music/songsterr/lorna-shore/"]
paths = ["../../../ai/trainingdata/music/800000_Drum_Percussion_MIDI_Archive/50's Drummer MIDI Files/01 Rock'n'Roll/01 Dancin Rick 166BPM/01 8th Hat.mid"]

# Build the dataset that the following cells read from.
# NOTE(review): `recursive=False` presumably stops sub-directories from being
# searched - confirm against SongDataSet.fromMidiPaths.
# NOTE(review): the saved output reports 64 files, but the active path names a
# single .mid file; the output looks stale - re-run this cell after changing `paths`.
dataset = SongDataSet.fromMidiPaths(paths, recursive=False)


Loading 64 files


IntProgress(value=0, max=64)

Label(value='')

Loaded 64 songs


## Filter

In [43]:
# Presumably keeps only the songs in 4/4 time - confirm against SongDataSet.filterTimeSig.
# NOTE(review): this rebinds `dataset`, so the unfiltered dataset is no longer
# reachable under that name once this cell has run.
dataset = dataset.filterTimeSig('4/4')

# Report how many songs the dataset holds after the filter.
print(f'{len(dataset.songs)} songs after filtering')

Filtered to 64 songs
64 songs after filtering


## Parts analysis

In [46]:
# Tally how parts are used across the dataset:
#   partscount      - number of parts in a song -> how many songs have that many parts
#   partnames       - part name -> how many parts carry that name
#   instruments     - instrument name -> how many parts use that instrument
#   scorepartcounts - song file path -> number of parts in that song
partscount = {}
partnames = {}
instruments = {}
scorepartcounts = {}

for song in dataset.songs:
    parts = song.getParts()
    numparts = len(parts)
    partscount[numparts] = partscount.get(numparts, 0) + 1
    scorepartcounts[song.filePath] = numparts

    for part in parts:
        partnames[part.partName] = partnames.get(part.partName, 0) + 1
        # Only the last instrument attached to the part is counted.
        instrumentname = part.getInstruments()[-1].instrumentName
        instruments[instrumentname] = instruments.get(instrumentname, 0) + 1

# Analysis of number of scores with different number of parts
print("Part counts")
print(partscount)
# List unique part names
print("Part names")
print(partnames)
# List unique instrument names
print("Instrument names")
print(instruments)
# List scores with number of parts, largest first
print("Score part counts")
print(sorted(scorepartcounts.items(), key=lambda x: x[1], reverse=True))

Part counts
{4: 7, 2: 6, 15: 3, 9: 4, 10: 4, 6: 8, 5: 9, 7: 7, 11: 3, 17: 1, 1: 4, 14: 2, 8: 2, 18: 2, 12: 1, 13: 1}
Part names
{'Solo Guitar': 5, 'Lead Guitar': 4, 'Rhythm Guitar': 6, 'Bass Guitar': 6, 'Adam De Micco': 9, 'Lead': 4, 'Solo': 8, "Andrew O'Connor - Rhythm Guitar": 14, 'Adam De Micco - Lead Guitar': 14, 'Adam De Micco - Harmonies': 7, 'Michael Yager - Bass Guitar': 13, 'Austin Archey - Drums': 15, 'Violin Melody': 10, 'Violin Support': 7, 'Viola': 9, 'Cello': 13, 'Contrabass': 10, 'Choir Pad': 5, 'Strings Pad': 7, 'Strings Pad Reverb': 4, 'Synth': 3, 'Bass Pad': 5, 'Ambient 1': 1, 'Ambient 2': 1, 'Drums': 17, 'Adam': 1, 'Adam De Micco - Additional Guitar': 2, 'Choir': 13, 'Pad Synthesizer': 2, 'Rhythm L': 2, 'Rhythm R': 2, 'Bass': 12, 'Drum SFX': 7, 'Synth Strings': 7, 'Strings': 7, 'Distortion Guitar': 2, 'LEAD GTR': 3, 'SOLO': 9, 'SOLO HARMONY': 2, 'Guitar 1': 4, 'Guitar 2': 4, 'Lead/Solo': 1, 'FX': 1, 'Organ': 2, 'RYTHYM GTR L': 2, 'RYTHYM GTR R': 2, 'BASS': 2, 'LEAD G

## Time Signatures

Count the number of scores that use each time signature.

In [5]:
# Group the songs by time signature.
# NOTE(review): `timesigs` is iterated for its keys and each value exposes `.songs`;
# presumably a dict of time-signature label -> SongDataSet - confirm against
# SongDataSet.splitByTimeSignature.
timesigs = dataset.splitByTimeSignature()

# Print one summary line per time signature found.
for timesig in timesigs:
    print(f'{len(timesigs[timesig].songs)} songs in {timesig} time')

# Debug aid: show the file path of the first song in the 3/4 group.
# print(timesigs['3/4'].songs[0].filePath)

18 songs in 4/4 time


In [6]:
# Time signatures are usually stored within the measures and can change mid-score,
# so count every TimeSignature object found anywhere in each score:
#   timesigscount      - "numerator/denominator" label -> occurrences across all songs
#   scoretimesigscount - song file path -> number of TimeSignature objects in that score
timesigscount = dict()
scoretimesigscount = dict()

for song in dataset.songs:
    # Named `songtimesigs` rather than `timesigs` so this cell does not overwrite
    # the per-time-signature grouping stored in `timesigs`.
    songtimesigs = song.score.recurse().getElementsByClass(meter.TimeSignature)
    scoretimesigscount[song.filePath] = len(songtimesigs)

    for timesig in songtimesigs:
        timesiglabel = f'{timesig.numerator}/{timesig.denominator}'
        timesigscount[timesiglabel] = timesigscount.get(timesiglabel, 0) + 1

print(timesigscount)
# Scores with the most TimeSignature objects first
print(sorted(scoretimesigscount.items(), key=lambda x: x[1], reverse=True))

{'4/4': 18}
[('../../../ai/trainingdata/music/songsterr/lorna-shore/Lorna Shore - Pain Remains III- In a Sea of Fire (555725).mid', 18)]


## Notes Used

In [22]:
from xen.codecs.NoteSequenceSparseCodec import NoteSequenceSparseCodec

# Count how often each MIDI pitch number occurs across all parts of all songs.
#   notecounts     - MIDI pitch number -> occurrences across the whole dataset
#   songnotecounts - the same tally for the song currently being processed
notecounts = {}

for song in dataset.songs:
    songnotecounts = {}
    for part in song.getParts():
        for element in part.recurse().notes:
            # A single note contributes one pitch and a chord contributes one
            # pitch per member note; anything else yielded by `.notes` is skipped.
            if element.isNote:
                midis = [element.pitch.midi]
            elif element.isChord:
                # `chordnote`, not `note`: a loop variable called `note` would
                # replace the `note` module imported from music21.
                midis = [chordnote.pitch.midi for chordnote in element.notes]
            else:
                midis = []

            for midi in midis:
                notecounts[midi] = notecounts.get(midi, 0) + 1
                songnotecounts[midi] = songnotecounts.get(midi, 0) + 1
    # Uncomment to inspect the per-song tallies:
    # print(song.filePath)
    # print(songnotecounts)

print(notecounts)


{36: 1969, 42: 147, 38: 1499, 46: 489, 47: 235, 50: 14, 41: 22, 51: 1126, 43: 363, 45: 301, 49: 452, 40: 58, 53: 8}
