# Introduction
Calculates a pitch-class distribution for all works in a given corpus.

### Imports
Import libraries and write settings here.

In [1]:
from music21 import *
import statistics
import pandas as pd

### Corpus
Registers the local corpus and collects the source path of every work matching the search term 'Op.'.

In [7]:
## registers the folder of movements as a local corpus named 'DPhilCorpus' and saves it
properCorpus = corpus.corpora.LocalCorpus('DPhilCorpus')
properCorpus.addPath('~/Desktop/Oxford/Corpus/Sounding Corpus (Movements)/XML/')
## bare expression: not the last statement of the cell, so nothing is displayed
properCorpus.directoryPaths
properCorpus.save()

## collects the source path of every entry returned by searching for 'Op.'
listOfWorks = properCorpus.search('Op.')
listOfPieceNames = [work.sourcePath for work in listOfWorks]




# Analysis/Modeling
Calculates, for each piece, the percentage of the total sounding time spent on each of the twelve pitch classes.

In [3]:
def _parseTitle(piece):
    """Derives (fileTitle, opusNumber, movementNumber) from a score path.

    fileTitle is the file name without its '.xml'/'.mxl' extension, e.g.
    'Op. 3 ii'. opusNumber is the title with 'Op.', spaces and the letters
    'i'/'v' removed, converted to int. movementNumber is the title with
    'Op.', spaces and all digits removed ('' when there is no movement).

    Raises ValueError if what is left for the opus number is not an integer.
    """
    from pathlib import Path  ## local import keeps the cell self-contained

    ## only the file name is used, so the title does not depend on the
    ## corpus living under one particular absolute directory
    fileTitle = Path(str(piece)).name.replace(".xml", "").replace(".mxl", "")
    label = fileTitle.replace("Op.", "")
    opusNumber = int(label.translate(str.maketrans("", "", " iv")))
    movementNumber = label.translate(str.maketrans("", "", " 0123456789"))
    return fileTitle, opusNumber, movementNumber


def _pitchClassSeconds(notes):
    """Returns a 12-item list: summed seconds per pitch class (index 0-11)."""
    secondsPerPitchClass = [0.0] * 12
    for el in notes:
        pitches = el.pitches
        if not pitches:
            continue
        seconds = el.seconds
        ## a single note carries one pitch, a chord several; every pitch is
        ## credited with the full duration of the element it belongs to
        for p in pitches:
            secondsPerPitchClass[p.pitchClass] += seconds
    return secondsPerPitchClass


def getDistribution(piece):
    """Computes the pitch-class distribution of one piece.

    Builds the row
        [title, opus number, movement number, pc0 %, ..., pc11 %, range, total]
    appends it to the global listOfTotalResults (which must already exist)
    and also returns it.

    The row always holds exactly twelve percentages, in pitch-class order
    0-11; a pitch class that never occurs is reported as 0.0 (and therefore
    counts towards the range). 'total' is the sum of seconds over every
    sounding pitch, so a chord contributes its duration once per pitch.

    Raises ValueError if the title cannot be parsed into an opus number or
    if the piece contains no pitched notes with a non-zero duration.
    """
    ## establishes some metadata for the piece
    thePiece = corpus.parse(piece)
    fileTitle, opusNumber, movementNumber = _parseTitle(piece)
    ## groups the notes by pitch class and summarises their duration
    secondsPerPitchClass = _pitchClassSeconds(thePiece.flat.recurse().notes)
    sumOfAllSeconds = sum(secondsPerPitchClass)
    if sumOfAllSeconds == 0:
        raise ValueError(
            "no pitched notes with a duration found in " + repr(fileTitle)
        )
    ## calculates pitch-class durations as percentages of all notes
    listOfPercentages = [
        seconds / sumOfAllSeconds * 100 for seconds in secondsPerPitchClass
    ]
    ## calculates the range of the distribution
    theRange = max(listOfPercentages) - min(listOfPercentages)
    ## compiles the data
    finalTotalListWithPercentage = [
        fileTitle,
        opusNumber,
        movementNumber,
        *listOfPercentages,
        theRange,
        sumOfAllSeconds,
    ]
    listOfTotalResults.append(finalTotalListWithPercentage)
    return finalTotalListWithPercentage

In [4]:
## accumulators; getDistribution appends one row per piece to listOfTotalResults
listOfTotalResults, listOfRanges = [], []
for pieceName in listOfPieceNames:
    getDistribution(pieceName)
## orders the rows by Opus number (second item of each row)
listOfTotalResults.sort(key=lambda row: row[1])

# Results
Display the results as a dataframe.

In [10]:
## metadata columns, then one column per pitch class (0-11), then the summary columns
columnNames = ['Title', 'Opus Number', 'Movement Number'] + list(range(12)) + ['Range', 'Duration']
df = pd.DataFrame(listOfTotalResults, columns=columnNames)
df

Unnamed: 0,Title,Opus Number,Movement Number,0,1,2,3,4,5,6,7,8,9,10,11,Range,Duration
0,Op. 1,1,,5.994485,9.956939,12.996667,7.187132,7.947557,9.403077,8.297919,5.851755,5.720852,11.311128,11.738005,3.594484,9.402183,5648.316020
1,Op. 2,2,,7.908017,4.532162,13.365618,6.167383,8.937307,1.117400,14.641509,9.303083,6.987873,8.659673,9.416844,8.963132,13.524109,386.870726
2,Op. 3 i,3,i,3.191065,7.424212,13.158157,9.737734,13.088353,5.783805,5.110690,4.462505,12.789190,2.991623,15.157559,7.105106,12.165935,167.133333
3,Op. 3 ii,3,ii,5.812868,8.434260,8.747931,9.235863,11.554787,11.249829,8.713078,6.438965,10.359850,8.853732,4.768543,5.830294,6.786243,57.385000
4,Op. 3 iii,3,iii,6.949352,8.568905,6.095406,12.102473,9.982332,9.393404,7.567727,3.739694,8.745583,9.305065,7.832744,9.717314,8.362780,94.333333
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
102,Op. 31 ii,31,ii,7.969925,9.022556,6.616541,6.766917,10.375940,12.180451,7.218045,6.917293,8.270677,8.421053,7.669173,8.571429,5.563910,237.500000
103,Op. 31 iii,31,iii,8.400236,6.722096,8.842655,8.798795,8.407864,8.360190,8.320143,8.960888,9.248842,6.842236,8.169492,8.926562,2.526745,374.564286
104,Op. 31 iv,31,iv,8.160237,10.163205,10.830861,13.427300,6.824926,5.192878,7.344214,8.234421,5.267062,8.679525,8.753709,7.121662,8.234421,96.285714
105,Op. 31 v,31,v,8.585486,10.701107,6.666667,8.413284,10.504305,8.757688,7.847478,8.142681,8.683887,8.314883,5.854859,7.527675,4.846248,362.946429
