# Jingju dataset intervals analysis

In [77]:
from music21 import *
import matplotlib.pyplot as plt

First, we define the classes that will be used to perform the interval analysis on the Jingju dataset provided by Rafael.

We will use two different classes to represent the contents of the csv file that describes all the scores:

 - The first one is the Jingju score (`Jingju`). This class has 6 attributes: the reference to the xml file, the opera type (xipi or erhuang), an array of lines (each element of the array is a dictionary with the opening line, the finishing line and, when present, the `kLine` of each part of the score), the voice staff, the instrumental staff, and the intervals present in the whole score.
 - The second one is the line (`Line`). It stores whether the line is an opening line, the character that performs the line, the rhythm used in the line, the lyrics and the boundary offsets of the line.

In [69]:
class Jingju:
    """A Jingju score together with the annotations used by the interval analysis.

    Attributes:
        scoreXML: reference (file name) of the score's xml file.
        operaType: opera type of the score, e.g. 'xipi'.
        lines: list with the lines of the score.
        voiceStaff: index of the part that contains the voice.
        instrumentalStaff: index of the part that contains the instruments.
        intervals: dictionary mapping an interval name to its number of occurrences.
    """

    def __init__(self, scoreXML = '', operaType = '', lines = None, voiceStaff = 0, instrumentalStaff = 0, intervals = None):
        """Create a score.

        `lines` and `intervals` default to a new empty list / dictionary for
        every instance. A mutable default value in the signature would be
        created only once and then shared by all the scores built without
        those arguments.
        """
        self.scoreXML = scoreXML
        self.operaType = operaType
        # A container supplied by the caller is stored as is (not copied).
        self.lines = lines if lines is not None else []
        self.voiceStaff = voiceStaff
        self.instrumentalStaff = instrumentalStaff
        self.intervals = intervals if intervals is not None else {}

    def __str__(self):
        """Return the reference to the xml file of the score."""
        return self.scoreXML

class Line:
    """A single lyrics line of a Jingju score.

    Attributes:
        isOpen: True when the line is an opening line.
        character: character that performs the line.
        rhythm: rhythm used in the line.
        lyrics: text of the line.
        initOffset: offset at which the line starts.
        finishOffset: offset at which the line finishes.
    """

    def __init__(
        self,
        isOpen = True,
        character = '',
        rhythm = '',
        lyrics = '',
        initOffset = '',
        finishOffset = '',
    ):
        """Store every argument in the attribute of the same name."""
        self.isOpen = isOpen
        self.character = character
        self.rhythm = rhythm
        self.lyrics = lyrics
        self.initOffset = initOffset
        self.finishOffset = finishOffset

    def __str__(self):
        """Return the lyrics, so that printing a line shows its text."""
        return self.lyrics

After the class definition, the lines_data.csv file has to be parsed in order to create all the objects needed to perform the analysis.

In order to ease the parsing of the csv file, we won't consider the jingjus containing two parts. Instead of using the 92 scores, we will use the 86 that remain after deleting the specified ones.

In [70]:
# Method for printing the jingju list parsed from the csv file
def print_jingju_list(jingjuList):
    """Print a summary of every score of the list followed by its lines.

    Each element of `jingjuList` must provide the attributes `scoreXML`,
    `lines`, `voiceStaff`, `instrumentalStaff` and `intervals`; each element of
    `lines` must be a dictionary with the keys 'openLine', 'kLine' and
    'finishLine'.
    """
    print('{} Jingju scores parsed'.format(len(jingjuList)))
    for score in jingjuList:
        print('Jingju {} containing {} lines'.format(score.scoreXML, len(score.lines)))
        print('Voice staff: {}'.format(score.voiceStaff))
        print('Instrumental staff: {}'.format(score.instrumentalStaff))
        print('Intervals: {}'.format(score.intervals))
        # Lines are numbered starting from 1
        for number, couplet in enumerate(score.lines, start=1):
            print('\tLine number {}'.format(number))
            print('\tOpen line: {}'.format(couplet['openLine']))
            # The K line is only shown when there is one
            if couplet['kLine'] != '':
                print('\tK line: {}'.format(couplet['kLine']))
            print('\tFinish line: {}'.format(couplet['finishLine']))
            print('\n')

In [71]:
import csv
path_folder = '../JingjuMusicScoresCollection/MusicXML/'

lines_data_path = path_folder + 'lines_data.csv'
jingjuList = [] # Array to store all Jingju objects created when parsing the csv file

count = 0

# Start every run of this cell from a clean state, so that the result never depends on
# variables left in the kernel by a previous execution.
jingju = None        # Jingju currently being read (None until the first one is found)
jingju_lines = None  # Lines collected for the current Jingju
currentLines = None  # Group of lines (open / k / finish) currently being filled

# newline='' is the way the csv module expects files to be opened.
# The lyrics are Chinese text, so the encoding is stated explicitly instead of relying on the
# platform default (assumes the file is UTF-8 encoded - TODO confirm).
with open(lines_data_path, 'r', newline='', encoding='utf-8') as file:
    reader = csv.reader(file)
    for row in reader:
        if not row: # Blank line in the file: there is nothing to parse
            continue

        if row[0] != '': # Start of a new Jingju
            # As we have come across a new Jingju, we need to store the previously created one.
            # However, we need to skip the first one because no jingju has been created
            if jingju is not None:
                jingju.lines = jingju_lines
                jingjuList.append(jingju)

            # Update the jingju object with a new one
            scoreXML = row[0]
            operaType = row[2]
            jingju = Jingju(scoreXML, operaType)
            jingju_lines = [] # Init the lines array of the Jingju
            # An unfinished group of lines of the previous Jingju must not leak into this one
            currentLines = None
        elif jingju is None:
            raise ValueError('The first row of {} does not contain the name of a score'.format(lines_data_path))

        # A new group is needed when there is none or when the previous one is already finished
        if currentLines is None or currentLines['finishLine'] != '':
            currentLines = {'openLine': '', 'finishLine': '', 'kLine': ''}

        # Create a new Line object
        line = Line()
        line.character = row[1]
        line.rhythm = row[3]

        # Line types starting with 's' are open lines and the ones starting with 'k' are k lines.
        # Any other type is stored as the finish line.
        lineType = row[4]
        line.isOpen = lineType.startswith('s')

        line.lyrics = row[5]
        line.initOffset = row[6]
        line.finishOffset = row[7]

        if line.isOpen:
            currentLines['openLine'] = line
        elif lineType.startswith('k'):
            currentLines['kLine'] = line
        else:
            currentLines['finishLine'] = line

        # If the currentLines are finished, we append it to the jingju lines
        if currentLines['finishLine'] != '':
            jingju_lines.append(currentLines)

        # Increment the row count
        count += 1

    # We need to store the last jingju (there is none when the file is empty)
    if jingju is not None:
        jingju.lines = jingju_lines
        jingjuList.append(jingju)

# Uncomment the following line to take a look to the list of jingjus
# print_jingju_list(jingjuList)

Now that we have each jingju score well defined and structured, we can start the analysis of the voice-instrumental interval relation.

The first thing that we have to consider is that the instrumental and voice parts are not always in the same staff: either of them can be in the first or in the second one. So, we will have to annotate each jingju of the list with the staff number where each part is located.

In [72]:
# Annotate each jingju with the index of its voice part and of its instrumental part.
for j in jingjuList:
    s = converter.parse(path_folder + j.scoreXML)
    p0 = s.parts[0]
    p0_notes = p0.flat.notes.stream()

    # If any note of the part contains a lyric, that is the voice part of the Jingju.
    # any() stops at the first lyric found instead of visiting all the remaining notes.
    isP0Voice = any(n.lyric is not None for n in p0_notes)

    # Only the parts 0 and 1 are considered: the part that is not the voice one is
    # taken as the instrumental one.
    if isP0Voice:
        j.voiceStaff, j.instrumentalStaff = 0, 1
    else:
        j.voiceStaff, j.instrumentalStaff = 1, 0

Once we have annotated each jingju with the voice and instrumental staffs, we can start the analysis of the interval relationship between both parts of the score.

We have defined the **calcIntervals** function, which receives a Jingju object and calculates the intervals present in the score by comparing, at each offset, the notes of the instrumental part with the notes of the voice part.

In [73]:
def calcIntervals(j):
    """Count the intervals between the instrumental and the vocal part of a score.

    The score `path_folder + j.scoreXML` is parsed and, for every instrumental
    note, the interval from that note to each vocal note sounding at the same
    offset is computed.

    Args:
        j: Jingju object; its `scoreXML`, `instrumentalStaff` and `voiceStaff`
            attributes are read.

    Returns:
        A dictionary mapping each interval name to its number of occurrences.
        The same dictionary is also stored in `j.intervals`.
    """
    score = converter.parse(path_folder + j.scoreXML)

    instrumental_part = score.parts[j.instrumentalStaff]
    vocal_part = score.parts[j.voiceStaff]

    instrumental_notes = instrumental_part.flat.notes.stream()
    vocal_notes = vocal_part.flat.notes.stream()

    occurrences = {}

    for instrumental_note in instrumental_notes:
        if not (instrumental_note.quarterLength > 0):
            continue # Grace notes are skipped

        # Vocal notes that occur at the offset of the instrumental note. With
        # mustBeginInSpan=False, the notes that started before that offset and are still
        # sounding at that position are retrieved as well. The result is a stream that can
        # contain more than one note when there are grace notes, because they share the
        # offset with the main note.
        sounding = vocal_notes.getElementsByOffset(
            instrumental_note.offset, mustBeginInSpan=False).stream()

        for vocal_note in sounding:
            if not (vocal_note.quarterLength > 0):
                continue # Grace notes are skipped
            name = interval.Interval(instrumental_note, vocal_note).name
            occurrences[name] = occurrences.get(name, 0) + 1

    j.intervals = occurrences
    return occurrences

Then, two lists will be defined: one for the xipi intervals and the other for the erhuang ones. Depending on the operaType of each Jingju object, the interval will be stored in one list or the other.

In [74]:
# One dictionary of intervals per score, grouped by opera type
xipi_intervals, erhuang_intervals = [], []
for j in jingjuList:
    intervals = calcIntervals(j)
    # Every score whose opera type is not 'xipi' goes to the erhuang list
    (xipi_intervals if j.operaType == 'xipi' else erhuang_intervals).append(intervals)

print_jingju_list(jingjuList)

86 Jingju scores parsed
Jingju daeh-CanQiQi-WuLongZuo.xml containing 2 lines
Voice staff: 1
Instrumental staff: 0
Intervals: {'P8': 342, 'm7': 39, 'm6': 7, 'P4': 7, 'P1': 35, 'P5': 6, 'M9': 17, 'm3': 6, 'm10': 6, 'M6': 4, 'M2': 3, 'M7': 1, 'P12': 2, 'P11': 2, 'm2': 1, 'M10': 1}
	Line number 1
	Open line: 惨凄凄唤苍天天不应响，
	Finish line: 叹红颜多薄命父母双亡。


	Line number 2
	Open line: 千金休哪曾受磨房景况，
	Finish line: 担几肩头晕转体若筛糠。


Jingju daeh-LiYanFei-DaBaoGuo.xml containing 5 lines
Voice staff: 0
Instrumental staff: 1
Intervals: {}
	Line number 1
	Open line: 李艳妃设早朝龙书案下，
	Finish line: 文站东武列西朝贺哀家。


	Line number 2
	Open line: 太师爷奏一本进贡年下，
	Finish line: 各国的众王侯朝贺中华。


	Line number 3
	Open line: 将江山让太师权且代贺，
	Finish line: 候幼主成了龙原业归家。


	Line number 4
	Open line: 写文约我且把玉玺打下，
	Finish line: 宣太师上金殿披红插花。


	Line number 5
	Open line: 赐你的上方剑朝房问话，
	Finish line: 压定了文武臣均要画押。


Jingju daeh-LiYanFei-ErJinGong.xml containing 3 lines
Voice staff: 0
Instrumental staff: 1
Intervals: {}
	Line number 1
	Open line: 李艳妃坐昭阳自思自想，
	Fin

	Open line: 一轮明月照窗下，
	Finish line: 陈宫心（呐）中乱如麻，


	Line number 2
	Open line: 悔不该心猿并意马，
	Finish line: 悔不该随他人到吕家。


	Line number 3
	Open line: 吕伯奢可算得义气（呀）大，
	Finish line: 杀猪（喏）沽酒款待与他，


	Line number 4
	Open line: 又谁知此贼的疑心（呐）太（呀）大，
	Finish line: 拔出剑将他的满门杀，


	Line number 5
	Open line: 一家人倶丧在宝剑之（喏）下，
	Finish line: 年迈的老丈命（呐）染黄沙。


	Line number 6
	Open line: 屈死的怨鬼魂休要怨（呐）咱，
	Finish line: 自有那神灵儿天地鉴察。


Jingju lseh-ZenNengGou-BaDaChui.xml containing 2 lines
Voice staff: 0
Instrumental staff: 1
Intervals: {}
	Line number 1
	Open line: 怎能够思一计番营来进，
	Finish line: 前后话对文龙细说衷情。


	Line number 2
	Open line: 前也思（喏）后又想无有计定，
	Finish line: 倒不如上公安观看古今。


Jingju lseh-ZiNaRi-HongYangDong.xml containing 5 lines
Voice staff: 0
Instrumental staff: 1
Intervals: {}
	Line number 1
	Open line: 自（喏）那日朝罢归身（呐）染重病，
	Finish line: 三更时梦（呃）见（呐）了年迈爹尊（呐）。


	Line number 2
	Open line: 我前番命孟良骸骨搬（呐）请，
	Finish line: 那乃是萧天佐以（呀）假成真；


	Line number 3
	Open line: 二次（喏）里命孟良番营来进，
	Finish line: 又谁知焦克明他私（呀）自后跟（呐），


	Line number 4
	Open l

Since we now have the xipi and erhuang intervals in two different lists, we can build an 'overall interval' distribution for each type by calculating the mean number of appearances of each interval in each jingju type.

In [None]:
# Group the intervals of the first score by their size in semitones.
# Different interval names can have the same size in semitones. Their names are joined and
# their occurrences are added, so that none of them is overwritten by another one.
namesBySemitones = {}
countsBySemitones = {}
for k, occurrences in jingjuList[0].intervals.items():
    itv = interval.Interval(k)
    namesBySemitones.setdefault(itv.semitones, []).append(k)
    countsBySemitones[itv.semitones] = countsBySemitones.get(itv.semitones, 0) + occurrences

# Dictionary with the equivalence of each size in semitones and the name(s) of its interval(s).
intervalsOrder = {size: '/'.join(names) for size, names in namesBySemitones.items()}

# Ordered list of intervals by semitones size
xValues = sorted(intervalsOrder.keys())
# Ordered list of interval names by their semitones size to be used as ticks for the x axis.
xTicks = [intervalsOrder[i] for i in xValues]
# Ordered list of y axis values
yValues = [countsBySemitones[i] for i in xValues]

plt.bar(xValues, yValues)
plt.xticks(xValues, xTicks)
plt.xlabel('Interval (ordered by size in semitones)')
plt.ylabel('Occurrences')
plt.title('Voice-instrument intervals in {}'.format(jingjuList[0].scoreXML))
plt.show()