## Start CRIM Intervals

How to use Morgan Ngrams to examine musical files

In [1]:
# import items from intervals and 
import re
# from crim_intervals import *
import pandas as pd
import visualizations as viz
%cd ../intervals
from main import *


/Users/dangtrang/OneDrive - brynmawr.edu/summer 2021/crim_intervals/intervals


## Load MEI Files from CRIM or Github by pasting one or more of [these links](https://docs.google.com/spreadsheets/d/1TzRqnzgcYYuQqZR78c5nizIsBWp4pnblm2wbU03uuSQ/edit?auth_email=rfreedma@haverford.edu#gid=0) below.

*Note:  each file must be in quotation marks and separated by commas


In [2]:
corpus = CorpusBase(['https://raw.githubusercontent.com/CRIM-Project/CRIM-online/master/crim/static/mei/MEI_3.0/CRIM_Model_0001.mei'])
# corpus = CorpusBase(['https://raw.githubusercontent.com/RichardFreedman/CRIM_additional_works/main/CRIM_Ave_Test.mei_msg.mei'])

Requesting file from https://raw.githubusercontent.com/CRIM-Project/CRIM-online/master/crim/static/mei/MEI_3.0/CRIM_Model_0001.mei...
Successfully imported.


## Give the scores short names, in order according to the way they were listed above
* if more than one piece, then `piece1, piece2 = corpus.scores` 
** of course this can be `model`, `mass`, or any other name you like, as long as these are used in the requests for particular patterns below
* if just one piece, then `model = corpus.scores[0]`


In [3]:
# model, mass = corpus.scores
model = corpus.scores[0]


## Or batch process a series of files via the list below
* Be sure to choose a directory as part of `a.to_csv(f"{short_title}.csv")`
* You can use _any_ of the methods in the batch process below, so getNotes_Rests, etc.

In [4]:
titles = ['https://crimproject.org/mei/CRIM_Model_0001.mei',  
'https://crimproject.org/mei/CRIM_Model_0002.mei',  
'https://crimproject.org/mei/CRIM_Model_0008.mei'
'https://crimproject.org/mei/CRIM_Model_0009.mei',  
'https://crimproject.org/mei/CRIM_Model_0010.mei',  
'https://crimproject.org/mei/CRIM_Model_0011.mei',  
'https://crimproject.org/mei/CRIM_Model_0012.mei',  
'https://crimproject.org/mei/CRIM_Model_0013.mei',  
'https://crimproject.org/mei/CRIM_Model_0014.mei',  
'https://crimproject.org/mei/CRIM_Model_0015.mei',  
'https://crimproject.org/mei/CRIM_Model_0016.mei',  
'https://crimproject.org/mei/CRIM_Model_0017.mei',  
'https://crimproject.org/mei/CRIM_Model_0019.mei',  
'https://crimproject.org/mei/CRIM_Model_0020.mei',  
'https://crimproject.org/mei/CRIM_Model_0021.mei',
'https://crimproject.org/mei/CRIM_Mass_0001_1.mei',  
'https://crimproject.org/mei/CRIM_Mass_0001_2.mei',  
'https://crimproject.org/mei/CRIM_Mass_0001_3.mei',  
'https://crimproject.org/mei/CRIM_Mass_0001_4.mei',  
'https://crimproject.org/mei/CRIM_Mass_0001_5.mei',  
'https://crimproject.org/mei/CRIM_Mass_0002_1.mei',  
'https://crimproject.org/mei/CRIM_Mass_0002_2.mei',  
'https://crimproject.org/mei/CRIM_Mass_0002_3.mei',  
'https://crimproject.org/mei/CRIM_Mass_0002_4.mei',  
'https://crimproject.org/mei/CRIM_Mass_0002_5.mei',  
'https://crimproject.org/mei/CRIM_Mass_0003_1.mei',  
'https://crimproject.org/mei/CRIM_Mass_0003_2.mei',  
'https://crimproject.org/mei/CRIM_Mass_0003_3.mei',  
'https://crimproject.org/mei/CRIM_Mass_0003_4.mei',  
'https://crimproject.org/mei/CRIM_Mass_0003_5.mei',  
'https://crimproject.org/mei/CRIM_Mass_0004_1.mei',  
'https://crimproject.org/mei/CRIM_Mass_0004_2.mei',  
'https://crimproject.org/mei/CRIM_Mass_0004_3.mei',  
'https://crimproject.org/mei/CRIM_Mass_0004_4.mei',  
'https://crimproject.org/mei/CRIM_Mass_0004_5.mei',  
'https://crimproject.org/mei/CRIM_Mass_0005_1.mei',  
'https://crimproject.org/mei/CRIM_Mass_0005_2.mei',  
'https://crimproject.org/mei/CRIM_Mass_0005_3.mei',  
'https://crimproject.org/mei/CRIM_Mass_0005_4.mei',  
'https://crimproject.org/mei/CRIM_Mass_0005_5.mei',  
'https://crimproject.org/mei/CRIM_Mass_0006_1.mei',  
'https://crimproject.org/mei/CRIM_Mass_0006_2.mei',  
'https://crimproject.org/mei/CRIM_Mass_0006_3.mei',  
'https://crimproject.org/mei/CRIM_Mass_0006_4.mei',  
'https://crimproject.org/mei/CRIM_Mass_0006_5.mei',  
'https://crimproject.org/mei/CRIM_Mass_0007_1.mei',  
'https://crimproject.org/mei/CRIM_Mass_0007_2.mei',  
'https://crimproject.org/mei/CRIM_Mass_0007_3.mei',  
'https://crimproject.org/mei/CRIM_Mass_0007_4.mei',  
'https://crimproject.org/mei/CRIM_Mass_0007_5.mei',  
'https://crimproject.org/mei/CRIM_Mass_0008_1.mei',  
'https://crimproject.org/mei/CRIM_Mass_0008_2.mei',  
'https://crimproject.org/mei/CRIM_Mass_0008_3.mei',  
'https://crimproject.org/mei/CRIM_Mass_0008_4.mei',  
'https://crimproject.org/mei/CRIM_Mass_0008_5.mei',  
'https://crimproject.org/mei/CRIM_Mass_0009_1.mei',  
'https://crimproject.org/mei/CRIM_Mass_0009_2.mei',  
'https://crimproject.org/mei/CRIM_Mass_0009_3.mei',  
'https://crimproject.org/mei/CRIM_Mass_0009_4.mei',  
'https://crimproject.org/mei/CRIM_Mass_0009_5.mei',  
'https://crimproject.org/mei/CRIM_Mass_0010_1.mei',  
'https://crimproject.org/mei/CRIM_Mass_0010_2.mei',  
'https://crimproject.org/mei/CRIM_Mass_0010_3.mei',  
'https://crimproject.org/mei/CRIM_Mass_0010_4.mei',  
'https://crimproject.org/mei/CRIM_Mass_0010_5.mei',  
'https://crimproject.org/mei/CRIM_Mass_0011_1.mei',  
'https://crimproject.org/mei/CRIM_Mass_0011_2.mei',  
'https://crimproject.org/mei/CRIM_Mass_0011_3.mei',  
'https://crimproject.org/mei/CRIM_Mass_0011_4.mei',  
'https://crimproject.org/mei/CRIM_Mass_0011_5.mei',  
'https://crimproject.org/mei/CRIM_Mass_0012_1.mei',  
'https://crimproject.org/mei/CRIM_Mass_0012_2.mei',  
'https://crimproject.org/mei/CRIM_Mass_0012_3.mei',  
'https://crimproject.org/mei/CRIM_Mass_0012_4.mei',  
'https://crimproject.org/mei/CRIM_Mass_0012_5.mei',  
'https://crimproject.org/mei/CRIM_Mass_0013_1.mei',  
'https://crimproject.org/mei/CRIM_Mass_0013_2.mei',  
'https://crimproject.org/mei/CRIM_Mass_0013_3.mei',  
'https://crimproject.org/mei/CRIM_Mass_0013_4.mei',  
'https://crimproject.org/mei/CRIM_Mass_0013_5.mei',  
'https://crimproject.org/mei/CRIM_Mass_0014_1.mei',  
'https://crimproject.org/mei/CRIM_Mass_0014_2.mei',  
'https://crimproject.org/mei/CRIM_Mass_0014_3.mei',  
'https://crimproject.org/mei/CRIM_Mass_0014_4.mei',  
'https://crimproject.org/mei/CRIM_Mass_0014_5.mei',  
'https://crimproject.org/mei/CRIM_Mass_0015_1.mei',  
'https://crimproject.org/mei/CRIM_Mass_0015_2.mei',  
'https://crimproject.org/mei/CRIM_Mass_0015_3.mei',  
'https://crimproject.org/mei/CRIM_Mass_0015_4.mei',  
'https://crimproject.org/mei/CRIM_Mass_0015_5.mei',  
'https://crimproject.org/mei/CRIM_Mass_0016_1.mei',  
'https://crimproject.org/mei/CRIM_Mass_0016_2.mei',  
'https://crimproject.org/mei/CRIM_Mass_0016_3.mei',  
'https://crimproject.org/mei/CRIM_Mass_0016_4.mei',  
'https://crimproject.org/mei/CRIM_Mass_0016_5.mei',  
'https://crimproject.org/mei/CRIM_Mass_0017_1.mei',  
'https://crimproject.org/mei/CRIM_Mass_0017_2.mei',  
'https://crimproject.org/mei/CRIM_Mass_0017_3.mei',  
'https://crimproject.org/mei/CRIM_Mass_0017_4.mei',  
'https://crimproject.org/mei/CRIM_Mass_0017_5.mei',  
'https://crimproject.org/mei/CRIM_Mass_0018_1.mei',  
'https://crimproject.org/mei/CRIM_Mass_0018_2.mei',  
'https://crimproject.org/mei/CRIM_Mass_0018_3.mei',  
'https://crimproject.org/mei/CRIM_Mass_0018_4.mei',  
'https://crimproject.org/mei/CRIM_Mass_0018_5.mei',  
'https://crimproject.org/mei/CRIM_Mass_0019_1.mei',  
'https://crimproject.org/mei/CRIM_Mass_0019_2.mei',  
'https://crimproject.org/mei/CRIM_Mass_0019_3.mei',  
'https://crimproject.org/mei/CRIM_Mass_0019_4.mei',  
'https://crimproject.org/mei/CRIM_Mass_0019_5.mei',  
'https://crimproject.org/mei/CRIM_Mass_0020_1.mei',  
'https://crimproject.org/mei/CRIM_Mass_0020_2.mei',  
'https://crimproject.org/mei/CRIM_Mass_0020_3.mei',  
'https://crimproject.org/mei/CRIM_Mass_0020_4.mei',  
'https://crimproject.org/mei/CRIM_Mass_0020_5.mei',  
'https://crimproject.org/mei/CRIM_Mass_0021_1.mei',  
'https://crimproject.org/mei/CRIM_Mass_0021_2.mei',  
'https://crimproject.org/mei/CRIM_Mass_0021_3.mei',  
'https://crimproject.org/mei/CRIM_Mass_0021_4.mei',  
'https://crimproject.org/mei/CRIM_Mass_0021_5.mei',  
'https://crimproject.org/mei/CRIM_Mass_0022_1.mei',  
'https://crimproject.org/mei/CRIM_Mass_0022_2.mei',  
'https://crimproject.org/mei/CRIM_Mass_0022_3.mei',  
'https://crimproject.org/mei/CRIM_Mass_0022_4.mei',  
'https://crimproject.org/mei/CRIM_Mass_0022_5.mei']

In [5]:
# for title in titles:
#     short_title = clean_title = re.search("([^\/]+$)", title).group()
#     corpus = CorpusBase([title])
#     piece = corpus.scores[0]
# #   a= piece.getMelodic()
#     # a= piece.getNoteRest()
#     a = piece.getNgrams(how='modules', cell_type=str)
#     #a.to_csv(f"{short_title}.csv")
#     a.head()

Requesting file from https://crimproject.org/mei/CRIM_Model_0001.mei...
Successfully imported.
Requesting file from https://crimproject.org/mei/CRIM_Model_0002.mei...
Successfully imported.
Requesting file from https://crimproject.org/mei/CRIM_Model_0008.meihttps://crimproject.org/mei/CRIM_Model_0009.mei...
Import from https://crimproject.org/mei/CRIM_Model_0008.meihttps://crimproject.org/mei/CRIM_Model_0009.mei failed, please check your url. File paths must begin with a '/'. Continuing to next file...


Exception: At least one score must be succesfully imported

## Now apply various methods to the scores:
* **getNoteRest** returns all the notes and rests, each voice as a column
* **getDuration** returns the durations for all notes and rests, as above
* **getMelodic** returns the melodic intervals in each voice as a column
* **getHarmonic** returns pairs of harmonic intervals between each pair of voices
* **getNgrams**  returns segments of various kinds, melodic (one voice) or modular (pairs of voices, including vertical and horizontal motion)
---

### Pandas Tools:
* **df.value_counts()**  returns summary for each pitch, duration for any type
* save results as variable, then:**`.apply(pd.Series.value_counts).fillna(0).astype(int) `**
---

### Documentation available via this command:
* for any method, use the following read documentation:
`print(model.getNgrams.__doc__)`

---


In [6]:
print(model.getNgrams.__doc__)

 Group sequences of observations in a sliding window "n" events long
        (default n=3). These cells of the resulting DataFrame can be grouped as 
        desired by setting `cell_type` to `tuple` (default), `list`, or `str`. 
        If the `exclude` parameter is passed, if any item in that list is found 
        in an ngram, that ngram will be removed from the resulting DataFrame. 
        Since `exclude` defaults to `['Rest']`, pass an empty list if you want 
        to allow rests in your ngrams.

        There are two primary modes for the `how` parameter. When set to
        "columnwise" (default), this is the simple case where the events in each
        column of the `df` DataFrame has its events grouped at the offset of the
        first event in the window. For example, to get 4-grams of melodic
        intervals:

        ip = ImportedPiece('path_to_piece')
        ngrams = ip.getNgrams(df=ip.getMelodic(), n=4)

        If `how` is set to 'modules' this will return contrap

## Methods and Parameters
within parentheses, specify paramenters.  These are optional
* kind="d" for diatonic with major(or q(uality), z(ero based), c(chromatic)
* directed=True or False
* compound=True or False
* unit=2 (or whatever increment is preferred; 1=quarter note)

In [7]:
d = model.getMelodic(kind="d", directed=True, compound=False)
d.head()

Unnamed: 0,Superius,Contratenor,PrimusTenor,SecundusTenor,Bassus
0.0,,Rest,Rest,Rest,Rest
4.0,5.0,,,,
8.0,,Rest,Rest,Rest,Rest
10.0,-2.0,,,,
12.0,2.0,,,,


# Notes and Rests


In [8]:
notes = model.getNoteRest()
notes.fillna(value= "-", inplace=True)
notes.reset_index()

Unnamed: 0,index,Superius,Contratenor,PrimusTenor,SecundusTenor,Bassus
0,0.0,G4,Rest,Rest,Rest,Rest
1,4.0,D5,-,-,-,-
2,8.0,-,Rest,Rest,Rest,Rest
3,10.0,C5,-,-,-,-
4,12.0,D5,D4,-,-,-
...,...,...,...,...,...,...
777,1065.0,-,-,-,-,F3
778,1066.0,-,B-3,-,D4,G3
779,1068.0,-,E4,-,C4,E3
780,1070.0,-,-,-,-,C3


In [9]:
notes.value_counts()
# notes.stack().value_counts()


Superius  Contratenor  PrimusTenor  SecundusTenor  Bassus
-         G4           -            -              -         20
          E4           -            -              -         15
C5        -            -            -              -         14
-         F4           -            -              -         14
D5        -            -            -              -         11
                                                             ..
-         Rest         -            G3             E3         1
                                    B-3            G3         1
                                    -              -          1
          G4           Rest         B-3            G3         1
Rest      Rest         G3           -              -          1
Length: 530, dtype: int64

In [10]:
df = notes.apply(pd.Series.value_counts).fillna(0).astype(int)
df


Unnamed: 0,Superius,Contratenor,PrimusTenor,SecundusTenor,Bassus
-,380,324,384,381,415
A3,0,4,46,41,37
A4,64,61,0,1,0
B-2,0,0,0,0,1
B-3,0,11,46,52,27
B-4,59,15,0,0,0
B3,0,1,1,2,0
B4,1,0,0,0,0
C3,0,0,0,0,20
C4,0,18,64,55,21


# Melodic Intervals
* kind='d' for diatonic; 's' for chromatic/semitone
* To save as CSV:  
`mel_int.to_csv('file_name.csv')`


In [11]:
mel_int = model.getMelodic(kind='d')
mel_int.fillna(value= "-", inplace=True)
mel_int.reset_index()


Unnamed: 0,index,Superius,Contratenor,PrimusTenor,SecundusTenor,Bassus
0,0.0,-,Rest,Rest,Rest,Rest
1,4.0,5,-,-,-,-
2,8.0,-,Rest,Rest,Rest,Rest
3,10.0,-2,-,-,-,-
4,12.0,2,-,-,-,-
...,...,...,...,...,...,...
776,1065.0,-,-,-,-,2
777,1066.0,-,-2,-,-2,2
778,1068.0,-,4,-,-2,-3
779,1070.0,-,-,-,-,-3


# Durations


In [12]:
durs = model.getDuration()
durs.fillna(value= "-", inplace=True)

## Combine Notes and Durations as One DataFrame

In [13]:
notes_durs = pd.concat([notes, durs], axis=1)
notes_durs


Unnamed: 0,Superius,Contratenor,PrimusTenor,SecundusTenor,Bassus,Superius.1,Contratenor.1,PrimusTenor.1,SecundusTenor.1,Bassus.1
0.0,G4,Rest,Rest,Rest,Rest,4.0,8.0,8.0,8.0,8.0
4.0,D5,-,-,-,-,6.0,-,-,-,-
8.0,-,Rest,Rest,Rest,Rest,-,4.0,8.0,8.0,8.0
10.0,C5,-,-,-,-,2.0,-,-,-,-
12.0,D5,D4,-,-,-,2.0,4.0,-,-,-
...,...,...,...,...,...,...,...,...,...,...
1065.0,-,-,-,-,F3,-,-,-,-,1.0
1066.0,-,B-3,-,D4,G3,-,2.0,-,2.0,2.0
1068.0,-,E4,-,C4,E3,-,4.0,-,4.0,2.0
1070.0,-,-,-,-,C3,-,-,-,-,2.0


# Select Columns for One Voice


In [14]:
notes_durs_s = notes_durs.iloc[:, [0,6]]
notes_durs_s

Unnamed: 0,Superius,Contratenor
0.0,G4,8.0
4.0,D5,-
8.0,-,4.0
10.0,C5,-
12.0,D5,4.0
...,...,...
1065.0,-,-
1066.0,-,2.0
1068.0,-,4.0
1070.0,-,-


## N-Grams in Each Voice
* for Melodic or Durations

In [15]:
corpus = CorpusBase(['https://crimproject.org/mei/CRIM_Model_0017.mei'])
model = corpus.scores[0]
mel = model.getMelodic(kind='d', compound=False, unit=4)
mel_ngrams = model.getNgrams(df=mel, n=4, cell_type=str)
mel_ngrams



Requesting file from https://crimproject.org/mei/CRIM_Model_0017.mei...
Successfully imported.


Unnamed: 0,[Discantus],[Contratenor],[Tenor],[QuintaVox],[SextaVox],[Bassus]
4.0,"1, 1, 1, 2","1, 3, -3, 1",,,,
8.0,"1, 1, 2, 1","3, -3, 1, -2",,,,
12.0,"1, 2, 1, 1","-3, 1, -2, -3",,,,
16.0,"2, 1, 1, 1","1, -2, -3, 3",,,,
20.0,"1, 1, 1, 2","-2, -3, 3, 2",,,,
...,...,...,...,...,...,...
1480.0,"1, 1, 2, 1","1, 1, 1, 1","1, 1, 1, -2","1, -3, 1, 1","1, 1, 1, 1","4, 1, 1, 1"
1484.0,"1, 2, 1, 1","1, 1, 1, 2","1, 1, -2, 1","-3, 1, 1, 1","1, 1, 1, 1","1, 1, 1, 1"
1488.0,"2, 1, 1, 1","1, 1, 2, 1","1, -2, 1, 1","1, 1, 1, 1","1, 1, 1, 1","1, 1, 1, 1"
1492.0,"1, 1, 1, 1","1, 2, 1, 1","-2, 1, 1, 1","1, 1, 1, 1","1, 1, 1, 1","1, 1, 1, 1"


### Plot visualizations for all patterns in Ngrams

The `plot_ngrams_heatmap(model, ngrams_df)` method from visualizations plot a heat map based on a model and an ngram dataframe. 

We could retrieve a chart and the dataframe the chart was plotted using:

```
chart, chart_df = viz.plot_ngrams_heatmap(model, mel_ngrams)
```

Or if we don't care about the dataframe, we could display the plot with: 
```
viz.plot_ngrams_heatmap(model, mel_ngrams)[0]
```
The `[0]` means that we are displaying the first of the two outputs from the method.

In [16]:
chart, chart_df = viz.plot_ngrams_heatmap(model, mel_ngrams)

In [17]:
chart_df

Unnamed: 0,start,end,pattern,voices
0,4.0,8.0,"1, 1, 1, 2",[Discantus]
1,8.0,12.0,"1, 1, 2, 1",[Discantus]
2,12.0,16.0,"1, 2, 1, 1",[Discantus]
3,16.0,20.0,"2, 1, 1, 1",[Discantus]
4,20.0,24.0,"1, 1, 1, 2",[Discantus]
...,...,...,...,...
1975,1480.0,1484.0,"4, 1, 1, 1",[Bassus]
1976,1484.0,1488.0,"1, 1, 1, 1",[Bassus]
1977,1488.0,1492.0,"1, 1, 1, 1",[Bassus]
1978,1492.0,1496.0,"1, 1, 1, 1",[Bassus]


In [18]:
chart

In [19]:
out2 = mel_ngrams[mel_ngrams == '1, 2, 1, 2'].stack().dropna().to_frame()
# out2.reset_index(inplace=True)
# out2["measure"] = out2['level_0']/8+1
# out2
out2.index.to_list()


[(300.0, '[Contratenor]'),
 (300.0, '[QuintaVox]'),
 (304.0, '[Discantus]'),
 (328.0, '[Tenor]')]

Plot visualizations for specific patterns from ngrams

In [20]:
viz.plot_ngrams_heatmap(model, mel_ngrams, patterns=['1, 1, 1, 1'])[0]

In [21]:
x = len(col_one_list)-10
a = list(combinations(col_one_list, x))
a

NameError: name 'col_one_list' is not defined

# Harmonic Intervals between Voices

In [22]:
har = model.getHarmonic()
regHar = model.regularize(df=har, unit=2)
regHar

ValueError: index must be monotonic increasing or decreasing

In [23]:
harm = model.getHarmonic(kind="d", compound=True)
h_ng = model.getNgrams(df=harm, how='modules', exclude=['Rest'], cell_type="str")
h_ng.stack().value_counts().to_frame().head(50)


Unnamed: 0,0
"3_Held, 4_Held, 5",16
"10_2, 9_2, 8",13
"1_Held, 2_Held, 3",12
"8_2, 7_2, 6",12
"6_-2, 7_Held, 6",12
"7_Held, 6_Held, 6",11
"3_2, 2_2, 1",11
"10_Held, 11_Held, 12",10
"3_-2, 3_-2, 3",10
"8_2, 7_Held, 6",9


In [24]:
h_ng[h_ng == '-2_-3, 3_Held, 4'].stack().dropna().to_frame()

Unnamed: 0,Unnamed: 1,0
123.0,[Bassus]_[Contratenor],"-2_-3, 3_Held, 4"
1435.0,[Tenor]_[Contratenor],"-2_-3, 3_Held, 4"
537.0,[QuintaVox]_[Contratenor],"-2_-3, 3_Held, 4"


Plot visualizations for harmonic intervals between voices

In [25]:
h_ng[h_ng == '-3_Held, -2_4, -4'].stack().dropna().to_frame()

Unnamed: 0,Unnamed: 1,0
1688/3,[QuintaVox]_[Contratenor],"-3_Held, -2_4, -4"


In [26]:
chart, chart_df = viz.plot_ngrams_heatmap(model, h_ng, patterns=['-2_-3, 3_Held, 4', '-3_Held, -2_4, -4'], voices=['[QuintaVox]_[Contratenor]'])
chart

In [27]:
chart_df

Unnamed: 0,start,end,pattern,voices
330,537.0,1332.0,"-2_-3, 3_Held, 4",[QuintaVox]_[Contratenor]
537,562.666667,716.0,"-3_Held, -2_4, -4",[QuintaVox]_[Contratenor]


In [28]:
out3 = h_ng[h_ng == '7_-, 6_-2, 8'].stack().dropna().to_frame()
out3.reset_index(inplace=True)
out3["measure"] = out3['level_0']/8+1
out3

Unnamed: 0,level_0,level_1,0,measure


# Two-Voice Modules as N-Grams

* 'modules' is in fact the default
* 'unit' refers to the durational increment


In [29]:
ng = model.getNgrams(df=harm, how='columnwise', exclude=['Rest'])
ng.head()
ng.stack().to_frame()

Unnamed: 0,Unnamed: 1,0
0.0,[Contratenor]_[Discantus],"8, 6, 2"
4.0,[Contratenor]_[Discantus],"8, 7, 5"
7.0,[Contratenor]_[Discantus],"7, 5, 8"
8.0,[Contratenor]_[Discantus],"6, 2, 5"
10.0,[Contratenor]_[Discantus],"5, 8, 1"
...,...,...
1480.0,[Bassus]_[QuintaVox],"5, 8, 9"
1484.0,[QuintaVox]_[Tenor],"-2, -6, -7"
1484.0,[QuintaVox]_[Contratenor],"5, -4, 1"
1492.0,[Tenor]_[Discantus],"10, 6, 9"


In [30]:
viz.plot_ngrams_heatmap(model, ng)[0]

In [31]:
ng2 = ng[ng.apply(lambda row: row.astype(str).str.contains('7, -, 6, -2, 8').any(), axis=1)].copy()
# find_soggetto.reset_index(inplace=True)
# find_soggetto["measure"] = find_soggetto['index']/8+1
ng2.head(50)

Unnamed: 0,[Bassus]_[SextaVox],[Bassus]_[QuintaVox],[Bassus]_[Tenor],[Bassus]_[Contratenor],[Bassus]_[Discantus],[SextaVox]_[QuintaVox],[SextaVox]_[Tenor],[SextaVox]_[Contratenor],[SextaVox]_[Discantus],[QuintaVox]_[Tenor],[QuintaVox]_[Contratenor],[QuintaVox]_[Discantus],[Tenor]_[Contratenor],[Tenor]_[Discantus],[Contratenor]_[Discantus]


# Filter by Any String of Intervals

In [32]:


filtered = ng[ng.apply(lambda row: row.astype(str).str.contains('7').any(), axis=1)].copy()
# filtered = ng[ng.apply(lambda row: row.astype(str).str.contains('6_-2, 6_-2, 6').any(), axis=1)].copy()

filtered.reset_index(inplace=True)
filtered["measure"] = filtered['index']/8+1
filtered



Unnamed: 0,index,[Bassus]_[SextaVox],[Bassus]_[QuintaVox],[Bassus]_[Tenor],[Bassus]_[Contratenor],[Bassus]_[Discantus],[SextaVox]_[QuintaVox],[SextaVox]_[Tenor],[SextaVox]_[Contratenor],[SextaVox]_[Discantus],[QuintaVox]_[Tenor],[QuintaVox]_[Contratenor],[QuintaVox]_[Discantus],[Tenor]_[Contratenor],[Tenor]_[Discantus],[Contratenor]_[Discantus],measure
0,4.0,,,,,,,,,,,,,,,"8, 7, 5",1.5
1,7.0,,,,,,,,,,,,,,,"7, 5, 8",1.875
2,32.0,,,,,,,"8, 8, 7","15, 15, 14","17, 17, 16",,,,,,,5.0
3,36.0,,,,,,,"8, 7, 6",,"17, 16, 13",,,,,,,5.5
4,39.0,,,,,,,"7, 6, 8",,"16, 13, 15",,,,,,,5.875
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
280,1470.0,,,"3, 4, 1",,,,"3, 4, 5",,,"-6, -5, -8",,,"8, 7, 10","10, 9, 12",,184.75
281,1471.0,,,"4, 1, 5",,,,"4, 5, 9",,,"-5, -8, -4",,,"7, 10, 6","9, 12, 8",,184.875
282,1472.0,,,,"10, 7, 5",,"12, 1, 2",,"14, 12, 8",,"-4, 3, 2",,,,,"3, 6, 5",185.0
283,1474.0,,,"1, 5, 1",,,,"5, 9, 8",,,"-8, -4, -3",,,"10, 6, 7","12, 8, 10",,185.25


## Ngrams with Real Durations

In [33]:
print(model.getNgrams.__doc__)

 Group sequences of observations in a sliding window "n" events long
        (default n=3). These cells of the resulting DataFrame can be grouped as 
        desired by setting `cell_type` to `tuple` (default), `list`, or `str`. 
        If the `exclude` parameter is passed, if any item in that list is found 
        in an ngram, that ngram will be removed from the resulting DataFrame. 
        Since `exclude` defaults to `['Rest']`, pass an empty list if you want 
        to allow rests in your ngrams.

        There are two primary modes for the `how` parameter. When set to
        "columnwise" (default), this is the simple case where the events in each
        column of the `df` DataFrame has its events grouped at the offset of the
        first event in the window. For example, to get 4-grams of melodic
        intervals:

        ip = ImportedPiece('path_to_piece')
        ngrams = ip.getNgrams(df=ip.getMelodic(), n=4)

        If `how` is set to 'modules' this will return contrap

In [34]:
ng = model.getNgrams(how='modules', cell_type=str)
ng


Unnamed: 0,[Contratenor]_[Discantus],[Tenor]_[Contratenor],[Tenor]_[Discantus],[QuintaVox]_[Tenor],[QuintaVox]_[Contratenor],[QuintaVox]_[Discantus],[SextaVox]_[QuintaVox],[SextaVox]_[Tenor],[SextaVox]_[Contratenor],[SextaVox]_[Discantus],[Bassus]_[SextaVox],[Bassus]_[QuintaVox],[Bassus]_[Tenor],[Bassus]_[Contratenor],[Bassus]_[Discantus]
0.0,"8_1, 8_2, 7",,,,,,,,,,,,,,
4.0,"8_2, 7_2, 6",,,,,,,,,,,,,,
7.0,"7_2, 6_2, 5",,,,,,,,,,,,,,
8.0,"6_2, 5_-4, 8",,,,,,,,,,,,,,
10.0,"5_-4, 8_8, 1",,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1308.0,,,,,,,,,,,,,,"6_-3, 9_4, 6","9_-3, 12_4, 9"
743.0,,,,,,,,,,,,,,,"11_3, 10_Held, 8"
1294.0,,,,,,,,,,,,,,,"8_Held, 9_-3, 11"
1296.0,,,,,,,,,,,,,,,"9_-3, 11_1, 12"


In [35]:
filtered = ng[ng.apply(lambda row: row.astype(str).str.contains('6_-2, 8').any(), axis=1)].copy()
filtered.reset_index(inplace=True)
# filtered["measure"] = filtered['index']/8+1
filtered


Unnamed: 0,index,[Contratenor]_[Discantus],[Tenor]_[Contratenor],[Tenor]_[Discantus],[QuintaVox]_[Tenor],[QuintaVox]_[Contratenor],[QuintaVox]_[Discantus],[SextaVox]_[QuintaVox],[SextaVox]_[Tenor],[SextaVox]_[Contratenor],[SextaVox]_[Discantus],[Bassus]_[SextaVox],[Bassus]_[QuintaVox],[Bassus]_[Tenor],[Bassus]_[Contratenor],[Bassus]_[Discantus]
0,109.0,"5_1, 5_2, 4",,,,"4_-5, 8_4, 6",,,,"6_Held, 6_-2, 8",,,,,,
1,168.0,"6_4, 3_2, 1",,,,"-3_-2, 3_2, 3","4_-2, 5_2, 3","7_Held, 6_-2, 8",,"5_Held, 8_-2, 10",,,,,,
2,170.0,"3_2, 1_-2, 4",,,,"3_2, 3_2, 1","5_2, 3_2, 4","6_-2, 8_-2, 10",,"8_-2, 10_-2, 10",,,,,,
3,182.0,"7_2, 6_-2, 8","-2_Held, 1_2, -3",,,,,,,"4_Held, 5_-5, 8",,,,,"4_Held, 5_Held, 4",
4,498.0,"3_Held, 1_-2, 3",,,,,"8_Held, 6_-2, 8",,,,,,,,,"12_Held, 10_2, 10"
5,499.0,"1_-2, 3_Held, 5",,,,,"6_-2, 8_-3, 12",,,,,,,,,"10_2, 10_Held, 12"
6,572.0,"9_-3, 10_Held, 11",,,,"-3_Held, -5_-2, -4","7_Held, 6_-2, 8",,,,,,,,"3_Held, 1_-5, 5","11_Held, 10_-5, 15"
7,688.0,"8_-3, 10_2, 8","4_-2, 3_2, 3","11_-2, 12_2, 10",,,,,"7_Held, 6_-2, 8","10_Held, 8_-2, 10",,,,,,
8,692.0,"8_Held, 6_-2, 8",,,,,"3_Held, 1_-2, 3",,,,,,,,,
9,712.0,"8_Held, 6_-2, 8",,,,,"3_Held, 1_-2, 3",,,,,,,,,


In [36]:
df = ng.value_counts()
df = ng.stack().value_counts()
df.tail(50)

8_Held, 5_-2, 8       1
3_Held, 3_-2, 5       1
-2_2, -3_-3, 1        1
5_-2, 6_-3, 9         1
10_2, 9_-3, 10        1
5_2, 3_-4, 6          1
11_Held, 10_5, 5      1
2_2, 2_-2, 4          1
-5_-2, -3_Held, -2    1
4_Held, 3_2, 2        1
-3_2, -3_-3, 3        1
4_-3, 6_Held, 10      1
3_Held, 4_-5, 9       1
1_-2, 2_-2, 4         1
1_-2, 2_2, 2          1
8_Held, 7_-3, 10      1
3_Held, 3_Held, 8     1
5_-4, 8_4, 3          1
5_1, 6_2, 5           1
4_Held, 3_-3, 3       1
3_2, 3_-5, 8          1
10_2, 10_Held, 12     1
-5_2, -6_2, -3        1
11_2, 10_-2, 11       1
12_5, 8_Held, 5       1
5_2, 5_2, 4           1
10_Held, 9_8, 4       1
5_4, 3_Held, 4        1
3_-2, 4_2, 4          1
5_-3, 7_2, 3          1
5_-3, 6_2, 3          1
4_-2, 8_Held, 7       1
-4_1, -4_Held, -5     1
-2_1, 1_Held, 2       1
8_Held, 10_-2, 10     1
-5_2, -5_-2, -3       1
-5_-5, 8_Held, 7      1
-3_-3, 3_2, 2         1
3_-3, 4_2, 6          1
6_1, 6_-2, 8          1
-7_Held, -7_-2, -6    1
3_2, 3_Held, 1  

In [37]:
corpus = CorpusBase(['https://crimproject.org/mei/CRIM_Mass_0015_2.mei'])
model = corpus.scores[0]
mel = model.getMelodic(kind='d')
mel_ngrams = model.getNgrams(df=mel, n=4, cell_type=str)


Requesting file from https://crimproject.org/mei/CRIM_Mass_0015_2.mei...
Successfully imported.


In [38]:
viz.plot_ngrams_heatmap(model, mel_ngrams)[0]

In [41]:
viz.plot_ngrams_heatmap(model, mel_ngrams, patterns=['-2, -2, -2, -2'])[0]

In [42]:
out = mel_ngrams[mel_ngrams == '-2, -3, 2, -2'].stack().dropna()
out
# out.reset_index(inplace=True)
# out["measure"] = out['index']/8+1

157.0   Contratenor [2]    -2, -3, 2, -2
288.0   Contratenor [1]    -2, -3, 2, -2
464.0   Tenor              -2, -3, 2, -2
488.0   Bassus [1]         -2, -3, 2, -2
1132.0  Discantus          -2, -3, 2, -2
1136.0  Tenor              -2, -3, 2, -2
1144.0  Discantus          -2, -3, 2, -2
1156.0  Bassus [2]         -2, -3, 2, -2
1164.0  Contratenor [2]    -2, -3, 2, -2
1168.0  Bassus [2]         -2, -3, 2, -2
1192.0  Discantus          -2, -3, 2, -2
1196.0  Bassus [1]         -2, -3, 2, -2
1204.0  Contratenor [1]    -2, -3, 2, -2
1208.0  Bassus [1]         -2, -3, 2, -2
1216.0  Contratenor [1]    -2, -3, 2, -2
dtype: object

In [43]:
corpus = CorpusBase(['https://crimproject.org/mei/CRIM_Model_0017.mei'])
model = corpus.scores[0]
har = model.getHarmonic(kind='d')
har_ngrams = model.getNgrams(how='modules', df=har, n=3, cell_type=str)
# har_ngrams.to_csv('model_17_harmonic')

har_ngrams[har_ngrams == '3_Held, 1_-2, 3'].stack().dropna()



Memoized piece detected...


498.0  [Contratenor]_[Discantus]    3_Held, 1_-2, 3
692.0  [QuintaVox]_[Discantus]      3_Held, 1_-2, 3
708.0  [Tenor]_[Contratenor]        3_Held, 1_-2, 3
712.0  [QuintaVox]_[Discantus]      3_Held, 1_-2, 3
592.0  [Bassus]_[Tenor]             3_Held, 1_-2, 3
dtype: object

## Test models methods

Here I create test for the new methods I developed

In [44]:
files = [title.split("/")[-1] for title in titles]

In [None]:
prefix = "https://raw.githubusercontent.com/CRIM-Project/CRIM-online/master/crim/static/mei/MEI_3.0/"
for file in files:
    corpus = CorpusBase([prefix + file])
    model = corpus.scores[0]
    print(model.getSoundingCount().values)