# This NB demonstrates how we can use plot_ngrams_heatmap

We can use the method to plot all patterns, double click on specific patterns to highlight them in the heatmaps, and select top patterns to plot them on their own.

In [1]:
%cd ../intervals/
from main import *
import pandas as pd
import altair as alt
import visualizations as viz

/Users/dangtrang/OneDrive - brynmawr.edu/summer 2021/crim_intervals/intervals


In [2]:
root = "https://raw.githubusercontent.com/CRIM-Project/CRIM-online/master/crim/static/mei/MEI_4.0/"
prefix = "CRIM_Model_00"
files = ["17"] 
postfix = ".mei"

## Overall of what `plot_ngrams_heatmap` can do

First, we do the normal steps to get a ngrams dataframe.

In [3]:
corpus = CorpusBase([root + prefix + files[0] + postfix])
model = corpus.scores[0]
mel = model.getMelodic(kind='d', compound=False, unit=0)
mel_ngrams = model.getNgrams(df=mel, n=5, cell_type=str)

Requesting file from https://raw.githubusercontent.com/CRIM-Project/CRIM-online/master/crim/static/mei/MEI_4.0/CRIM_Model_0017.mei...
Successfully imported.


Then we pass the model and the ngram df into the method `plot_ngrams_heatmap`

We could click on the pattern in the top bar chart, and `shift+click` to select more patterns to display in the second chart like [here]("http://g.recordit.co/Le550mfczV.gif")

In [4]:
mel_ngram_chart, mel_ngrams_heatmap_df = viz.plot_ngrams_heatmap(model, mel_ngrams)
mel_ngram_chart

       start     end         pattern       voices
0        4.0    84.0             NaN  [Discantus]
1        7.0    11.0             NaN  [Discantus]
2        8.0    16.0  1, 2, 1, 2, -3  [Discantus]
3       10.0    14.0             NaN  [Discantus]
4       12.0    16.0             NaN  [Discantus]
...      ...     ...             ...          ...
3823  1466.0  1468.0             NaN     [Bassus]
3824  1468.0  1472.0             NaN     [Bassus]
3825  1469.0  1473.0             NaN     [Bassus]
3826  1470.0  1528.0             NaN     [Bassus]
3827  1471.0  1475.0             NaN     [Bassus]

[3828 rows x 4 columns]


In [5]:
mel_ngrams_heatmap_df.dropna()

Unnamed: 0,start,end,pattern,voices
2,8.0,16.0,"1, 2, 1, 2, -3",[Discantus]
43,88.0,90.0,"-2, 4, -2, -2, -2",[Discantus]
46,92.0,125.0,"4, -2, -2, -2, -2",[Discantus]
54,100.0,104.0,"-2, -2, -2, -2, -2",[Discantus]
58,104.0,105.333333,"-2, -2, -2, -2, 2",[Discantus]
...,...,...,...,...
3807,1448.0,1450.0,"-2, 1, 2, 2, -5",[Bassus]
3810,1452.0,1453.0,"1, 2, 2, -5, 3",[Bassus]
3813,1455.0,1459.0,"2, 2, -5, 3, -3",[Bassus]
3814,1456.0,1460.0,"2, -5, 3, -3, 1",[Bassus]


### Selecting patterns

We could only include some patterns in the heatmaps

First, I collected the top patterns and turned these patterns into a list.

In [6]:
# count and get the 10 most popular patterns
mel_ngrams_top_patterns_df = mel_ngrams.stack().value_counts().to_frame().head(10)
# retrieve a list to generate heatmaps
mel_ngrams_top_patterns_list = mel_ngrams_top_patterns_df.index.to_list()
mel_ngrams_top_patterns_df

Unnamed: 0,0
"-2, -2, -2, -2, -2",21
"2, 2, 2, 2, 2",20
"2, 2, 2, 2, -2",16
"2, 2, 2, 2, -3",14
"2, 2, 2, -3, 2",13
"2, 2, -2, -3, 2",13
"2, -2, -3, 2, -2",11
"-2, 2, 2, 2, 2",11
"-2, -3, 2, 2, 2",10
"1, 2, 2, -2, -3",10


In [7]:
mel_ngrams_top_patterns_list 

['-2, -2, -2, -2, -2',
 '2, 2, 2, 2, 2',
 '2, 2, 2, 2, -2',
 '2, 2, 2, 2, -3',
 '2, 2, 2, -3, 2',
 '2, 2, -2, -3, 2',
 '2, -2, -3, 2, -2',
 '-2, 2, 2, 2, 2',
 '-2, -3, 2, 2, 2',
 '1, 2, 2, -2, -3']

We would pass the list of patterns into the `patterns` parametter.

In [8]:
chart, mel_ngrams_matches_df = viz.plot_ngrams_heatmap(model, mel_ngrams, patterns=mel_ngrams_top_patterns_list)
chart

       start     end             pattern       voices
54     100.0   104.0  -2, -2, -2, -2, -2  [Discantus]
220    412.0     NaN      -2, 2, 2, 2, 2  [Discantus]
222    416.0     NaN      2, 2, 2, 2, -2  [Discantus]
284    539.0     NaN      2, 2, 2, 2, -2  [Discantus]
301    560.0     NaN     -2, -3, 2, 2, 2  [Discantus]
...      ...     ...                 ...          ...
3441   468.0     NaN    2, -2, -3, 2, -2     [Bassus]
3699  1112.0     NaN      2, 2, 2, 2, -3     [Bassus]
3700  1116.0  1117.0      2, 2, 2, -3, 2     [Bassus]
3727  1204.0     NaN      2, 2, 2, 2, -3     [Bassus]
3728  1208.0  1209.0      2, 2, 2, -3, 2     [Bassus]

[139 rows x 4 columns]


**Cross-checking output from the heatmap with the ngrams dataframe**

We can see that'1, 1, 1, 1, 1' is a really popular pattern. Let's view its start and end points in ngram and in the heatmap's dataframe to make sure that it has been calculated correctly

In [9]:
mel_ngrams_matches_df[mel_ngrams_matches_df['pattern'] == '1, 1, 1, 1, 1']

Unnamed: 0,start,end,pattern,voices


## Here plot_ngrams_heatmap is used with ngrams and getHarmonic

Here getHarmonic is used to retrieve patterns, and the top 10 most popular patterns are plotted. 

In [10]:
harm = model.getHarmonic(kind="d", compound=True)
h_ng = model.getNgrams(df=harm, how='modules', exclude=['Rest'], cell_type="str")
h_ng

Unnamed: 0,[Contratenor]_[Discantus],[Tenor]_[Contratenor],[Tenor]_[Discantus],[QuintaVox]_[Tenor],[QuintaVox]_[Contratenor],[QuintaVox]_[Discantus],[SextaVox]_[QuintaVox],[SextaVox]_[Tenor],[SextaVox]_[Contratenor],[SextaVox]_[Discantus],[Bassus]_[SextaVox],[Bassus]_[QuintaVox],[Bassus]_[Tenor],[Bassus]_[Contratenor],[Bassus]_[Discantus]
0.0,"8_1, 8_2, 7",,,,,,,,,,,,,,
4.0,"8_2, 7_2, 6",,,,,,,,,,,,,,
7.0,"7_2, 6_2, 5",,,,,,,,,,,,,,
8.0,"6_2, 5_-4, 8",,,,,,,,,,,,,,
10.0,"5_-4, 8_8, 1",,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1308.0,,,,,,,,,,,,,,"6_-3, 9_4, 6","9_-3, 12_4, 9"
743.0,,,,,,,,,,,,,,,"11_3, 10_Held, 8"
1294.0,,,,,,,,,,,,,,,"8_Held, 9_-3, 11"
1296.0,,,,,,,,,,,,,,,"9_-3, 11_1, 12"


In [11]:
h_ng_top_patterns_df = h_ng.stack().value_counts().to_frame().head(5)
h_ng_chart, h_ng_heat_map_df = viz.plot_ngrams_heatmap(model, h_ng, patterns=h_ng_top_patterns_df.index.to_list())
h_ng_chart

        start     end            pattern                     voices
1         4.0     7.0        8_2, 7_2, 6  [Contratenor]_[Discantus]
20       96.0    97.0       10_2, 9_2, 8  [Contratenor]_[Discantus]
24      100.0   101.0        8_2, 7_2, 6  [Contratenor]_[Discantus]
37      124.0   167.0       10_2, 9_2, 8  [Contratenor]_[Discantus]
86      550.0   551.0       10_2, 9_2, 8  [Contratenor]_[Discantus]
...       ...     ...                ...                        ...
10042   548.0   549.0  1_Held, 2_Held, 3     [Bassus]_[Contratenor]
10241  1424.0  1425.0  1_Held, 2_Held, 3     [Bassus]_[Contratenor]
10243  1426.0  1432.0  3_Held, 4_Held, 5     [Bassus]_[Contratenor]
10651    90.0    91.0        8_2, 7_2, 6     [Bassus]_[Contratenor]
11412    84.0    85.0       10_2, 9_2, 8       [Bassus]_[Discantus]

[65 rows x 4 columns]


## Alex new sliding windows feature

In [18]:
varied_size_ngrams = model.getNgrams(df=model.getMelodic(), max_n=-1, cell_type=str)
varied_size_ngrams

Unnamed: 0,[Discantus],[Contratenor],[Tenor],[QuintaVox],[SextaVox],[Bassus]
4.0,,"P1, M2, M2, m2, -P4, P8, -M2, -m2, m2, -m3, -M...",,,,
7.0,,,,,,
8.0,"P1, M2, P1, M2, -M3",,,,,
10.0,,,,,,
12.0,,,,,,
...,...,...,...,...,...,...
1469.0,,,,,,
1470.0,,,,,,
1471.0,,,,,,
1472.0,,,,,,


In [19]:
chart, df = viz.plot_ngrams_heatmap(model, varied_size_ngrams)

ValueError: operands could not be broadcast together with shapes (4440,) (672,) 

In [14]:
chart

In [15]:
df.dropna()

NameError: name 'df' is not defined