In [1]:
import requests
import altair as alt
import pandas as pd
from pandas.io.json import json_normalize

from pyvis.network import Network
from ipywidgets import interact, fixed

import re

%load_ext autoreload
%autoreload 2
%cd ../intervals
from main import *
from main_objs import *
import visualizations as viz

/Users/dangtrang/OneDrive - brynmawr.edu/summer 2021/crim_intervals/intervals


# This notebook contains the methods under development

## Ngrams close match heatmap

In [2]:
# Load a single CRIM piece and derive its melodic n-grams plus their durations.
piece_url = 'https://crimproject.org/mei/CRIM_Model_0017.mei'
ngram_length = 5  # getNgrams and getDuration are both called with this same n

model = CorpusBase([piece_url]).scores[0]
mel = model.getMelodic(kind='d', compound=False, unit=0)
mel_ngram = model.getNgrams(df=mel, n=ngram_length)
mel_ngram_dur = model.getDuration(df=mel, mask_df=mel_ngram, n=ngram_length)

# Preview the first rows; the displayed table has one column per voice.
mel_ngram.head()

Successfully imported.


Unnamed: 0,[Discantus],[Contratenor],[Tenor],[QuintaVox],[SextaVox],[Bassus]
4.0,,"1, 2, 2, 2, -4",,,,
7.0,,"2, 2, 2, -4, 8",,,,
8.0,"1, 2, 1, 2, -3","2, 2, -4, 8, -2",,,,
10.0,,"2, -4, 8, -2, -2",,,,
12.0,,"-4, 8, -2, -2, 2",,,,


### Updated close match

#### New helper method

First, we compute a similarity score for every pair of n-gram patterns, using a comparison method supplied by the user.

In [3]:
# Print the docstring of the n-gram scoring helper that the next cell calls.
help(viz.score_ngram)

Help on function score_ngram in module visualizations:

score_ngram(ngram, method)
    This method computes the similarity between patterns based on the
    method the user selected.
    :param ngram: dataframe containing ngrams to compare.
    :param method: whatever comparison methods that accepts two
    arguments. For example:
    from strsimpy.normalized_levenshtein import NormalizedLevenshtein
    algorithm = NormalizedLevenshtein()
    score_ngram(ngram, algorithm.similarity)
    :return: a multi-indexed series containing scores indexed with
    its two patterns.



In [4]:
%%time
from strsimpy.normalized_levenshtein import NormalizedLevenshtein
# Score the n-gram patterns against each other with normalized Levenshtein similarity.
# The result is a Series indexed by (pattern, other); the recorded run took about 4 s.
algorithm = NormalizedLevenshtein()
score_ser = viz.score_ngram(ngram=mel_ngram, method=algorithm.similarity)

CPU times: user 4.18 s, sys: 39.2 ms, total: 4.22 s
Wall time: 4.23 s


In [5]:
# Preview the first five (pattern, other) similarity scores.
score_ser.head(n=5)

pattern         other           
1, 2, 2, 2, -4  1, 2, 2, 2, -4      1.0
                2, 2, 2, -4, 8      0.6
                1, 2, 1, 2, -3      0.6
                2, 2, -4, 8, -2     0.2
                2, -4, 8, -2, -2    0.0
Name: score, dtype: float64

In [44]:
# Count how often each n-gram pattern occurs across all voices of the piece.
(
    mel_ngram
    .stack()         # flatten the per-voice columns into a single Series
    .value_counts()  # occurrences per distinct pattern, most frequent first
)

-2, -2, -2, -2, -2    21
2, 2, 2, 2, 2         20
2, 2, 2, 2, -2        16
2, 2, 2, 2, -3        14
2, 2, 2, -3, 2        13
                      ..
-2, 3, 2, 2, 2         1
2, 2, -5, 2, -2        1
-5, 5, -5, 2, 2        1
2, -2, 1, 3, 1         1
-2, -2, 2, -5, 8       1
Length: 478, dtype: int64

In [45]:
# Key patterns for the heatmaps below, taken from the counts above:
# the most frequent pattern (21 occurrences) and one that occurs only once.
popular_pat = '-2, -2, -2, -2, -2'
unpopular_pat = '-2, -2, 2, -5, 8'

#### New parameter `compare` to select between similarity score and distance score

This parameter ensures that, regardless of the scoring algorithm, patterns that are *more similar* to the key pattern are drawn *bolder*.

- `compare='d'`: patterns with a lower distance score are drawn bolder, because a lower distance means they are more similar to the key pattern.
- `compare='s'`: patterns with a higher similarity score are drawn bolder.

In [50]:
# Print the signature and docstring of the heatmap helper, including the `compare` argument.
help(viz.plot_close_match_heatmap)

Help on function plot_close_match_heatmap in module visualizations:

plot_close_match_heatmap(ngrams_df, key_pattern, score_df, compare, ngrams_duration=None, selected_patterns=[], voices=[], heatmap_width=800, heatmap_height=300)
    Plot how closely the other vectors match a selected vector.
    Uses the Levenshtein distance.
    :param ngrams_df: crim-intervals getNgram's output
    :param key_pattern: a pattern the users selected to compare other patterns with (str)
    :param score_df: dataframe containing the score for each pair of patterns.
    :param compare: 'd' if compare distance, 's' if compare similarity. The chart
    would be colored bolder if the pattern are more different/similar based on the
    parameters.
    :param ngrams_duration: if None, simply output the offsets. If the users input a
    list of durations, calculate the end by adding durations with offsets and
    display the end on the heatmap accordingly. (optional)
    :param selected_patterns: list of speci

In [36]:
# Similarity heatmap ('s': higher score is bolder) keyed on the most frequent pattern.
viz.plot_close_match_heatmap(
    ngrams_df=mel_ngram,
    key_pattern=popular_pat,
    score_df=score_ser,
    compare='s',
    ngrams_duration=mel_ngram_dur,
)

In [37]:
# Similarity heatmap ('s': higher score is bolder) keyed on a pattern that occurs only once.
viz.plot_close_match_heatmap(
    ngrams_df=mel_ngram,
    key_pattern=unpopular_pat,
    score_df=score_ser,
    compare='s',
    ngrams_duration=mel_ngram_dur,
)

***Distance***

In [41]:
# Distance scores for the n-grams, passed to the heatmaps below with compare='d'.
# NOTE(review): the variable name suggests Manhattan distance — confirm against getDistance.
manhattanDis = model.getDistance(df=mel_ngram)

In [46]:
# Distance heatmap ('d': lower score is bolder) keyed on the most frequent pattern.
viz.plot_close_match_heatmap(
    ngrams_df=mel_ngram,
    key_pattern=popular_pat,
    score_df=manhattanDis,
    compare='d',
    ngrams_duration=mel_ngram_dur,
)

In [47]:
# Distance heatmap ('d': lower score is bolder) keyed on a pattern that occurs only once.
viz.plot_close_match_heatmap(
    ngrams_df=mel_ngram,
    key_pattern=unpopular_pat,
    score_df=manhattanDis,
    compare='d',
    ngrams_duration=mel_ngram_dur,
)

## Networks of pieces

In [48]:
import community as community_louvain
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import networkx as nx

# Build a network of pieces linked by their relationships, colour the nodes by
# Louvain community, and render the result as an interactive pyvis HTML page.

# NOTE(review): `df_relationships` is not defined anywhere in the visible part of
# this notebook, so this cell only runs if it already exists in the kernel.
# TODO: load it explicitly in an earlier cell so Restart & Run All works.
df = viz._trim_and_combine_piece_ids_with_measures(df_relationships)

model_ids = df['model_observation.piece.piece_id']
derivative_ids = df['derivative_observation.piece.piece_id']

# add the nodes and edges into graphs
# MultiGraph keeps one edge per relationship row, so repeated piece pairs
# become parallel edges.
G = nx.MultiGraph()
G.add_nodes_from(model_ids)
G.add_nodes_from(derivative_ids)

# Pair the two columns positionally. A per-row `.loc[row]` lookup returns a
# Series, not a single id, when the index contains duplicate labels, which
# breaks add_edge; zip is unaffected by the index.
G.add_edges_from(zip(model_ids, derivative_ids))

# compute the best partition
# Louvain is randomized; a fixed seed makes the communities reproducible across runs.
LOUVAIN_SEED = 42
partition = community_louvain.best_partition(G, random_state=LOUVAIN_SEED)

# Store each node's community under 'group' so it carries over into the pyvis network.
for node, group in partition.items():
    G.nodes[node]['group'] = group

#convert to pyvis
nt = Network(notebook=True)
nt.from_nx(G)
nt.show('../CRIM_Intervals_Notebooks/community.html')