<a href="https://colab.research.google.com/github/klimereo/it-project/blob/main/IT_Chord_Conditional_Entropy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Importing Functions and Files



## Importing Libraries

In [None]:
!pip install pyitlib

In [74]:
import numpy as np
from pyitlib import discrete_random_variable as drv
import pandas as pd
from collections import Counter
from collections import defaultdict

## Importing Files

### Baroque / Classical



In [9]:
df_bach847 = pd.read_csv('/content/bach_847_rawmatrix.csv', sep='\t')
df_bachgoldberg = pd.read_csv ('/content/goldberg_rawmatrix.csv', sep='\t')
df_haydn = pd.read_csv ('/content/haydn50_rawmatrix.csv', sep='\t')
df_mozart = pd.read_csv('/content/mozart_turca_rawmatrix.csv', sep='\t')

In [10]:
print(df_bach847.shape)
print(df_bachgoldberg.shape)
print(df_haydn.shape)
print(df_mozart.shape)

(88, 1134)
(88, 1247)
(88, 2726)
(88, 1542)


### Romantic


In [11]:
df_chopin = pd.read_csv('/content/chopinwaltz_rawmatrix.csv', sep='\t')
df_schubert = pd.read_csv('/content/schubertimp_rawmatrix.csv', sep='\t')
df_tchaikovsky = pd.read_csv('/content/tchaikovsky_rawmatrix.csv', sep='\t')
df_liszt = pd.read_csv('/content/liszt_rawmatrix.csv', sep='\t')

In [12]:
print(df_chopin.shape)
print(df_schubert.shape)
print(df_tchaikovsky.shape)
print(df_liszt.shape)

(88, 1931)
(88, 2263)
(88, 1826)
(88, 1877)


### Early Modern

In [13]:
df_schoenberg = pd.read_csv('/content/schoenberg_rawmatrix.csv', sep='\t')
df_bartok = pd.read_csv ('/content/bartoksonata_rawmatrix.csv', sep='\t')
df_debussy = pd.read_csv('/content/debussyprelude_rawmatrix.csv', sep='\t')
df_scriabin = pd.read_csv('/content/scriabin_vers_rawmatrix.csv', sep='\t')

In [14]:
print(df_schoenberg.shape)
print(df_bartok.shape)
print(df_debussy.shape)
print(df_scriabin.shape)

(88, 1116)
(88, 1971)
(88, 1379)
(88, 2403)


### Contemporary

In [15]:
df_xenakis = pd.read_csv('/content/xenakis_evyrali_rawmatrix.csv', sep='\t')
df_messiaen = pd.read_csv('/content/messiaen_lesagnes_rawmatrix.csv', sep='\t')
df_ligeti = pd.read_csv('/content/ligeti_disordre_rawmatrix.csv', sep='\t')
df_boulez = pd.read_csv('/content/boulez_sonata_rawmatrix.csv', sep='\t')

In [16]:
print(df_xenakis.shape)
print(df_messiaen.shape)
print(df_ligeti.shape)
print(df_boulez.shape)

(88, 4726)
(88, 1431)
(88, 1014)
(88, 1242)


# Entropy over probability distribution of distinct vertical elements

## Functions

1. Extract chords from each column
2. Create probability distribution of types of chords
3. Calculate entropy

In [24]:
def extract_nonzero_indices_v2(df):
    # Initialize an empty list to store the sets of non-zero indices
    nonzero_indices = []
    
    # Iterate over columns
    for col in df.columns:
        # Extract non-zero indices for current column
        nonzero_indices_set = set(df[col][df[col] != 0].index)
        if len(nonzero_indices_set) == 0:
            nonzero_indices_set = {0,0,0}
        elif len(nonzero_indices_set) > 70:
            continue
        
        # Append the set of non-zero indices to the list
        nonzero_indices.append(nonzero_indices_set)
    
    return nonzero_indices

In [30]:
def probability_distribution_of_types(sets_list):
    # Convert the sets to strings
    sets_list = [str(s) for s in sets_list]
    # Count the occurrences of each set in the input list
    set_counts = Counter(sets_list)
    
    # Calculate the total number of sets
    total_sets = len(sets_list)
    
    # Initialize an empty dictionary to store the probability distribution
    probability_distribution = {}
    
    # Iterate over the set counts
    for set_item, count in set_counts.items():
        # Calculate the probability of the set
        probability = count / total_sets
        
        # Add the set and its probability to the dictionary
        probability_distribution[set_item] = probability
    
    return probability_distribution

In [43]:
def dict_vals_to_array(dictionary):
  # Initialize an empty NumPy array
  arr = np.array([])

  # Iterate over the values in the dictionary
  for value in dictionary.values():
    # Append the value to the array
    arr = np.append(arr, value)

  return arr

In [45]:
def entropy_pipeline(dataframe):
  chords = extract_nonzero_indices_v2(dataframe)
  chords_probs = probability_distribution_of_types(chords)
  array_probabilities = dict_vals_to_array(chords_probs)
  entropyOfPiece = drv.entropy_pmf(array_probabilities)

  return entropyOfPiece

## Application to datasets

### Baroque

In [47]:
bach847_chordtropy = entropy_pipeline(df_bach847)
print(bach847_chordtropy)

7.952256964919892


In [48]:
bachgoldberg_chordtropy = entropy_pipeline(df_bachgoldberg)
print(bachgoldberg_chordtropy)

7.315152599024176


In [49]:
haydn_chordtropy = entropy_pipeline(df_haydn)
print(haydn_chordtropy)

7.6342687846314545


In [50]:
mozart_chordtropy = entropy_pipeline(df_mozart)
print(mozart_chordtropy)

6.5878735346753


In [51]:
baroque_classical_chordtropy = {'Bach 847': bach847_chordtropy, 
                                'Bach Goldberg 1': bachgoldberg_chordtropy,
                                'Haydn Sonata 50': haydn_chordtropy, 
                                'Mozart Alla Turca': mozart_chordtropy}

In [52]:
import json

with open("barok_chordtropy.json", "w") as outfile:
    json.dump(baroque_classical_chordtropy, outfile)

### Romantic

In [53]:
chopin_chordtropy = entropy_pipeline(df_chopin)
print(chopin_chordtropy)

7.098127595373777


In [54]:
schubert_chordtropy = entropy_pipeline(df_schubert)
print(schubert_chordtropy)

8.721652769175355


In [55]:
tchaikovsky_chordtropy = entropy_pipeline(df_tchaikovsky)
print(tchaikovsky_chordtropy)

7.920282880730454


In [56]:
liszt_chordtropy = entropy_pipeline(df_liszt)
print(liszt_chordtropy)

8.57324764836548


In [58]:
romantic_chordtropies = {'Chopin Waltz': chopin_chordtropy,
                        'Lizst Liebestraum': liszt_chordtropy,
                        'Schubert Impromptu': schubert_chordtropy,
                        'Tchaikovsky June': tchaikovsky_chordtropy}

In [59]:
import json

with open("romantik_chordtropy.json", "w") as outfile:
    json.dump(romantic_chordtropies, outfile)

### Early Modern

In [60]:
schoenberg_chordtropy = entropy_pipeline(df_schoenberg)
print(schoenberg_chordtropy)

6.722630681860103


In [61]:
debussy_chordtropy = entropy_pipeline(df_debussy)
print(debussy_chordtropy)

6.990396074727471


In [62]:
bartok_chordtropy = entropy_pipeline(df_bartok)
print(bartok_chordtropy)

8.1563277464837


In [63]:
scriabin_chordtropy = entropy_pipeline(df_scriabin)
print(scriabin_chordtropy)

7.888174106931777


In [70]:
earlymodern_chordtropies = {'Scriabin Vers La Flamme': scriabin_chordtropy,
                           'Debussy Prelude 8:' : debussy_chordtropy,
                           'Schoenberg op11mvt3': schoenberg_chordtropy,
                           'Bartok Sonata 1 Mvt1': bartok_chordtropy}

In [71]:
with open("earlymodern_chordtropy.json", "w") as outfile1:
    json.dump(earlymodern_chordtropies, outfile1)

### Contemporary

In [64]:
xenakis_chordtropy = entropy_pipeline(df_xenakis)
print(xenakis_chordtropy)

6.29094824631885


In [67]:
messiaen_chordtropy = entropy_pipeline(df_messiaen)
print(messiaen_chordtropy)

7.119414223160959


In [68]:
ligeti_chordtropy = entropy_pipeline(df_ligeti)
print(ligeti_chordtropy)

9.657633840363825


In [69]:
boulez_chordtropy = entropy_pipeline(df_boulez)
print(boulez_chordtropy)

4.630614509178226


In [72]:
contemporary_chordtropies = {'Messiaen Les Anges': messiaen_chordtropy,
                             'Boulez Premier Sonata': boulez_chordtropy,
                             'Xenakis Evryali': xenakis_chordtropy,
                             'Ligeti Disordre' : ligeti_chordtropy}

In [73]:
with open("contemporary_chordtropy.json", "w") as outfile1:
    json.dump(contemporary_chordtropies, outfile1)

# Condition entropy over condition probability matrix of distinct vertical elements

## Functions

In [142]:
def assign_labels(sets_list):
    set_labels = {}
    label_counter = 1
    labels_list = []
    for s in sets_list:
        # convert set to a tuple for hashing
        s = tuple(s)
        if s not in set_labels:
            set_labels[s] = label_counter
            label_counter += 1
        labels_list.append(set_labels[s])
    return labels_list

In [154]:
def bigram_probabilities(labels_list):
    # Create a dictionary to store the bigram counts
    bigram_counts = {}
    
    # Iterate over the labels and count the bigrams
    for i in range(len(labels_list)-1):
        bigram = (labels_list[i], labels_list[i+1])
        if bigram in bigram_counts:
            bigram_counts[bigram] += 1
        else:
            bigram_counts[bigram] = 1
    
    # Get the unique labels
    unique_labels = list(set(labels_list))
    n = len(unique_labels)
    
    # Initialize the bigram probability matrix
    bigram_probabilities = np.zeros((n, n))
    
    # Iterate over the bigram counts and compute the probabilities
    for bigram, count in bigram_counts.items():
        bigram_probabilities[unique_labels.index(bigram[0]), unique_labels.index(bigram[1])] = count / (labels_list.count(bigram[0])+1e-20)
    
    return bigram_probabilities

In [201]:
def conditional_entropy_pipeline(df):
  chord_sequence = extract_nonzero_indices_v2(df)
  tokenised_chords = assign_labels(chord_sequence)
  bigram_probs = pd.DataFrame(bigram_probabilities(tokenised_chords))

  row_entropies = []

  for index,row in bigram_probs.iterrows():
    try:
      row_entropy = drv.entropy_pmf(list(row))
      row_entropies.append(row_entropy)
    except:
      continue

  return sum(row_entropies)

## Baroque / Classical

In [204]:
goldberg_condentropy = conditional_entropy_pipeline(df_bachgoldberg)
print(goldberg_condentropy)

260.2332869358936


In [206]:
bach847_condentropy = conditional_entropy_pipeline(df_bach847)
print(bach847_condentropy)

260.301620498294


In [207]:
haydn_condentropy = conditional_entropy_pipeline(df_haydn)
print(haydn_condentropy)

433.0571739830884


In [208]:
mozart_condentropy = conditional_entropy_pipeline(df_mozart)
print(mozart_condentropy)

180.2051383029351


In [209]:
baroque_classical_condtropies = {'Bach 847': bach847_condentropy,
                                 'Bach Goldberg':goldberg_condentropy,
                                 'Haydn Sonata 50': haydn_condentropy,
                                 'Mozart Alla Turca': mozart_condentropy}

In [210]:
import json

with open("baroque_condentropy.json", "w") as outfile:
    json.dump(baroque_classical_condtropies, outfile)

## Romantic

In [211]:
chopin_condentropy = conditional_entropy_pipeline(df_chopin)
print(chopin_condentropy)

303.33028174045273


In [212]:
liszt_condentropy = conditional_entropy_pipeline(df_liszt)
print(liszt_condentropy)

500.3907329033592


In [213]:
schubert_condentropy = conditional_entropy_pipeline(df_schubert)
print(schubert_condentropy)

248.44140206839475


In [214]:
tchaikovsky_condentropy = conditional_entropy_pipeline(df_tchaikovsky)
print(tchaikovsky_condentropy)

345.8928258753354


In [215]:
romantic_condtropies = {'Chopin Waltz': chopin_condentropy,
                        'Liszt Liebestraum': liszt_condentropy,
                        'Schubert Impromptu': schubert_condentropy,
                        'Tchaikovsky June': tchaikovsky_condentropy}

In [216]:
with open("romantic_condentropy.json", "w") as outfile:
    json.dump(romantic_condtropies, outfile)

## Early Modern

In [217]:
schoenberg_condentropy = conditional_entropy_pipeline(df_schoenberg)
print(schoenberg_condentropy)

168.63094980884395


In [218]:
bartok_condentropy = conditional_entropy_pipeline(df_bartok)
print(bartok_condentropy)

408.4436479836422


In [219]:
debussy_condentropy = conditional_entropy_pipeline(df_debussy)
print(debussy_condentropy)

153.86909093947702


In [220]:
scriabin_condentropy = conditional_entropy_pipeline(df_scriabin)
print(scriabin_condentropy)

405.25228866058404


In [221]:
earlymodern_condtropies = {'Schoenberg Klaiverstücke': schoenberg_condentropy,
                      'Debussy Prelude': debussy_condentropy,
                      'Scriabin Vers': scriabin_condentropy,
                      'Bartok Sonata': bartok_condentropy}

In [222]:
with open("earlymodern_condentropy.json", "w") as outfile:
    json.dump(earlymodern_condtropies, outfile)

## Contemporary 

In [313]:
xenakis_condentropy = conditional_entropy_pipeline(df_xenakis)
print(xenakis_condentropy)

400.60023282448003


In [246]:
messiaen_condentropy = conditional_entropy_pipeline(df_messiaen)
print(messiaen_condentropy)

231.0447097703268


In [244]:
ligeti_condentropy = conditional_entropy_pipeline(df_ligeti)
print(ligeti_condentropy)

98.15475932144545


In [241]:
boulez_condentropy = conditional_entropy_pipeline(df_boulez)
print(boulez_condentropy)

125.4873747921154


In [315]:
contemporary_condtropies = {'Xenakis Evyrail': xenakis_condentropy,
                            'Messiaen Les Anges': messiaen_condentropy,
                            'Ligeti Disordre': ligeti_condentropy,
                            'Boulez Sonata 1': boulez_condentropy}

In [316]:
with open("contemporary_condentropy.json", "w") as outfile:
    json.dump(contemporary_condtropies, outfile)

# Playing with conditional_entropy_pipeline

In [294]:
def generate_matrix_deterministic(n_rows, n_cols):
    matrix = np.zeros((n_rows, n_cols))
    for i in range(n_rows):
        random_col = np.random.randint(0, n_cols)
        matrix[i, random_col] = 1
    return matrix

In [295]:
def generate_matrix_half(n_rows, n_cols):
    matrix = np.zeros((n_rows, n_cols))
    for i in range(n_rows):
        random_cols = np.random.choice(n_cols, 2, replace=False)
        matrix[i, random_cols] = 0.5
    return matrix

In [310]:
def generate_matrix_stochastic(n_rows, n_cols):
    matrix = np.random.rand(n_rows,n_cols)
    matrix = matrix/np.sum(matrix,axis=1)[:,np.newaxis]
    return matrix

In [302]:
def test_pipeline(random_matrix):
  random_matrix = pd.DataFrame(random_matrix)
  row_entropies = []

  for index,row in random_matrix.iterrows():
    try:
      row_entropy = drv.entropy_pmf(list(row))
      row_entropies.append(row_entropy)
    except:
      continue
  
  return sum(row_entropies)

In [311]:
dimension = 500

deterministic_10x10 = generate_matrix_deterministic(dimension, dimension)
half_10x10 = generate_matrix_half(dimension, dimension)
trulyrandom_10x10 = generate_matrix_stochastic(dimension, dimension)

In [317]:
print("Deterministic Transition Matrix:", test_pipeline(deterministic_10x10))
print("--------------------------------------------------------------------")
print(".5 / .5 Transition:", test_pipeline(half_10x10))
print("--------------------------------------------------------------------")
print("Stochastic:", test_pipeline(trulyrandom_10x10))

Deterministic Transition Matrix: 0.0
--------------------------------------------------------------------
.5 / .5 Transition: 500.0
--------------------------------------------------------------------
Stochastic: 4343.703236810934
