In [13]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go

In [14]:
# Upload the analysis exports through Colab's file picker; `uploaded` keeps the
# object returned by files.upload().
# NOTE(review): the log below shows "mozart.txt" being saved as "mozart (1).txt",
# while the Mozart section reads "mozart.txt" -- confirm the intended copy is loaded.
from google.colab import files
uploaded = files.upload()

Saving beethoven.txt to beethoven.txt
Saving haydn.txt to haydn.txt
Saving mozart.txt to mozart (1).txt
Saving turca.txt to turca.txt


# Mozart

In [142]:
# Column names are supplied explicitly for the colon-separated export.
column_names = ['Order', 'End', 'Chord', 'Root', 'Key', 'Function', 'Sequence', 'File']
df = pd.read_csv("mozart.txt", names=column_names, sep=':')

# Collapse every missing-value marker ('null', ' null', NaN) into None.
df = df.replace({'null': None, np.nan: None, ' null': None})

# '[a, b, c]' -> ['a', 'b', 'c']; rows without a sequence stay None.
df['Sequence'] = df['Sequence'].map(
    lambda raw: None if raw is None else raw.strip('[]').split(', ')
)
df

Unnamed: 0,Order,End,Chord,Root,Key,Function,Sequence,File
0,1,3.00,,,Bb DUR,,,moz570a_20240513233657.mid
1,2,3.00,,,Bb DUR,,,moz570a_20240513233657.mid
2,3,3.00,,,Bb DUR,,,moz570a_20240513233657.mid
3,4,3.00,Bb MAJOR_TRIAD,"QUARTSEXT(4,6)",Bb DUR,T (I),,moz570a_20240513233657.mid
4,5,2.00,Bb MAJOR_TRIAD,"QUARTSEXT(4,6)",Bb DUR,T (I),,moz570a_20240513233657.mid
...,...,...,...,...,...,...,...,...
485,208,1.50,Bb MAJOR_TRIAD,SEXT(6),Bb DUR,T (I),,moz570a_20240513233657.mid
486,208,2.25,F DOMINANT_SEVENTH_INCOMPLETE,ROOT(5),Bb DUR,D (V),,moz570a_20240513233657.mid
487,208,2.75,D MINOR_TRIAD,SEXT(6),Bb DUR,III,,moz570a_20240513233657.mid
488,208,3.00,,,Bb DUR,,,moz570a_20240513233657.mid


## Analýza taktov a počet udalostí v nich

In [143]:
def assign_color(row):
    """Label one analysis row by the most specific item that was recognised.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide the keys 'Function', 'Key' and 'Chord'.

    Returns
    -------
    str
        Harmonic function takes precedence over key, key over chord; 'Iné' is
        returned when all three are missing.

    Missing values are detected with ``pd.notna`` so NaN is handled the same way
    as None (a plain ``is not None`` test would count NaN as "recognised").
    """
    if pd.notna(row['Function']):
        return 'Rozpoznaná harmonická funkcia'
    if pd.notna(row['Key']):
        return 'Rozpoznaná tónina'
    if pd.notna(row['Chord']):
        return 'Rozpoznaný akord'
    return 'Iné'

# Label every row with what the analyser managed to recognise.
colors = df.apply(assign_color, axis=1)

color_discrete_sequence=['green', 'orange', 'gray', 'lightgray']

# Bind each label to a fixed colour. With only a colour sequence, Plotly Express
# hands colours out in order of first appearance in the data, so the same label
# would be drawn in different colours for different pieces.
color_discrete_map = dict(zip(
    ['Rozpoznaná harmonická funkcia', 'Rozpoznaná tónina', 'Rozpoznaný akord', 'Iné'],
    color_discrete_sequence,
))

fig = px.scatter(df, x='Order', y='End', color=colors,
                 color_discrete_map=color_discrete_map,
                 title='Graf taktov a udalostí v nich',
                 labels={'Order': 'Takt', 'End': 'Doba'}
                 )

fig.show()


In [144]:
# Per measure ('Order'), take the largest 'End' value as that measure's duration.
measure_durations = (
    df.groupby('Order')['End']
    .max()
    .rename('Duration')
    .reset_index()
)

# Number of measures per duration, plus that number as a share of all measures.
duration_counts = (
    measure_durations['Duration']
    .value_counts()
    .rename_axis('Duration')
    .reset_index(name='Count')
)
total_measures = duration_counts['Count'].sum()
duration_counts['Percentage'] = (duration_counts['Count'] / total_measures) * 100

print(duration_counts)

   Duration  Count  Percentage
0       3.0    209       100.0


## Výskyt jednotlivých akordov

In [145]:
# Strip '!' characters from the chord labels before counting.
df['Chord'] = df['Chord'].str.replace('!', '', regex=False)

# One row per distinct chord label with its number of occurrences.
chord_counts = (
    df['Chord']
    .value_counts()
    .rename_axis('Chord')
    .reset_index(name='Count')
)

# Chord qualities; a chord's 1-based position in this list is its x-axis slot.
chord_types = [
    'MAJOR_TRIAD', 'MINOR_TRIAD', 'AUGMENTED_TRIAD', 'DIMINISHED_TRIAD',
    'DOMINANT_SEVENTH', 'DIMINISHED_SEVENTH', 'DIMINISHED_MINOR_SEVENTH',
    'MAJOR_SEVENTH', 'MINOR_SEVENTH', 'AUGMENTED_SEVENTH', 'MINOR_MAJOR_SEVENTH',
    'DOMINANT_SEVENTH_INCOMPLETE', 'DOMINANT_SEVENTH_ALT_INCOMPLETE',
    'MAJOR_SEVENTH_INCOMPLETE', 'DIMINISHED_SEVENTH_INCOMPLETE',
    'DIMINISHED_MINOR_SEVENTH_INCOMPLETE', 'MINOR_MAJOR_SEVENTH_INCOMPLETE',
]

# Note name -> number 0-11; enharmonic spellings share one number.
note_mapping = {
    'C': 0, 'H#': 0,
    'C#': 1, 'Db': 1,
    'D': 2,
    'D#': 3, 'Eb': 3,
    'E': 4, 'Fb': 4,
    'F': 5, 'E#': 5,
    'F#': 6, 'Gb': 6,
    'G': 7,
    'G#': 8, 'Ab': 8,
    'A': 9,
    'A#': 10, 'Bb': 10,
    'B': 11, 'Cb': 11,
}

# Number -> every note name mapped to it, in note_mapping order (used for tick labels).
inverse_note_mapping = {}
for note_name, pitch_class in note_mapping.items():
    inverse_note_mapping.setdefault(pitch_class, []).append(note_name)

# One record per distinct chord:
# (1-based chord-type slot for x, note number for y, occurrence count, chord label).
data = []
for index, row in chord_counts.iterrows():
    chord_note, chord_type = row['Chord'].split()
    x_value = chord_types.index(chord_type) + 1
    y_value = note_mapping[chord_note]
    size = row['Count']
    data.append((x_value, y_value, size, row['Chord']))

plot_df = pd.DataFrame(data, columns=['Chord_Type', 'Note', 'Size', 'Chord'])

# Bubble scaling: using the raw count as a pixel diameter makes chords that occur
# once or twice nearly invisible and lets frequent chords grow without bound.
# Scale the marker area against the largest count instead (Plotly's bubble-chart
# sizeref recipe); the same sizeref is shared by all traces so bubbles stay comparable.
max_marker_px = 60
largest_count = max((record[2] for record in data), default=1)
bubble_sizeref = 2.0 * largest_count / (max_marker_px ** 2)

fig = go.Figure()

# One trace per chord so each chord keeps its own legend entry.
for record in data:
    fig.add_trace(go.Scatter(
        x=[record[0]], y=[record[1]],
        mode='markers',
        # size is a one-element list: sizemode/sizeref/sizemin only take effect
        # when marker.size is given as an array.
        marker=dict(size=[record[2]], sizemode='area', sizeref=bubble_sizeref,
                    sizemin=3, color=record[2]),
        text=record[2],
        name=record[3]
    ))

fig.update_layout(
    title='Výskyt akordov',
    xaxis=dict(
        title='Typ akordu',
        tickmode='array',
        tickvals=list(range(1, len(chord_types) + 1)),
        ticktext=chord_types,
        tickangle=45
    ),
    yaxis=dict(
        title='Tóny',
        tickmode='array',
        tickvals=list(inverse_note_mapping.keys()),
        ticktext=[', '.join(notes) for notes in inverse_note_mapping.values()]
    )
)

fig.show()

## Zmena tóniny a jej percentuálne zastúpenia

In [153]:
import plotly.graph_objects as go

# Marker colour for each mode label used in the key plot.
color_mapping = {'MOL': 'blue', 'DUR': 'green'}

# Number -> note names, rebuilt from note_mapping for the y-axis tick labels.
inverse_note_mapping = {}
for note_name, pitch_class in note_mapping.items():
    inverse_note_mapping.setdefault(pitch_class, []).append(note_name)

def calculate_ticks_x(df):
    """Turn the per-measure 'End' values into one running position per row.

    Rows are visited in order. While 'Order' stays the same as on the previous
    row, the running position advances by the difference between this row's
    'End' and the previous one; on the first row of a different 'Order' it
    advances by that row's full 'End'.

    Returns a list with one cumulative position for every row of ``df``.
    """
    positions = []
    position = 0
    last_order, last_end = '', 0  # '' acts as "no previous measure yet"
    for order, end in zip(df['Order'], df['End']):
        step = end - last_end if order == last_order else end
        position += step
        positions.append(position)
        last_order, last_end = order, end
    return positions

# Running position of every row on the shared time axis.
df['Time'] = calculate_ticks_x(df)
# Number of the key's first word (e.g. 'Bb' in 'Bb DUR'); None where the key is missing.
df['Key_Value'] = df['Key'].map(
    lambda key_label: None if pd.isna(key_label) else note_mapping[key_label.split()[0]]
)

def determine_color(key):
    """Return the mode label for a key string: 'MOL', 'DUR', or 'black'.

    'MOL' is checked before 'DUR'. A missing key (None/NaN) or a key that
    contains neither substring yields the fallback 'black'.
    """
    if pd.isna(key):
        return 'black'
    for mode in ('MOL', 'DUR'):
        if mode in key:
            return mode
    return 'black'

# Mode label per row ('MOL' / 'DUR', or 'black' when neither is found).
df['Key_Color'] = df['Key'].apply(determine_color)

# Rows without a key value cannot be placed on the y-axis.
df_filtered = df.dropna(subset=['Key_Value'])

fig = go.Figure()

# Line plot: key value over time as one light-grey line.
fig.add_trace(go.Scatter(x=df_filtered['Time'], y=df_filtered['Key_Value'], mode='lines',
                         line=dict(color='rgb(204, 204, 204)'), name='Zmeny tóniny v čase'))

# Scatter plot for the individual points, one trace per mode label.
for color, group in df_filtered.groupby('Key_Color'):
    # determine_color may return 'black', which color_mapping does not contain;
    # fall back to the label itself (a valid colour name) instead of raising KeyError.
    marker_color = color_mapping.get(color, color)
    fig.add_trace(go.Scatter(x=group['Time'], y=group['Key_Value'], mode='markers',
                             marker=dict(size=10, color=marker_color, opacity=0.7),
                             hovertemplate='<b>Tónina:</b> %{customdata[0]}<br><b>Takt:</b> %{customdata[1]}<br><b>Doba:</b> %{customdata[2]}',
                             customdata=group[['Key', 'Order', 'End']], name=color))

# Label the y-axis ticks with the note names that share each number.
fig.update_yaxes(tickmode='array', tickvals=list(inverse_note_mapping.keys()),
                 ticktext=[', '.join(notes) for notes in inverse_note_mapping.values()])

fig.update_layout(title='Zmeny tóniny v čase', xaxis_title='Doby', yaxis_title='Tónina')

fig.show()


In [147]:
# Rows per key; rows without a key are collected under the label 'None'.
row_total = len(df)
key_counts = df['Key'].value_counts()
key_counts['None'] = row_total - key_counts.sum()

# Convert the counts to percentages of all rows.
key_percentages = key_counts / row_total * 100
key_counts_df = pd.DataFrame({
    'Key': key_percentages.index,
    'Percent': key_percentages.values,
})

fig = px.pie(key_counts_df, names='Key', values='Percent',
             title='Percentuálne zastúpenie jednotlivých tónin')

fig.show()

## Harmonické funkcie

In [148]:
# Rows per harmonic function; rows without one are collected under 'None'.
row_total = len(df)
function_counts = df['Function'].value_counts()
function_counts['None'] = row_total - function_counts.sum()

# Convert the counts to percentages of all rows.
function_percentages = function_counts / row_total * 100
function_counts_df = pd.DataFrame({
    'Function': function_percentages.index,
    'Percent': function_percentages.values,
})

fig = px.pie(function_counts_df, names='Function', values='Percent',
             title='Percentuálne zastúpenie jednotlivých harmonických funkcií')

fig.show()

In [149]:
# y-axis slot for each harmonic function label.
function_order = {'T (I)': 0, 'II': 1, 'III': 2, 'S (IV)': 3, 'D (V)': 4, 'VI': 5, 'VII': 6}

df['Function_Order'] = df['Function'].map(function_order)
# Keep only rows whose function has a slot; work on a copy so df gains no extra column.
df_filtered = df.copy().dropna(subset=['Function_Order'])
# x position: the last 'Time' among the kept rows of the same measure.
df_filtered['Time_Hover'] = df_filtered.groupby('Order')['Time'].transform('last')

hover_text = [
    f"Tónina: {key}, Takt: {order}, Doba: {end}, Harmonická funkcia: {function}"
    for key, order, end, function in zip(
        df_filtered['Key'], df_filtered['Order'], df_filtered['End'], df_filtered['Function']
    )
]

function_markers = go.Scatter(
    x=df_filtered['Time_Hover'],
    y=df_filtered['Function_Order'],
    mode='markers',
    marker=dict(size=10, color=df_filtered['Function_Order'], colorscale='Viridis'),
    hovertext=hover_text,
)
fig = go.Figure(data=function_markers)

fig.update_layout(title='Zmeny harmonických funkcií v čase', xaxis_title='Doby', yaxis_title='Harmonická funkcia')
fig.update_yaxes(tickvals=list(function_order.values()), ticktext=list(function_order.keys()))

fig.show()


## Kadencie

In [150]:
# Rows that have both a Sequence and a Function, shown without the helper columns.
df_filtered = (
    df.copy()
    .dropna(subset=['Sequence', 'Function'])
    .drop(columns=['File', 'Time', 'Key_Value', 'Key_Color', 'Function_Order', 'Root'])
)
df_filtered

Unnamed: 0,Order,End,Chord,Key,Function,Sequence
66,31,1.0,Bb MAJOR_TRIAD,Bb DUR,T (I),"[T (I), VI, D (V), T (I)]"
67,31,2.0,Bb MAJOR_SEVENTH,Bb DUR,T (I),"[T (I), VI, D (V), T (I)]"
68,31,3.0,G MINOR_SEVENTH,Bb DUR,VI,"[T (I), VI, D (V), T (I)]"
77,35,3.0,C DOMINANT_SEVENTH_INCOMPLETE,F DUR,D (V),"[T (I), VI, D (V), T (I)]"
78,36,1.0,F MAJOR_TRIAD,F DUR,T (I),"[T (I), VI, D (V), T (I)]"
255,111,3.0,C MINOR_TRIAD,C MOLL,T (I),"[T (I), II, D (V), T (I)]"
256,112,2.0,D DIMINISHED_TRIAD,C MOLL,II,"[T (I), II, D (V), T (I)]"
264,118,1.0,Bb DOMINANT_SEVENTH,Eb DUR,D (V),"[T (I), II, D (V), T (I)]"
265,118,1.5,Eb MAJOR_TRIAD,Eb DUR,T (I),"[T (I), II, D (V), T (I)]"


# Beethoven

In [154]:
# Column names are supplied explicitly for the colon-separated export.
column_names = ['Order', 'End', 'Chord', 'Root', 'Key', 'Function', 'Sequence', 'File']
df = pd.read_csv("beethoven.txt", names=column_names, sep=':')

# Collapse every missing-value marker ('null', ' null', NaN) into None.
df = df.replace({'null': None, np.nan: None, ' null': None})

# '[a, b, c]' -> ['a', 'b', 'c']; rows without a sequence stay None.
df['Sequence'] = df['Sequence'].map(
    lambda raw: None if raw is None else raw.strip('[]').split(', ')
)
df

Unnamed: 0,Order,End,Chord,Root,Key,Function,Sequence,File
0,1,3.000,C MINOR_TRIAD,ROOT(5),C MOLL,T (I),,beeth101_20240513233548.mid
1,2,1.125,C MINOR_TRIAD,SEXT(6),C MOLL,T (I),,beeth101_20240513233548.mid
2,2,3.000,C MINOR_TRIAD,"QUARTSEXT(4,6)",C MOLL,T (I),,beeth101_20240513233548.mid
3,3,3.000,C MINOR_TRIAD,ROOT(5),C MOLL,T (I),,beeth101_20240513233548.mid
4,4,3.000,C MINOR_TRIAD,ROOT(5),C MOLL,T (I),,beeth101_20240513233548.mid
...,...,...,...,...,...,...,...,...
576,270,0.750,C MINOR_TRIAD,ROOT(5),C MOLL,T (I),,beeth101_20240513233548.mid
577,270,3.000,,,C MOLL,,,beeth101_20240513233548.mid
578,271,1.000,Fb! MINOR_SEVENTH,"QUINTSEXT(5,6)",C MOLL,,,beeth101_20240513233548.mid
579,271,3.000,,,C MOLL,,,beeth101_20240513233548.mid


## Analýza taktov a počet udalostí v nich

In [155]:
def assign_color(row):
    """Label one analysis row by the most specific item that was recognised.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide the keys 'Function', 'Key' and 'Chord'.

    Returns
    -------
    str
        Harmonic function takes precedence over key, key over chord; 'Iné' is
        returned when all three are missing.

    Missing values are detected with ``pd.notna`` so NaN is handled the same way
    as None (a plain ``is not None`` test would count NaN as "recognised").
    """
    if pd.notna(row['Function']):
        return 'Rozpoznaná harmonická funkcia'
    if pd.notna(row['Key']):
        return 'Rozpoznaná tónina'
    if pd.notna(row['Chord']):
        return 'Rozpoznaný akord'
    return 'Iné'

# Label every row with what the analyser managed to recognise.
colors = df.apply(assign_color, axis=1)

color_discrete_sequence=['green', 'orange', 'gray', 'lightgray']

# Bind each label to a fixed colour. With only a colour sequence, Plotly Express
# hands colours out in order of first appearance in the data, so the same label
# would be drawn in different colours for different pieces.
color_discrete_map = dict(zip(
    ['Rozpoznaná harmonická funkcia', 'Rozpoznaná tónina', 'Rozpoznaný akord', 'Iné'],
    color_discrete_sequence,
))

fig = px.scatter(df, x='Order', y='End', color=colors,
                 color_discrete_map=color_discrete_map,
                 title='Graf taktov a udalostí v nich',
                 labels={'Order': 'Takt', 'End': 'Doba'}
                 )

fig.show()


In [156]:
# Per measure ('Order'), take the largest 'End' value as that measure's duration.
measure_durations = (
    df.groupby('Order')['End']
    .max()
    .rename('Duration')
    .reset_index()
)

# Number of measures per duration, plus that number as a share of all measures.
duration_counts = (
    measure_durations['Duration']
    .value_counts()
    .rename_axis('Duration')
    .reset_index(name='Count')
)
total_measures = duration_counts['Count'].sum()
duration_counts['Percentage'] = (duration_counts['Count'] / total_measures) * 100

print(duration_counts)

   Duration  Count  Percentage
0       3.0    272       100.0


## Výskyt jednotlivých akordov

In [157]:
# Strip '!' characters from the chord labels before counting.
df['Chord'] = df['Chord'].str.replace('!', '', regex=False)

# One row per distinct chord label with its number of occurrences.
chord_counts = (
    df['Chord']
    .value_counts()
    .rename_axis('Chord')
    .reset_index(name='Count')
)

# Chord qualities; a chord's 1-based position in this list is its x-axis slot.
chord_types = [
    'MAJOR_TRIAD', 'MINOR_TRIAD', 'AUGMENTED_TRIAD', 'DIMINISHED_TRIAD',
    'DOMINANT_SEVENTH', 'DIMINISHED_SEVENTH', 'DIMINISHED_MINOR_SEVENTH',
    'MAJOR_SEVENTH', 'MINOR_SEVENTH', 'AUGMENTED_SEVENTH', 'MINOR_MAJOR_SEVENTH',
    'DOMINANT_SEVENTH_INCOMPLETE', 'DOMINANT_SEVENTH_ALT_INCOMPLETE',
    'MAJOR_SEVENTH_INCOMPLETE', 'DIMINISHED_SEVENTH_INCOMPLETE',
    'DIMINISHED_MINOR_SEVENTH_INCOMPLETE', 'MINOR_MAJOR_SEVENTH_INCOMPLETE',
]

# Note name -> number 0-11; enharmonic spellings share one number.
note_mapping = {
    'C': 0, 'H#': 0,
    'C#': 1, 'Db': 1,
    'D': 2,
    'D#': 3, 'Eb': 3,
    'E': 4, 'Fb': 4,
    'F': 5, 'E#': 5,
    'F#': 6, 'Gb': 6,
    'G': 7,
    'G#': 8, 'Ab': 8,
    'A': 9,
    'A#': 10, 'Bb': 10,
    'B': 11, 'Cb': 11,
}

# Number -> every note name mapped to it, in note_mapping order (used for tick labels).
inverse_note_mapping = {}
for note_name, pitch_class in note_mapping.items():
    inverse_note_mapping.setdefault(pitch_class, []).append(note_name)

# One record per distinct chord:
# (1-based chord-type slot for x, note number for y, occurrence count, chord label).
data = []
for index, row in chord_counts.iterrows():
    chord_note, chord_type = row['Chord'].split()
    x_value = chord_types.index(chord_type) + 1
    y_value = note_mapping[chord_note]
    size = row['Count']
    data.append((x_value, y_value, size, row['Chord']))

plot_df = pd.DataFrame(data, columns=['Chord_Type', 'Note', 'Size', 'Chord'])

# Bubble scaling: using the raw count as a pixel diameter makes chords that occur
# once or twice nearly invisible and lets frequent chords grow without bound.
# Scale the marker area against the largest count instead (Plotly's bubble-chart
# sizeref recipe); the same sizeref is shared by all traces so bubbles stay comparable.
max_marker_px = 60
largest_count = max((record[2] for record in data), default=1)
bubble_sizeref = 2.0 * largest_count / (max_marker_px ** 2)

fig = go.Figure()

# One trace per chord so each chord keeps its own legend entry.
for record in data:
    fig.add_trace(go.Scatter(
        x=[record[0]], y=[record[1]],
        mode='markers',
        # size is a one-element list: sizemode/sizeref/sizemin only take effect
        # when marker.size is given as an array.
        marker=dict(size=[record[2]], sizemode='area', sizeref=bubble_sizeref,
                    sizemin=3, color=record[2]),
        text=record[2],
        name=record[3]
    ))

fig.update_layout(
    title='Výskyt akordov',
    xaxis=dict(
        title='Typ akordu',
        tickmode='array',
        tickvals=list(range(1, len(chord_types) + 1)),
        ticktext=chord_types,
        tickangle=45
    ),
    yaxis=dict(
        title='Tóny',
        tickmode='array',
        tickvals=list(inverse_note_mapping.keys()),
        ticktext=[', '.join(notes) for notes in inverse_note_mapping.values()]
    )
)

fig.show()

## Zmena tóniny a jej percentuálne zastúpenia

In [158]:
import plotly.graph_objects as go

# Marker colour for each mode label used in the key plot.
color_mapping = {'MOL': 'blue', 'DUR': 'green'}

# Number -> note names, rebuilt from note_mapping for the y-axis tick labels.
inverse_note_mapping = {}
for note_name, pitch_class in note_mapping.items():
    inverse_note_mapping.setdefault(pitch_class, []).append(note_name)

def calculate_ticks_x(df):
    """Turn the per-measure 'End' values into one running position per row.

    Rows are visited in order. While 'Order' stays the same as on the previous
    row, the running position advances by the difference between this row's
    'End' and the previous one; on the first row of a different 'Order' it
    advances by that row's full 'End'.

    Returns a list with one cumulative position for every row of ``df``.
    """
    positions = []
    position = 0
    last_order, last_end = '', 0  # '' acts as "no previous measure yet"
    for order, end in zip(df['Order'], df['End']):
        step = end - last_end if order == last_order else end
        position += step
        positions.append(position)
        last_order, last_end = order, end
    return positions

# Running position of every row on the shared time axis.
df['Time'] = calculate_ticks_x(df)
# Number of the key's first word (e.g. 'C' in 'C MOLL'); None where the key is missing.
df['Key_Value'] = df['Key'].map(
    lambda key_label: None if pd.isna(key_label) else note_mapping[key_label.split()[0]]
)

def determine_color(key):
    """Return the mode label for a key string: 'MOL', 'DUR', or 'black'.

    'MOL' is checked before 'DUR'. A missing key (None/NaN) or a key that
    contains neither substring yields the fallback 'black'.
    """
    if pd.isna(key):
        return 'black'
    for mode in ('MOL', 'DUR'):
        if mode in key:
            return mode
    return 'black'

# Mode label per row ('MOL' / 'DUR', or 'black' when neither is found).
df['Key_Color'] = df['Key'].apply(determine_color)

# Rows without a key value cannot be placed on the y-axis.
df_filtered = df.dropna(subset=['Key_Value'])

fig = go.Figure()

# Line plot: key value over time as one light-grey line.
fig.add_trace(go.Scatter(x=df_filtered['Time'], y=df_filtered['Key_Value'], mode='lines',
                         line=dict(color='rgb(204, 204, 204)'), name='Zmeny tóniny v čase'))

# Scatter plot for the individual points, one trace per mode label.
for color, group in df_filtered.groupby('Key_Color'):
    # determine_color may return 'black', which color_mapping does not contain;
    # fall back to the label itself (a valid colour name) instead of raising KeyError.
    marker_color = color_mapping.get(color, color)
    fig.add_trace(go.Scatter(x=group['Time'], y=group['Key_Value'], mode='markers',
                             marker=dict(size=10, color=marker_color, opacity=0.7),
                             hovertemplate='<b>Tónina:</b> %{customdata[0]}<br><b>Takt:</b> %{customdata[1]}<br><b>Doba:</b> %{customdata[2]}',
                             customdata=group[['Key', 'Order', 'End']], name=color))

# Label the y-axis ticks with the note names that share each number.
fig.update_yaxes(tickmode='array', tickvals=list(inverse_note_mapping.keys()),
                 ticktext=[', '.join(notes) for notes in inverse_note_mapping.values()])

fig.update_layout(title='Zmeny tóniny v čase', xaxis_title='Doby', yaxis_title='Tónina')

fig.show()


In [159]:
# Rows per key; rows without a key are collected under the label 'None'.
row_total = len(df)
key_counts = df['Key'].value_counts()
key_counts['None'] = row_total - key_counts.sum()

# Convert the counts to percentages of all rows.
key_percentages = key_counts / row_total * 100
key_counts_df = pd.DataFrame({
    'Key': key_percentages.index,
    'Percent': key_percentages.values,
})

fig = px.pie(key_counts_df, names='Key', values='Percent',
             title='Percentuálne zastúpenie jednotlivých tónin')

fig.show()

## Harmonické funkcie

In [160]:
# Rows per harmonic function; rows without one are collected under 'None'.
row_total = len(df)
function_counts = df['Function'].value_counts()
function_counts['None'] = row_total - function_counts.sum()

# Convert the counts to percentages of all rows.
function_percentages = function_counts / row_total * 100
function_counts_df = pd.DataFrame({
    'Function': function_percentages.index,
    'Percent': function_percentages.values,
})

fig = px.pie(function_counts_df, names='Function', values='Percent',
             title='Percentuálne zastúpenie jednotlivých harmonických funkcií')

fig.show()

In [161]:
# y-axis slot for each harmonic function label.
function_order = {'T (I)': 0, 'II': 1, 'III': 2, 'S (IV)': 3, 'D (V)': 4, 'VI': 5, 'VII': 6}

df['Function_Order'] = df['Function'].map(function_order)
# Keep only rows whose function has a slot; work on a copy so df gains no extra column.
df_filtered = df.copy().dropna(subset=['Function_Order'])
# x position: the last 'Time' among the kept rows of the same measure.
df_filtered['Time_Hover'] = df_filtered.groupby('Order')['Time'].transform('last')

hover_text = [
    f"Tónina: {key}, Takt: {order}, Doba: {end}, Harmonická funkcia: {function}"
    for key, order, end, function in zip(
        df_filtered['Key'], df_filtered['Order'], df_filtered['End'], df_filtered['Function']
    )
]

function_markers = go.Scatter(
    x=df_filtered['Time_Hover'],
    y=df_filtered['Function_Order'],
    mode='markers',
    marker=dict(size=10, color=df_filtered['Function_Order'], colorscale='Viridis'),
    hovertext=hover_text,
)
fig = go.Figure(data=function_markers)

fig.update_layout(title='Zmeny harmonických funkcií v čase', xaxis_title='Doby', yaxis_title='Harmonická funkcia')
fig.update_yaxes(tickvals=list(function_order.values()), ticktext=list(function_order.keys()))

fig.show()


## Kadencie

In [162]:
# Rows that have both a Sequence and a Function, shown without the helper columns.
df_filtered = (
    df.copy()
    .dropna(subset=['Sequence', 'Function'])
    .drop(columns=['File', 'Time', 'Key_Value', 'Key_Color', 'Function_Order', 'Root'])
)
df_filtered

Unnamed: 0,Order,End,Chord,Key,Function,Sequence
52,26,3.0,C MINOR_TRIAD,C MOLL,T (I),"[T (I), II, D (V), T (I)]"
53,27,3.0,C MINOR_TRIAD,C MOLL,T (I),"[T (I), II, D (V), T (I)]"
54,28,0.75,C MINOR_TRIAD,C MOLL,T (I),"[T (I), II, D (V), T (I)]"
55,28,1.5,D DIMINISHED_TRIAD,C MOLL,II,"[T (I), II, D (V), T (I)]"
57,29,3.0,G MAJOR_TRIAD,C MOLL,D (V),"[T (I), II, D (V), T (I)]"
58,30,3.0,C MINOR_TRIAD,C MOLL,T (I),"[T (I), II, D (V), T (I)]"
133,64,1.125,Eb MAJOR_TRIAD,Eb DUR,T (I),"[T (I), VI, D (V), T (I)]"
135,64,1.875,Eb MAJOR_TRIAD,Eb DUR,T (I),"[T (I), VI, D (V), T (I)]"
137,65,2.0,Eb MAJOR_TRIAD,Eb DUR,T (I),"[T (I), VI, D (V), T (I)]"
138,65,3.0,C MINOR_SEVENTH,Eb DUR,VI,"[T (I), VI, D (V), T (I)]"


# Haydn

In [165]:
# Column names are supplied explicitly for the colon-separated export.
column_names = ['Order', 'End', 'Chord', 'Root', 'Key', 'Function', 'Sequence', 'File']
df = pd.read_csv("haydn.txt", names=column_names, sep=':')

# Collapse every missing-value marker ('null', ' null', NaN) into None.
df = df.replace({'null': None, np.nan: None, ' null': None})

# '[a, b, c]' -> ['a', 'b', 'c']; rows without a sequence stay None.
df['Sequence'] = df['Sequence'].map(
    lambda raw: None if raw is None else raw.strip('[]').split(', ')
)
df

Unnamed: 0,Order,End,Chord,Root,Key,Function,Sequence,File
0,2,1.0,,,G DUR,,,haydn-so_20240513235355.mid
1,2,2.0,D DOMINANT_SEVENTH_INCOMPLETE,"QUARTSEXT(4,6)",G DUR,D (V),,haydn-so_20240513235355.mid
2,3,1.0,G MAJOR_TRIAD,ROOT(5),G DUR,T (I),,haydn-so_20240513235355.mid
3,3,2.0,E MINOR_SEVENTH,"QUINTSEXT(5,6)",G DUR,VI,,haydn-so_20240513235355.mid
4,4,2.0,,,G DUR,,,haydn-so_20240513235355.mid
...,...,...,...,...,...,...,...,...
351,161,2.0,,,G DUR,,,haydn-so_20240513235355.mid
352,162,1.0,C MAJOR_SEVENTH,"TERZQUART(3,4)",G DUR,S (IV),,haydn-so_20240513235355.mid
353,162,2.0,A MINOR_TRIAD,SEXT(6),G DUR,II,,haydn-so_20240513235355.mid
354,163,2.0,G MAJOR_TRIAD,SEXT(6),G DUR,T (I),,haydn-so_20240513235355.mid


## Analýza taktov a počet udalostí v nich

In [166]:
def assign_color(row):
    """Label one analysis row by the most specific item that was recognised.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide the keys 'Function', 'Key' and 'Chord'.

    Returns
    -------
    str
        Harmonic function takes precedence over key, key over chord; 'Iné' is
        returned when all three are missing.

    Missing values are detected with ``pd.notna`` so NaN is handled the same way
    as None (a plain ``is not None`` test would count NaN as "recognised").
    """
    if pd.notna(row['Function']):
        return 'Rozpoznaná harmonická funkcia'
    if pd.notna(row['Key']):
        return 'Rozpoznaná tónina'
    if pd.notna(row['Chord']):
        return 'Rozpoznaný akord'
    return 'Iné'

# Label every row with what the analyser managed to recognise.
colors = df.apply(assign_color, axis=1)

color_discrete_sequence=['green', 'orange', 'gray', 'lightgray']

# Bind each label to a fixed colour. With only a colour sequence, Plotly Express
# hands colours out in order of first appearance in the data, so the same label
# would be drawn in different colours for different pieces.
color_discrete_map = dict(zip(
    ['Rozpoznaná harmonická funkcia', 'Rozpoznaná tónina', 'Rozpoznaný akord', 'Iné'],
    color_discrete_sequence,
))

fig = px.scatter(df, x='Order', y='End', color=colors,
                 color_discrete_map=color_discrete_map,
                 title='Graf taktov a udalostí v nich',
                 labels={'Order': 'Takt', 'End': 'Doba'}
                 )

fig.show()


In [167]:
# Per measure ('Order'), take the largest 'End' value as that measure's duration.
measure_durations = (
    df.groupby('Order')['End']
    .max()
    .rename('Duration')
    .reset_index()
)

# Number of measures per duration, plus that number as a share of all measures.
duration_counts = (
    measure_durations['Duration']
    .value_counts()
    .rename_axis('Duration')
    .reset_index(name='Count')
)
total_measures = duration_counts['Count'].sum()
duration_counts['Percentage'] = (duration_counts['Count'] / total_measures) * 100

print(duration_counts)

   Duration  Count  Percentage
0       2.0    156        97.5
1       1.0      4         2.5


## Výskyt jednotlivých akordov

In [168]:
# Strip '!' characters from the chord labels before counting.
df['Chord'] = df['Chord'].str.replace('!', '', regex=False)

# One row per distinct chord label with its number of occurrences.
chord_counts = (
    df['Chord']
    .value_counts()
    .rename_axis('Chord')
    .reset_index(name='Count')
)

# Chord qualities; a chord's 1-based position in this list is its x-axis slot.
chord_types = [
    'MAJOR_TRIAD', 'MINOR_TRIAD', 'AUGMENTED_TRIAD', 'DIMINISHED_TRIAD',
    'DOMINANT_SEVENTH', 'DIMINISHED_SEVENTH', 'DIMINISHED_MINOR_SEVENTH',
    'MAJOR_SEVENTH', 'MINOR_SEVENTH', 'AUGMENTED_SEVENTH', 'MINOR_MAJOR_SEVENTH',
    'DOMINANT_SEVENTH_INCOMPLETE', 'DOMINANT_SEVENTH_ALT_INCOMPLETE',
    'MAJOR_SEVENTH_INCOMPLETE', 'DIMINISHED_SEVENTH_INCOMPLETE',
    'DIMINISHED_MINOR_SEVENTH_INCOMPLETE', 'MINOR_MAJOR_SEVENTH_INCOMPLETE',
]

# Note name -> number 0-11; enharmonic spellings share one number.
note_mapping = {
    'C': 0, 'H#': 0,
    'C#': 1, 'Db': 1,
    'D': 2,
    'D#': 3, 'Eb': 3,
    'E': 4, 'Fb': 4,
    'F': 5, 'E#': 5,
    'F#': 6, 'Gb': 6,
    'G': 7,
    'G#': 8, 'Ab': 8,
    'A': 9,
    'A#': 10, 'Bb': 10,
    'B': 11, 'Cb': 11,
}

# Number -> every note name mapped to it, in note_mapping order (used for tick labels).
inverse_note_mapping = {}
for note_name, pitch_class in note_mapping.items():
    inverse_note_mapping.setdefault(pitch_class, []).append(note_name)

# One record per distinct chord:
# (1-based chord-type slot for x, note number for y, occurrence count, chord label).
data = []
for index, row in chord_counts.iterrows():
    chord_note, chord_type = row['Chord'].split()
    x_value = chord_types.index(chord_type) + 1
    y_value = note_mapping[chord_note]
    size = row['Count']
    data.append((x_value, y_value, size, row['Chord']))

plot_df = pd.DataFrame(data, columns=['Chord_Type', 'Note', 'Size', 'Chord'])

# Bubble scaling: using the raw count as a pixel diameter makes chords that occur
# once or twice nearly invisible and lets frequent chords grow without bound.
# Scale the marker area against the largest count instead (Plotly's bubble-chart
# sizeref recipe); the same sizeref is shared by all traces so bubbles stay comparable.
max_marker_px = 60
largest_count = max((record[2] for record in data), default=1)
bubble_sizeref = 2.0 * largest_count / (max_marker_px ** 2)

fig = go.Figure()

# One trace per chord so each chord keeps its own legend entry.
for record in data:
    fig.add_trace(go.Scatter(
        x=[record[0]], y=[record[1]],
        mode='markers',
        # size is a one-element list: sizemode/sizeref/sizemin only take effect
        # when marker.size is given as an array.
        marker=dict(size=[record[2]], sizemode='area', sizeref=bubble_sizeref,
                    sizemin=3, color=record[2]),
        text=record[2],
        name=record[3]
    ))

fig.update_layout(
    title='Výskyt akordov',
    xaxis=dict(
        title='Typ akordu',
        tickmode='array',
        tickvals=list(range(1, len(chord_types) + 1)),
        ticktext=chord_types,
        tickangle=45
    ),
    yaxis=dict(
        title='Tóny',
        tickmode='array',
        tickvals=list(inverse_note_mapping.keys()),
        ticktext=[', '.join(notes) for notes in inverse_note_mapping.values()]
    )
)

fig.show()

## Zmena tóniny a jej percentuálne zastúpenia

In [169]:
import plotly.graph_objects as go

# Marker colour for each mode label used in the key plot.
color_mapping = {'MOL': 'blue', 'DUR': 'green'}

# Number -> note names, rebuilt from note_mapping for the y-axis tick labels.
inverse_note_mapping = {}
for note_name, pitch_class in note_mapping.items():
    inverse_note_mapping.setdefault(pitch_class, []).append(note_name)

def calculate_ticks_x(df):
    """Turn the per-measure 'End' values into one running position per row.

    Rows are visited in order. While 'Order' stays the same as on the previous
    row, the running position advances by the difference between this row's
    'End' and the previous one; on the first row of a different 'Order' it
    advances by that row's full 'End'.

    Returns a list with one cumulative position for every row of ``df``.
    """
    positions = []
    position = 0
    last_order, last_end = '', 0  # '' acts as "no previous measure yet"
    for order, end in zip(df['Order'], df['End']):
        step = end - last_end if order == last_order else end
        position += step
        positions.append(position)
        last_order, last_end = order, end
    return positions

# Running position of every row on the shared time axis.
df['Time'] = calculate_ticks_x(df)
# Number of the key's first word (e.g. 'G' in 'G DUR'); None where the key is missing.
df['Key_Value'] = df['Key'].map(
    lambda key_label: None if pd.isna(key_label) else note_mapping[key_label.split()[0]]
)

def determine_color(key):
    """Return the mode label for a key string: 'MOL', 'DUR', or 'black'.

    'MOL' is checked before 'DUR'. A missing key (None/NaN) or a key that
    contains neither substring yields the fallback 'black'.
    """
    if pd.isna(key):
        return 'black'
    for mode in ('MOL', 'DUR'):
        if mode in key:
            return mode
    return 'black'

# Mode label per row ('MOL' / 'DUR', or 'black' when neither is found).
df['Key_Color'] = df['Key'].apply(determine_color)

# Rows without a key value cannot be placed on the y-axis.
df_filtered = df.dropna(subset=['Key_Value'])

fig = go.Figure()

# Line plot: key value over time as one light-grey line.
fig.add_trace(go.Scatter(x=df_filtered['Time'], y=df_filtered['Key_Value'], mode='lines',
                         line=dict(color='rgb(204, 204, 204)'), name='Zmeny tóniny v čase'))

# Scatter plot for the individual points, one trace per mode label.
for color, group in df_filtered.groupby('Key_Color'):
    # determine_color may return 'black', which color_mapping does not contain;
    # fall back to the label itself (a valid colour name) instead of raising KeyError.
    marker_color = color_mapping.get(color, color)
    fig.add_trace(go.Scatter(x=group['Time'], y=group['Key_Value'], mode='markers',
                             marker=dict(size=10, color=marker_color, opacity=0.7),
                             hovertemplate='<b>Tónina:</b> %{customdata[0]}<br><b>Takt:</b> %{customdata[1]}<br><b>Doba:</b> %{customdata[2]}',
                             customdata=group[['Key', 'Order', 'End']], name=color))

# Label the y-axis ticks with the note names that share each number.
fig.update_yaxes(tickmode='array', tickvals=list(inverse_note_mapping.keys()),
                 ticktext=[', '.join(notes) for notes in inverse_note_mapping.values()])

fig.update_layout(title='Zmeny tóniny v čase', xaxis_title='Doby', yaxis_title='Tónina')

fig.show()


In [170]:
# Rows per key; rows without a key are collected under the label 'None'.
row_total = len(df)
key_counts = df['Key'].value_counts()
key_counts['None'] = row_total - key_counts.sum()

# Convert the counts to percentages of all rows.
key_percentages = key_counts / row_total * 100
key_counts_df = pd.DataFrame({
    'Key': key_percentages.index,
    'Percent': key_percentages.values,
})

fig = px.pie(key_counts_df, names='Key', values='Percent',
             title='Percentuálne zastúpenie jednotlivých tónin')

fig.show()

## Harmonické funkcie

In [171]:
# Rows per harmonic function; rows without one are collected under 'None'.
row_total = len(df)
function_counts = df['Function'].value_counts()
function_counts['None'] = row_total - function_counts.sum()

# Convert the counts to percentages of all rows.
function_percentages = function_counts / row_total * 100
function_counts_df = pd.DataFrame({
    'Function': function_percentages.index,
    'Percent': function_percentages.values,
})

fig = px.pie(function_counts_df, names='Function', values='Percent',
             title='Percentuálne zastúpenie jednotlivých harmonických funkcií')

fig.show()

In [172]:
# y-axis slot for each harmonic function label.
function_order = {'T (I)': 0, 'II': 1, 'III': 2, 'S (IV)': 3, 'D (V)': 4, 'VI': 5, 'VII': 6}

df['Function_Order'] = df['Function'].map(function_order)
# Keep only rows whose function has a slot; work on a copy so df gains no extra column.
df_filtered = df.copy().dropna(subset=['Function_Order'])
# x position: the last 'Time' among the kept rows of the same measure.
df_filtered['Time_Hover'] = df_filtered.groupby('Order')['Time'].transform('last')

hover_text = [
    f"Tónina: {key}, Takt: {order}, Doba: {end}, Harmonická funkcia: {function}"
    for key, order, end, function in zip(
        df_filtered['Key'], df_filtered['Order'], df_filtered['End'], df_filtered['Function']
    )
]

function_markers = go.Scatter(
    x=df_filtered['Time_Hover'],
    y=df_filtered['Function_Order'],
    mode='markers',
    marker=dict(size=10, color=df_filtered['Function_Order'], colorscale='Viridis'),
    hovertext=hover_text,
)
fig = go.Figure(data=function_markers)

fig.update_layout(title='Zmeny harmonických funkcií v čase', xaxis_title='Doby', yaxis_title='Harmonická funkcia')
fig.update_yaxes(tickvals=list(function_order.values()), ticktext=list(function_order.keys()))

fig.show()


## Kadencie

In [173]:
# Rows that have both a Sequence and a Function, shown without the helper columns.
df_filtered = (
    df.copy()
    .dropna(subset=['Sequence', 'Function'])
    .drop(columns=['File', 'Time', 'Key_Value', 'Key_Color', 'Function_Order', 'Root'])
)
df_filtered

Unnamed: 0,Order,End,Chord,Key,Function,Sequence
137,70,2.0,C MAJOR_TRIAD,C DUR,T (I),"[T (I), VI, D (V), T (I)]"
138,71,0.5,A MAJOR_TRIAD,C DUR,VI,"[T (I), VI, D (V), T (I)]"
140,71,2.0,A DOMINANT_SEVENTH_INCOMPLETE,D DUR,D (V),"[T (I), VI, D (V), T (I)]"
141,72,0.5,D MAJOR_TRIAD,D DUR,T (I),"[T (I), VI, D (V), T (I)]"
146,74,0.5,D MAJOR_TRIAD,D DUR,T (I),"[T (I), VI, D (V), T (I)]"
148,74,2.0,D MAJOR_TRIAD,D DUR,T (I),"[T (I), VI, D (V), T (I)]"
149,75,1.0,B MAJOR_TRIAD,D DUR,VI,"[T (I), VI, D (V), T (I)]"
150,75,2.0,B DOMINANT_SEVENTH_INCOMPLETE,E MOLL,D (V),"[T (I), VI, D (V), T (I)]"
151,76,2.0,E MINOR_TRIAD,E MOLL,T (I),"[T (I), VI, D (V), T (I)]"
258,123,2.0,C MAJOR_TRIAD,C DUR,T (I),"[T (I), VI, D (V), T (I)]"


# Turca

In [176]:
# Column names are supplied explicitly for the colon-separated export.
column_names = ['Order', 'End', 'Chord', 'Root', 'Key', 'Function', 'Sequence', 'File']
df = pd.read_csv("turca.txt", names=column_names, sep=':')

# Collapse every missing-value marker ('null', ' null', NaN) into None.
df = df.replace({'null': None, np.nan: None, ' null': None})

# '[a, b, c]' -> ['a', 'b', 'c']; rows without a sequence stay None.
df['Sequence'] = df['Sequence'].map(
    lambda raw: None if raw is None else raw.strip('[]').split(', ')
)
df

Unnamed: 0,Order,End,Chord,Root,Key,Function,Sequence,File
0,1,2.0,,,A MOLL,,,turca_20240128182224.mid
1,2,1.0,A MINOR_TRIAD,ROOT(5),A MOLL,T (I),,turca_20240128182224.mid
2,2,2.0,,,A MOLL,,,turca_20240128182224.mid
3,3,2.0,F DOMINANT_SEVENTH_INCOMPLETE,SEXT(6),,,,turca_20240128182224.mid
4,4,1.0,E MAJOR_TRIAD,ROOT(5),,,,turca_20240128182224.mid
...,...,...,...,...,...,...,...,...
386,219,2.0,A MAJOR_TRIAD,ROOT(5),A DUR,T (I),,turca_20240128182224.mid
387,220,2.0,A MAJOR_TRIAD,ROOT(5),A DUR,T (I),,turca_20240128182224.mid
388,221,2.0,,,A DUR,,,turca_20240128182224.mid
389,222,2.0,A MAJOR_TRIAD,ROOT(5),A DUR,T (I),,turca_20240128182224.mid


## Analýza taktov a počet udalostí v nich

In [177]:
def assign_color(row):
    """Label one analysis row by the most specific item that was recognised.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide the keys 'Function', 'Key' and 'Chord'.

    Returns
    -------
    str
        Harmonic function takes precedence over key, key over chord; 'Iné' is
        returned when all three are missing.

    Missing values are detected with ``pd.notna`` so NaN is handled the same way
    as None (a plain ``is not None`` test would count NaN as "recognised").
    """
    if pd.notna(row['Function']):
        return 'Rozpoznaná harmonická funkcia'
    if pd.notna(row['Key']):
        return 'Rozpoznaná tónina'
    if pd.notna(row['Chord']):
        return 'Rozpoznaný akord'
    return 'Iné'

# Label every row with what the analyser managed to recognise.
colors = df.apply(assign_color, axis=1)

color_discrete_sequence=['green', 'orange', 'gray', 'lightgray']

# Bind each label to a fixed colour. With only a colour sequence, Plotly Express
# hands colours out in order of first appearance in the data, so the same label
# would be drawn in different colours for different pieces.
color_discrete_map = dict(zip(
    ['Rozpoznaná harmonická funkcia', 'Rozpoznaná tónina', 'Rozpoznaný akord', 'Iné'],
    color_discrete_sequence,
))

fig = px.scatter(df, x='Order', y='End', color=colors,
                 color_discrete_map=color_discrete_map,
                 title='Graf taktov a udalostí v nich',
                 labels={'Order': 'Takt', 'End': 'Doba'}
                 )

fig.show()


In [178]:
# Per measure ('Order'), take the largest 'End' value as that measure's duration.
measure_durations = (
    df.groupby('Order')['End']
    .max()
    .rename('Duration')
    .reset_index()
)

# Number of measures per duration, plus that number as a share of all measures.
duration_counts = (
    measure_durations['Duration']
    .value_counts()
    .rename_axis('Duration')
    .reset_index(name='Count')
)
total_measures = duration_counts['Count'].sum()
duration_counts['Percentage'] = (duration_counts['Count'] / total_measures) * 100

print(duration_counts)

   Duration  Count  Percentage
0       2.0    223       100.0


## Výskyt jednotlivých akordov

In [179]:
# Strip '!' characters from the chord labels before counting.
df['Chord'] = df['Chord'].str.replace('!', '', regex=False)

# One row per distinct chord label with its number of occurrences.
chord_counts = (
    df['Chord']
    .value_counts()
    .rename_axis('Chord')
    .reset_index(name='Count')
)

# Chord qualities; a chord's 1-based position in this list is its x-axis slot.
chord_types = [
    'MAJOR_TRIAD', 'MINOR_TRIAD', 'AUGMENTED_TRIAD', 'DIMINISHED_TRIAD',
    'DOMINANT_SEVENTH', 'DIMINISHED_SEVENTH', 'DIMINISHED_MINOR_SEVENTH',
    'MAJOR_SEVENTH', 'MINOR_SEVENTH', 'AUGMENTED_SEVENTH', 'MINOR_MAJOR_SEVENTH',
    'DOMINANT_SEVENTH_INCOMPLETE', 'DOMINANT_SEVENTH_ALT_INCOMPLETE',
    'MAJOR_SEVENTH_INCOMPLETE', 'DIMINISHED_SEVENTH_INCOMPLETE',
    'DIMINISHED_MINOR_SEVENTH_INCOMPLETE', 'MINOR_MAJOR_SEVENTH_INCOMPLETE',
]

# Note name -> number 0-11; enharmonic spellings share one number.
note_mapping = {
    'C': 0, 'H#': 0,
    'C#': 1, 'Db': 1,
    'D': 2,
    'D#': 3, 'Eb': 3,
    'E': 4, 'Fb': 4,
    'F': 5, 'E#': 5,
    'F#': 6, 'Gb': 6,
    'G': 7,
    'G#': 8, 'Ab': 8,
    'A': 9,
    'A#': 10, 'Bb': 10,
    'B': 11, 'Cb': 11,
}

# Number -> every note name mapped to it, in note_mapping order (used for tick labels).
inverse_note_mapping = {}
for note_name, pitch_class in note_mapping.items():
    inverse_note_mapping.setdefault(pitch_class, []).append(note_name)

# One record per distinct chord:
# (1-based chord-type slot for x, note number for y, occurrence count, chord label).
data = []
for index, row in chord_counts.iterrows():
    chord_note, chord_type = row['Chord'].split()
    x_value = chord_types.index(chord_type) + 1
    y_value = note_mapping[chord_note]
    size = row['Count']
    data.append((x_value, y_value, size, row['Chord']))

plot_df = pd.DataFrame(data, columns=['Chord_Type', 'Note', 'Size', 'Chord'])

# Bubble scaling: using the raw count as a pixel diameter makes chords that occur
# once or twice nearly invisible and lets frequent chords grow without bound.
# Scale the marker area against the largest count instead (Plotly's bubble-chart
# sizeref recipe); the same sizeref is shared by all traces so bubbles stay comparable.
max_marker_px = 60
largest_count = max((record[2] for record in data), default=1)
bubble_sizeref = 2.0 * largest_count / (max_marker_px ** 2)

fig = go.Figure()

# One trace per chord so each chord keeps its own legend entry.
for record in data:
    fig.add_trace(go.Scatter(
        x=[record[0]], y=[record[1]],
        mode='markers',
        # size is a one-element list: sizemode/sizeref/sizemin only take effect
        # when marker.size is given as an array.
        marker=dict(size=[record[2]], sizemode='area', sizeref=bubble_sizeref,
                    sizemin=3, color=record[2]),
        text=record[2],
        name=record[3]
    ))

fig.update_layout(
    title='Výskyt akordov',
    xaxis=dict(
        title='Typ akordu',
        tickmode='array',
        tickvals=list(range(1, len(chord_types) + 1)),
        ticktext=chord_types,
        tickangle=45
    ),
    yaxis=dict(
        title='Tóny',
        tickmode='array',
        tickvals=list(inverse_note_mapping.keys()),
        ticktext=[', '.join(notes) for notes in inverse_note_mapping.values()]
    )
)

fig.show()

## Zmena tóniny a jej percentuálne zastúpenia

In [180]:
import plotly.graph_objects as go

# Marker color for each mode label used in the key-change plot.
color_mapping = dict(MOL='blue', DUR='green')

# Note number -> list of spellings, in the order they appear in note_mapping.
inverse_note_mapping = {}
for spelling, note_number in note_mapping.items():
    inverse_note_mapping.setdefault(note_number, []).append(spelling)

def calculate_ticks_x(df):
    """Return a running position for every row of ``df``, in row order.

    While consecutive rows share the same ``Order`` value, the position advances
    by the difference between the row's ``End`` and the previous row's ``End``.
    When ``Order`` changes (and on the first row), the row's full ``End`` is
    added instead.

    Args:
        df: DataFrame with ``Order`` and ``End`` columns.

    Returns:
        list: one cumulative position per row.
    """
    positions = []
    elapsed = 0
    last_order, last_end = '', 0
    for _, event in df.iterrows():
        if last_order == event['Order']:
            # Same Order as the previous row: only the step since that row counts.
            step = event['End'] - last_end
        else:
            # New Order: the row contributes its whole End offset.
            step = event['End']
        elapsed += step
        positions.append(elapsed)
        last_order, last_end = event['Order'], event['End']
    return positions

# Running position of every row, used as the x axis of the plots below.
df['Time'] = calculate_ticks_x(df)

# Note number of the first word of each key label; rows without a key get None.
df['Key_Value'] = df['Key'].apply(
    lambda key_label: None if pd.isna(key_label) else note_mapping[key_label.split()[0]]
)

def determine_color(key):
    """Return the mode label found in a key string.

    Args:
        key: key label such as ``'A DUR'``, or a missing value (None / NaN).

    Returns:
        ``'MOL'`` if the label contains ``'MOL'``, otherwise ``'DUR'`` if it
        contains ``'DUR'``; ``'black'`` for missing values and for labels that
        contain neither.
    """
    if pd.isna(key):
        return 'black'
    # 'MOL' is checked first, so it wins when both substrings are present.
    for mode_label in ('MOL', 'DUR'):
        if mode_label in key:
            return mode_label
    return 'black'

# Label every row with what determine_color() returns for its key:
# 'MOL', 'DUR', or the fallback 'black'.
df['Key_Color'] = df['Key'].apply(determine_color)

# Rows without a Key_Value cannot be placed on the y axis, so they are left out.
df_filtered = df.dropna(subset=['Key_Value'])

fig = go.Figure()

# Line plot
fig.add_trace(go.Scatter(x=df_filtered['Time'], y=df_filtered['Key_Value'], mode='lines',
                         line=dict(color='rgb(204, 204, 204)'), name='Zmeny tóniny v čase'))

# Scatter plot for the individual points, one trace per Key_Color label.
# determine_color() can yield 'black' for a key that contains neither 'MOL' nor
# 'DUR', and color_mapping has no entry for that label; fall back to black
# instead of raising KeyError.
for color, group in df_filtered.groupby('Key_Color'):
    fig.add_trace(go.Scatter(x=group['Time'], y=group['Key_Value'], mode='markers',
                             marker=dict(size=10, color=color_mapping.get(color, 'black'), opacity=0.7),
                             hovertemplate='<b>Tónina:</b> %{customdata[0]}<br><b>Takt:</b> %{customdata[1]}<br><b>Doba:</b> %{customdata[2]}',
                             customdata=group[['Key', 'Order', 'End']], name=color))

# y ticks: one per note number, labelled with all spellings of that note.
fig.update_yaxes(tickmode='array', tickvals=list(inverse_note_mapping.keys()),
                 ticktext=[', '.join(notes) for notes in inverse_note_mapping.values()])

fig.update_layout(title='Zmeny tóniny v čase', xaxis_title='Doby', yaxis_title='Tónina')

fig.show()


In [181]:
# Number of rows per key; rows that have no key are collected under 'None'.
key_counts = df['Key'].value_counts()
key_counts['None'] = len(df) - key_counts.sum()

# Share of all rows held by each key, in percent.
key_percentages = key_counts.div(len(df)).mul(100)
key_counts_df = pd.DataFrame({
    'Key': key_percentages.index,
    'Percent': key_percentages.to_numpy(),
})

fig = px.pie(
    key_counts_df,
    names='Key',
    values='Percent',
    title='Percentuálne zastúpenie jednotlivých tónin',
)

fig.show()

## Harmonické funkcie

In [182]:
# Number of rows per harmonic function; rows without one are collected under 'None'.
function_counts = df['Function'].value_counts()
function_counts['None'] = len(df) - function_counts.sum()

# Share of all rows held by each harmonic function, in percent.
function_percentages = function_counts.div(len(df)).mul(100)
function_counts_df = pd.DataFrame({
    'Function': function_percentages.index,
    'Percent': function_percentages.to_numpy(),
})

fig = px.pie(
    function_counts_df,
    names='Function',
    values='Percent',
    title='Percentuálne zastúpenie jednotlivých harmonických funkcií',
)

fig.show()

In [183]:
# Harmonic-function label -> row on the y axis (0 at the bottom).
function_order = {
    label: position
    for position, label in enumerate(['T (I)', 'II', 'III', 'S (IV)', 'D (V)', 'VI', 'VII'])
}

df['Function_Order'] = df['Function'].map(function_order)

# Keep only rows whose function is one of the labels above.
df_filtered = df.dropna(subset=['Function_Order']).copy()
# Every row takes the Time of the last remaining row that shares its Order value.
df_filtered['Time_Hover'] = df_filtered.groupby('Order')['Time'].transform('last')

hover_text = [
    f"Tónina: {row['Key']}, Takt: {row['Order']}, Doba: {row['End']}, Harmonická funkcia: {row['Function']}"
    for _, row in df_filtered.iterrows()
]

fig = go.Figure()
fig.add_trace(go.Scatter(
    x=df_filtered['Time_Hover'],
    y=df_filtered['Function_Order'],
    mode='markers',
    marker=dict(size=10, color=df_filtered['Function_Order'], colorscale='Viridis'),
    hovertext=hover_text,
))

fig.update_layout(title='Zmeny harmonických funkcií v čase', xaxis_title='Doby', yaxis_title='Harmonická funkcia')
# Show the function labels instead of their numeric positions.
fig.update_yaxes(tickvals=list(function_order.values()), ticktext=list(function_order))

fig.show()


## Kadencie

In [184]:
# Keep the rows that have both a Sequence and a Function value, then drop the
# columns that are not shown in the table below.
df_filtered = df.copy().dropna(subset=['Sequence', 'Function'])
df_filtered = df_filtered.drop(
    columns=['File', 'Time', 'Key_Value', 'Key_Color', 'Function_Order', 'Root']
)
df_filtered

Unnamed: 0,Order,End,Chord,Key,Function,Sequence
144,88,0.5,A MAJOR_TRIAD,A DUR,T (I),"[T (I), VI, D (V), T (I)]"
145,88,1.0,F# MINOR_TRIAD,A DUR,VI,"[T (I), VI, D (V), T (I)]"
147,88,2.0,E MAJOR_TRIAD,A DUR,D (V),"[T (I), VI, D (V), T (I)]"
149,90,1.5,F# MINOR_TRIAD,F# MOLL,T (I),"[T (I), VI, D (V), T (I)]"
179,104,0.5,A MAJOR_TRIAD,A DUR,T (I),"[T (I), VI, D (V), T (I)]"
180,104,1.0,F# MINOR_TRIAD,A DUR,VI,"[T (I), VI, D (V), T (I)]"
182,104,2.0,E MAJOR_TRIAD,A DUR,D (V),"[T (I), VI, D (V), T (I)]"
184,106,1.5,F# MINOR_TRIAD,F# MOLL,T (I),"[T (I), VI, D (V), T (I)]"
