In [2]:
using DelimitedFiles
using Distances
using FileIO
using LinearAlgebra
using Random
using Statistics
using DataFrames
using DataFramesMeta
using Plots
using PyCall
using CSV

In [3]:
# music21 (a Python package, imported through PyCall) is used to read the scores
# because it loads the MusicXML files successfully, while the MusicXML Julia
# package does not.
m21 = pyimport("music21")

PyObject <module 'music21' from '/home/alfredo/.local/lib/python3.10/site-packages/music21/__init__.py'>

In [4]:
# Load the helper functions kept in the personal CoE-Testing checkout.
git_path = "/home/alfredo/Git/CoE-Testing/src/"
for src_file in ("CEGFunctions.jl", "Constr_series.jl", "IM_Functions.jl", "StochasticFunctions.jl")
    include(joinpath(git_path, src_file))
end

get_stochastic_kseq (generic function with 1 method)

In [5]:
#More functions...
"""
    get_xml_df(piece_xml)

Build a `DataFrame` from the table returned by
`get_piece_by_measure(piece_xml, csv=false)`.

Columns 1 to 6 of that table become `Measure`, `TimeSignature`, `StartQuarter`,
`EndQuarter`, `Duration` and `Pitch`; `Measure` and `Pitch` are converted to
`Int64` vectors, the other columns are taken as they are.
"""
function get_xml_df(piece_xml)
    notes = get_piece_by_measure(piece_xml; csv = false)
    measures = convert(Vector{Int64}, notes[:, 1])
    pitches = convert(Vector{Int64}, notes[:, 6])
    return DataFrame(;
        Measure = measures,
        TimeSignature = notes[:, 2],
        StartQuarter = notes[:, 3],
        EndQuarter = notes[:, 4],
        Duration = notes[:, 5],
        Pitch = pitches,
    )
end
"""
    get_csv_df(piece_csv)

Build a `DataFrame` from the first output of
`get_piece_by_measure(piece_csv, csv=true)`, which is treated as a collection
of per-measure tables.

The tables are stacked vertically. `Measure` holds, for every stacked row, the
position of the table it came from; columns 2 to 6 become `TimeSignature`,
`StartTime`, `EndTime`, `Duration` and `Pitch` (`Pitch` converted to `Int64`).
"""
function get_csv_df(piece_csv)
    measures = get_piece_by_measure(piece_csv; csv = true)[1]
    # measure k contributes as many copies of k as it has rows
    labels = vcat([fill(k, size(measures[k], 1)) for k in 1:length(measures)]...)
    notes = vcat(measures...)
    return DataFrame(;
        Measure = convert(Vector{Int64}, labels),
        TimeSignature = notes[:, 2],
        StartTime = notes[:, 3],
        EndTime = notes[:, 4],
        Duration = notes[:, 5],
        Pitch = convert(Vector{Int64}, notes[:, 6]),
    )
end
"""
    fastuniq(v)

Collapse every run of consecutive equal elements of `v` into a single element.

Only neighbouring duplicates are removed, so a value may appear again later in
the result. The output is a new `Vector` with the element type of `v`.
"""
function fastuniq(v)
    collapsed = Vector{eltype(v)}()
    if length(v) > 0
        previous = v[1]
        push!(collapsed, previous)
        for current in v
            # `current == previous` means we are still inside the same run
            current != previous || continue
            previous = current
            push!(collapsed, previous)
        end
    end
    return collapsed
end
"""
    get_entropy(probs)

Return the Shannon entropy, in bits, of the probabilities in `probs`, i.e. the
sum of `-p * log2(p)` over all entries.

Entries equal to zero contribute `0` (the limit of `p * log2(p)` as `p -> 0`)
instead of the `NaN` that `0 * log2(0)` would produce, so a distribution with
zero-probability outcomes still has a finite entropy.
"""
function get_entropy(probs)
    term(p) = iszero(p) ? zero(float(p)) : -p * log2(p)
    return mapreduce(term, +, probs)
end
"""
    get_key_IC(list_keys)

Return `-log2(p)`, where `p` is the first entry of the second column of
`list_keys`. The whole second column is converted to `Float64` beforehand.
"""
function get_key_IC(list_keys)
    probs = convert(Vector{Float64}, list_keys[:, 2])
    top_prob = probs[1]
    return -log2(top_prob)
end
"""
    get_distance_ces(ce1, ce2)

Return the Euclidean distance between the points `ce1` and `ce2`, rounded to
4 digits, after translating the third coordinate of `ce2` by multiples of the
global `h_octav` until it lies within `h_octav / 2` of the third coordinate of
`ce1`.

The translation is applied to a copy, so the caller's `ce2` is left untouched.
"""
function get_distance_ces(ce1, ce2)
    target = ce2
    z_dif = ce2[3] - ce1[3]
    if abs(z_dif) > h_octav / 2
        # copy only when a shift is needed, so `ce2` itself is never modified
        target = copy(ce2)
        while abs(z_dif) > h_octav / 2 #translating over z to be in the same octave (same CE region)
            if z_dif > 0
                target[3] = target[3] - h_octav
            else
                target[3] = target[3] + h_octav
            end
            z_dif = target[3] - ce1[3]
        end
    end
    return round(euclidean(ce1, target), digits = 4)
end
"""
    divide_chunk_notes(chunk, w_s, n_c)

Split the rows of `chunk` into `n_c` consecutive windows of width `w_s`, based
on the values in column 3.

Window `i` holds the rows whose column-3 value `x` satisfies
`st_m + w_s * (i - 1) <= x < st_m + w_s * i`, where `st_m` is the minimum of
column 3. Windows without rows are left out of the returned vector.
"""
function divide_chunk_notes(chunk, w_s, n_c)
    starts = chunk[:, 3]
    st_m = minimum(starts)
    windows = Any[]
    for i in 1:n_c
        rows = findall(x -> st_m + w_s * (i - 1) <= x < st_m + w_s * i, starts)
        piece = chunk[rows, :]
        isempty(piece) || push!(windows, piece)
    end
    return windows
end
"""
    get_distance_to_keys(c_i)

Rank the entries of the global `pos_all_keys` by their Euclidean distance
(rounded to 4 digits) to the point `c_i`.

Returns the first 12 rows of a three-column table sorted from closest to
farthest: the matching entry of the global `all_keys`, the distance, and the
entry of `pos_all_keys`.
"""
function get_distance_to_keys(c_i)
    # Euclidean distance from c_i to every key position
    dists = [round(euclidean(c_i, key_pos), digits = 4) for key_pos in pos_all_keys]

    # closest key first
    order = sortperm(dists)

    table = hcat(all_keys[order], dists[order], pos_all_keys[order])
    return table[1:12, :]
end
"""
    cluster_notes(ptcs)

Return a version of `get_cfpitch(mod.(ptcs, 12))` in which individual entries
have been moved by ±12 (via `shift_outlier`) so that the values lie close
together.

Steps:
1. If the mapped values fall on both sides of 6, every member of the smaller
   side (the `< 6` side on ties) is passed through `shift_outlier`.
2. Repeatedly, the entry farthest from the mean of the remaining entries is
   recorded and shifted. The search stops once more than 10 iterations have run
   and the last five recorded entries involve at most two distinct indices.
3. The configuration with the smallest mean leave-one-out deviation seen during
   the search is returned.

NOTE(review): `get_cfpitch` is defined elsewhere; it presumably maps pitch
classes to circle-of-fifths indices — confirm.
"""
function cluster_notes(ptcs)
    pitch_classes = [mod(p, 12) for p in ptcs]
    spi_notes = get_cfpitch(pitch_classes)

    below = findall(x -> x < 6, spi_notes)
    above = findall(x -> x >= 6, spi_notes)
    if !(isempty(above) || isempty(below))
        minority = length(above) < length(below) ? above : below
        for idx in minority
            spi_notes = shift_outlier(spi_notes, idx)
        end
    end

    dmean = Float64[]      # mean deviation of every visited configuration
    oliers = Int64[]       # index of the farthest entry at every iteration
    candidates = []        # the visited configurations themselves
    while true
        dt_mean = zeros(length(spi_notes))
        for (i, note) in enumerate(spi_notes)
            # distance of entry i to the mean of all the other entries
            dt_mean[i] = abs(note - mean(spi_notes[1:end .!= i]))
        end
        olier = findmax(dt_mean)[2]
        push!(dmean, mean(dt_mean))
        push!(oliers, olier)
        push!(candidates, spi_notes)
        if length(oliers) > 10 && length(unique(oliers[end-4:end])) <= 2
            break
        end
        spi_notes = shift_outlier(spi_notes, olier)
    end
    return candidates[findmin(dmean)[2]]
end
"""
    shift_outlier(notes, olier)

Return a copy of `notes` in which the entry at index `olier` is moved by 12
towards the median of `notes`: lowered by 12 when it lies above the median,
raised by 12 when it lies below, and left unchanged when it equals the median.
`notes` itself is not modified.
"""
function shift_outlier(notes, olier)
    offset = notes[olier] - median(notes)
    shifted = copy(notes)
    if offset > 0
        shifted[olier] -= 12
    elseif offset < 0
        shifted[olier] += 12
    end
    return shifted
end
"""
    get_center_effect(chunk_notes; r=1, h=sqrt(2/15), sbeat_w=[[1.],[1.]], lin_w=1, ...)

Return the weighted average of the positions `get_pitch(x, r=r, h=h)` of the
notes in `chunk_notes`.

Column 6 of `chunk_notes` supplies the pitches, column 5 the durations and
column 1 the value compared against `sbeat_w[1]`. Each note's weight is the
product of its beat weight (1 unless column 1 equals `sbeat_w[1][b]`, in which
case `sbeat_w[2][b]`), its duration, and the weight returned for its pitch by
`get_local_lin_w(ptcs, lin_w)`. Pitches are mapped through `cluster_notes` and
offset by 24 before being positioned.

The keywords `mod_12`, `all_keys` and `pos_all_keys` are accepted but not used
in the body.

NOTE(review): column 1 is used as the beat of each note, but in the table built
by `get_xml_df` column 1 is `Measure` — confirm which is intended.
"""
function get_center_effect(chunk_notes; r=1, h=sqrt(2/15), mod_12=false,all_keys=all_keys, pos_all_keys=pos_all_keys, sbeat_w=[[1.],[1.]], lin_w=1)
    pbeat = chunk_notes[:, 1]
    durs = chunk_notes[:, 5]
    ptcs = chunk_notes[:, 6]

    # beat weights: 1 everywhere except at the positions listed in sbeat_w[1]
    beat_w = ones(length(durs))
    for (b, beat) in enumerate(sbeat_w[1])
        beat_w[findall(x -> x == beat, pbeat)] .= sbeat_w[2][b]
    end

    # linear weight of every distinct pitch, then looked up per note
    notas, n_we = get_local_lin_w(ptcs, lin_w)
    ii = vcat([findall(y -> y == p, notas) for p in ptcs]...)
    b_wei = n_we[ii]

    # (x, y, z) location of every note
    spi_ix = cluster_notes(ptcs) .+ 24
    spi_p = [get_pitch(ix, r=r, h=h) for ix in spi_ix]

    # total weight per note and the weighted mean position
    t_ws = map(*, beat_w, durs, b_wei)
    cv_i = map(*, t_ws, spi_p) / sum(t_ws)
    return sum(cv_i)
end
"""
    get_key_ent(key_list)

Return the entropy (via `get_entropy`) of the distribution obtained from the
second column of `key_list`: every value `d` is turned into `exp(-12 * d)` and
the results are normalised to sum to one.
"""
function get_key_ent(key_list)
    weights = exp.(-12 .* Float64.(key_list[:, 2]))
    return get_entropy(weights ./ sum(weights))
end

get_key_ent (generic function with 1 method)

In [6]:
# Input and output locations, followed by the directory listings used by the later cells.
xml_path = "/home/alfredo/MusicPenn/Beethoven_HAnalysis/XMLFiles" # MusicXML scores
adata_path = "/home/alfredo/MusicPenn/Beethoven_HAnalysis/AnnotationsTSV" # hand-annotated data
out_path = "/home/alfredo/MusicPenn/Beethoven_HAnalysis/KeyCallCSV" # exported CSV tables
# NOTE(review): the export loop reads xml_list[i] together with adata_list[i], so both
# directories are assumed to list the same pieces in the same order — confirm.
xml_list = readdir(xml_path)
adata_list = readdir(adata_path);

In [7]:
# Opus label of every score: the first two "_"-separated fields of its file name.
opus_number = [join(split(fname, "_")[1:2], "_") for fname in xml_list]
u_opus = unique(opus_number);

70-element Vector{String}:
 "op127_no12"
 "op127_no12"
 "op127_no12"
 "op127_no12"
 "op130_no13"
 "op130_no13"
 "op130_no13"
 "op130_no13"
 "op130_no13"
 "op130_no13"
 ⋮
 "op59_no9"
 "op74_no10"
 "op74_no10"
 "op74_no10"
 "op74_no10"
 "op95_no11"
 "op95_no11"
 "op95_no11"
 "op95_no11"

In [9]:
# For every distinct opus label, the positions in adata_list whose file name contains it.
ix_all = map(u_opus) do opus
    findall(fname -> occursin(opus, fname), adata_list)
end

16-element Vector{Vector{Int64}}:
 [1, 2, 3, 4]
 [5, 6, 7, 8, 9, 10]
 [11, 12, 13, 14, 15, 16, 17]
 [18, 19, 20, 21, 22]
 [23, 24, 25, 26]
 [27, 28, 29, 30]
 [31, 32, 33, 34]
 [35, 36, 37, 38]
 [39, 40, 41, 42]
 [43, 44, 45, 46]
 [47, 48, 49, 50]
 [51, 52, 53, 54]
 [55, 56, 57, 58]
 [59, 60, 61, 62]
 [63, 64, 65, 66]
 [67, 68, 69, 70]

In [50]:

# For every score: estimate one key per measure from its centre of effect, map the
# key sequence to functional-harmony labels relative to the most frequent key,
# align the result with the measures of the hand annotation and export the
# comparison table as CSV.
# NOTE(review): xml_list[xml_i] and adata_list[xml_i] are paired by position, which
# assumes both directories list the same pieces in the same order — confirm.
for xml_i in eachindex(xml_list)
    local fh_kseq
    xml_file = xml_list[xml_i]

    #parsing the MusicXML score with music21
    piece_xml = m21.converter.parse(joinpath(xml_path, xml_file))

    #loading hand-annotation data
    piece_ann = CSV.read(joinpath(adata_path, adata_list[xml_i]), DataFrame)
    df_piece = get_xml_df(piece_xml)

    #one key call and its uncertainty per measure
    kseq = Any[]
    n_bar = Int[]
    unc_seq = Float64[]
    for m in groupby(df_piece, :Measure)
        out = get_distance_to_keys(get_center_effect(Matrix(m)))
        push!(n_bar, m[1, :Measure])
        push!(kseq, out[1, 1])
        push!(unc_seq, round(get_key_ent(out), digits = 4))
    end

    #fundamental key
    fun_key = get_rank_freq(kseq)[1, 1] #Getting the most repeated key (expected to be the global key)

    #mapping the key sequence to a functional harmony sequence, taking as reference the global key
    try
        fh_kseq = funhar_seq(kseq, fun_key)
    catch
        #best-effort fallbacks: first retry with the key looked up in key_translate,
        #then with the most frequent key again
        try
            fh_kseq = funhar_seq(kseq, key_translate[fun_key])
        catch
            fun_key = get_rank_freq(kseq)[1, 1]
            fh_kseq = funhar_seq(kseq, fun_key)
        end
    end

    #aligning the per-measure results with the annotated measures;
    #annotated measures without a key call are filled with "N/A"
    fh_out = []
    unc_out = []
    kseq_out = []
    for n_m in piece_ann.measure
        loc = findfirst(==(n_m), n_bar)
        if isnothing(loc)
            push!(fh_out, "N/A")
            push!(unc_out, "N/A")
            push!(kseq_out, "N/A")
        else
            push!(fh_out, fh_kseq[loc])
            push!(unc_out, unc_seq[loc])
            push!(kseq_out, kseq[loc])
        end
    end

    #one copy of the estimated global key per output row, so the column has the
    #same length as the other columns of the table
    g_key = fill(get_rank_freq(kseq)[1, 1], length(fh_out))

    #construct the output dataframe
    df_out = DataFrame(
        :Measure => piece_ann[!, :measure],
        :Global_Key => piece_ann[!, :global_key],
        :Local_Key => piece_ann[!, :local_key],
        :Chord => piece_ann[!, :chord],
        :Relative_Numeral => piece_ann[!, :numeral],
        :CoE_KeyCall => fh_out,
        :CoE_Uncert => unc_out,
        :CoE_KeySequence => kseq_out,
        :CoE_GlobalKey => g_key
    )

    #exporting...
    name_out = "CoEKeyCallNEW-$(join([split(xml_file,".")[1] "csv"],"."))"
    CSV.write(joinpath(out_path, name_out), df_out, header=true)
    println("File $(xml_file) DONE!")
end

File op127_no12_mov1.musicxml DONE!


File op127_no12_mov2.musicxml DONE!


File op127_no12_mov3.musicxml DONE!


File op127_no12_mov4.musicxml DONE!


File op130_no13_mov1.musicxml DONE!


File op130_no13_mov2.musicxml DONE!


File op130_no13_mov3.musicxml DONE!


File op130_no13_mov4.musicxml DONE!


File op130_no13_mov5.musicxml DONE!


File op130_no13_mov6.musicxml DONE!


File op131_no14_mov1.musicxml DONE!


File op131_no14_mov2.musicxml DONE!


File op131_no14_mov3.musicxml DONE!


File op131_no14_mov4.musicxml DONE!


File op131_no14_mov5.musicxml DONE!


File op131_no14_mov6.musicxml DONE!


File op131_no14_mov7.musicxml DONE!


File op132_no15_mov1.musicxml DONE!


File op132_no15_mov2.musicxml DONE!


File op132_no15_mov3.musicxml DONE!


File op132_no15_mov4.musicxml DONE!


File op132_no15_mov5.musicxml DONE!


File op135_no16_mov1.musicxml DONE!


File op135_no16_mov2.musicxml DONE!


File op135_no16_mov3.musicxml DONE!


File op135_no16_mov4.musicxml DONE!


File op18_no1_mov1.musicxml DONE!


File op18_no1_mov2.musicxml DONE!


File op18_no1_mov3.musicxml DONE!


File op18_no1_mov4.musicxml DONE!


File op18_no2_mov1.musicxml DONE!


File op18_no2_mov2.musicxml DONE!


File op18_no2_mov3.musicxml DONE!


File op18_no2_mov4.musicxml DONE!


File op18_no3_mov1.musicxml DONE!


File op18_no3_mov2.musicxml DONE!


File op18_no3_mov3.musicxml DONE!


File op18_no3_mov4.musicxml DONE!


File op18_no4_mov1.musicxml DONE!


File op18_no4_mov2.musicxml DONE!


File op18_no4_mov3.musicxml DONE!


File op18_no4_mov4.musicxml DONE!


File op18_no5_mov1.musicxml DONE!


File op18_no5_mov2.musicxml DONE!


File op18_no5_mov3.musicxml DONE!


File op18_no5_mov4.musicxml DONE!


File op18_no6_mov1.musicxml DONE!


File op18_no6_mov2.musicxml DONE!


File op18_no6_mov3.musicxml DONE!


File op18_no6_mov4.musicxml DONE!


File op59_no7_mov1.musicxml DONE!


File op59_no7_mov2.musicxml DONE!


File op59_no7_mov3.musicxml DONE!


File op59_no7_mov4.musicxml DONE!


File op59_no8_mov1.musicxml DONE!


File op59_no8_mov2.musicxml DONE!


File op59_no8_mov3.musicxml DONE!


File op59_no8_mov4.musicxml DONE!


File op59_no9_mov1.musicxml DONE!


File op59_no9_mov2.musicxml DONE!


File op59_no9_mov3.musicxml DONE!


File op59_no9_mov4.musicxml DONE!


File op74_no10_mov1.musicxml DONE!


File op74_no10_mov2.musicxml DONE!


File op74_no10_mov3.musicxml DONE!


File op74_no10_mov4.musicxml DONE!


File op95_no11_mov1.musicxml DONE!


File op95_no11_mov2.musicxml DONE!


File op95_no11_mov3.musicxml DONE!


File op95_no11_mov4.musicxml DONE!


In [51]:
# Keep only the files in out_path whose name contains "NEW-".
csv_files = readdir(out_path)
compare_list = filter(fname -> occursin(r"NEW-", fname), csv_files);

In [52]:

# Per-file accuracy of the CoE key call.
# Only the rows annotated with the most frequent local key are considered. A
# measure counts as a good guess when its CoE_KeyCall equals any of the local
# keys, relative numerals or chords annotated in that measure. For every miss the
# uncertainty, the file index and the measure-group index are recorded.
unc_wrong = []
fwrongs = []
mwrongs = []
good_guess = []
for (f, csv_name) in enumerate(compare_list)
    df_ann = CSV.read(joinpath(out_path, csv_name), DataFrame)

    g_key = get_rank_freq(df_ann[!, :Local_Key])[1, 1]
    ann_meas = groupby(df_ann[df_ann.Local_Key .== g_key, :], :Measure)

    tn_m = length(ann_meas)
    g_guess = 0
    for i in 1:tn_m
        rows = ann_meas[i]
        ann_keys = vcat(rows[:, :Local_Key]..., rows[:, :Relative_Numeral]..., rows[:, :Chord]...)
        coe_key = rows[1, :CoE_KeyCall]
        if any(==(coe_key), coalesce.(ann_keys, "N/A"))
            g_guess += 1
        else
            push!(unc_wrong, rows[1, :CoE_Uncert])
            push!(fwrongs, f)
            push!(mwrongs, i)
        end
    end
    # fraction of the considered measures that were guessed correctly
    push!(good_guess, g_guess / tn_m)
end

In [64]:
# Report the median of the per-file accuracies in good_guess, as a percentage rounded to 3 digits.
println("the median accuracy for all the movements is: ", round(median(good_guess)*100,digits=3) ,"%")

the median accuracy for all the movements is: 67.74%
