In [1]:
push!(LOAD_PATH,".")
using CSV, DataFrames, Plots
using Chakra, Viewpoints, PPM
using Nova

┌ Info: Precompiling Chakra [top-level]
└ @ Base loading.jl:1423
┌ Info: Precompiling Viewpoints [top-level]
└ @ Base loading.jl:1423
┌ Info: Precompiling PPM [top-level]
└ @ Base loading.jl:1423
┌ Info: Precompiling Nova [top-level]
└ @ Base loading.jl:1423


In [2]:
# One identifier string per entry of the "nova" directory: "nova/<entry>/track1".
melodies = [string("nova/", entry, "/track1") for entry in readdir("nova")]
# Look each identifier up in `data` with `find`, then pass the result through
# `obind(..., particles)`.
id_seqs = map(melody -> obind(find(melody, data), particles), melodies)
# Call Chakra.sequence on every entry of `id_seqs`, again against `data`.
seqs = map(ids -> Chakra.sequence(ids, data), id_seqs)
# One PPM.View per sequence, built with vp(:pitch) for both viewpoint arguments.
pitch_view = map(seq -> PPM.View(seq, vp(:pitch), vp(:pitch)), seqs);

# Parameters

In [3]:
# Shared settings handed positionally, as (a, b, e, u, o), to the PPM.ppm_* calls
# in the cells that follow.
a = Set(vcat(54:77, 79, 81))   # the integers 54 through 77, plus 79 and 81
b = Backoff()
e = A()
u = true
# To try the alternative setting, replace the next line with `o = Unbounded()`.
o = Bounded(5)
# Path of the IDyOM CSV export read in the comparison section.
idyom_model = "./idyom-data/STMA5.csv"

"./idyom-data/STMA5.csv"

# Compute Models

In [4]:
# Time PPM.ppm_stm on the pitch views with the shared parameters, then display
# PPM.mean_infcontent of the result.
pitch_stm = @time PPM.ppm_stm(pitch_view, a, b, e, u, o);
pitch_stm |> PPM.mean_infcontent

  1.676746 seconds (13.47 M allocations: 1.367 GiB, 15.96% gc time, 34.97% compilation time)


3.1252478390427516

In [5]:
# Time PPM.ppm_ltm on the pitch views with the shared parameters, then display
# PPM.mean_infcontent of the result.
pitch_ltm = @time PPM.ppm_ltm(pitch_view, a, b, e, u, o);
pitch_ltm |> PPM.mean_infcontent

  2.690253 seconds (17.58 M allocations: 1.706 GiB, 26.13% gc time, 26.17% compilation time)


3.1884859734396596

In [6]:
# Time PPM.ppm_ltm_plus on the pitch views with the shared parameters, then display
# PPM.mean_infcontent of the result.
pitch_ltm_plus = @time PPM.ppm_ltm_plus(pitch_view, a, b, e, u, o);
pitch_ltm_plus |> PPM.mean_infcontent

  2.427866 seconds (18.06 M allocations: 1.849 GiB, 32.79% gc time, 8.67% compilation time)


2.9235090946412456

In [7]:
# Time PPM.ppm_both on the pitch views with the shared parameters, then display
# PPM.mean_infcontent of the result.
pitch_both = @time PPM.ppm_both(pitch_view, a, b, e, u, o);
pitch_both |> PPM.mean_infcontent

  3.233354 seconds (30.02 M allocations: 3.060 GiB, 19.51% gc time, 17.60% compilation time)


2.576812470419125

In [None]:
# Time PPM.ppm_both_plus on the pitch views with the shared parameters, then display
# PPM.mean_infcontent of the result.
pitch_both_plus = @time PPM.ppm_both_plus(pitch_view, a, b, e, u, o);
pitch_both_plus |> PPM.mean_infcontent

# Print Table 

In [None]:
# Convert the pitch_stm result with PPM.todataframe; the plotting cells read its
# Symbol, Prob, IC and H columns. No trailing semicolon, so the table is displayed.
pitch_table = pitch_stm |> PPM.todataframe

In [None]:
# Draw the Prob, IC and H columns of `pitch_table` for rows 1–106 on one figure.
# Each series carries an explicit legend label; without `label`, Plots.jl names
# them y1, y2, y3 and the three quantities cannot be told apart.
# The `let` keeps the row window local to this cell; the figure returned by the
# final `plot!` is still the cell's displayed value.
let rows = 1:106   # NOTE(review): 106 is presumably the length of the first melody — confirm
    plot(pitch_table.Prob[rows], label = "Prob")
    plot!(pitch_table.IC[rows], label = "IC")
    plot!(pitch_table.H[rows], label = "H")
    plot!(size = (900, 300))
end

# Comparison with IDyOM

In [None]:
# Read the IDyOM CSV export named by `idyom_model` into a DataFrame.
idyom_data = DataFrame(CSV.File(idyom_model));
# Pull out the two columns compared against the Julia model. The second name
# contains a dot, so it must be indexed by string.
idyom_prob = idyom_data.probability;
idyom_ic = idyom_data[!, "information.content"]
# Side-by-side table of symbols with the IDyOM and Julia probabilities. The
# constructor requires all three columns to have the same length.
DataFrame(Symbol = pitch_table.Symbol, Idyom = idyom_prob, Julia = pitch_table.Prob)

In [None]:
# Overlay the IDyOM and Julia IC series for rows 100–300. Both curves show the
# same quantity, so each gets an explicit legend label; with the default y1/y2
# names the two models cannot be told apart.
plot(idyom_ic[100:300], label = "IDyOM IC")
plot!(pitch_table.IC[100:300], label = "Julia IC")
# Positions, relative to the 100:300 window (so 1 corresponds to row 100), where
# the IDyOM "phrase" column equals 1. These match the x-axis of the sliced
# series plotted above.
phrases = findall(==(1), idyom_data[!, "phrase"][100:300]);
vline!(phrases, lw = 2, label = "phrase = 1")
plot!(size = (900, 300))

# Multiple Viewpoint Models

In [None]:
# Build a second set of views using vp(:duration) as the first viewpoint argument
# and vp(:pitch) as the second, and run PPM.ppm_stm on them with the same
# parameters as the pitch model.
# NOTE(review): the pitch alphabet `a` is reused here — confirm that is intended
# for a view whose first viewpoint is :duration.
duration_view = [PPM.View(seq, vp(:duration), vp(:pitch)) for seq in seqs];
duration_stm = PPM.ppm_stm(duration_view, a, b, e, u, o);

# Pair up the pitch and duration results sequence by sequence, then element by
# element, and merge each pair with PPM.combine(..., 0).
pitch_duration_stm = [
    [PPM.combine([p1, p2], 0) for (p1, p2) in zip(s, l)]
    for (s, l) in zip(pitch_stm, duration_stm)
];
pitch_duration_table = PPM.todataframe(pitch_duration_stm);

# Plot rows 1–106 of the combined table with phrase markers for the SAME rows.
# The markers used to come from the global `phrases`, whose positions are
# relative to rows 100:300 of the IDyOM data, so they were drawn in the wrong
# places on this 1:106 plot. They are now computed locally for the plotted window.
# NOTE(review): assumes idyom_data rows line up with the table rows, as the
# side-by-side comparison DataFrame in the IDyOM section already requires.
let rows = 1:106
    marks = findall(==(1), idyom_data[!, "phrase"][rows])
    plot(pitch_duration_table.Prob[rows], label = "Prob")
    plot!(pitch_duration_table.IC[rows], label = "IC")
    plot!(pitch_duration_table.H[rows], label = "H")
    vline!(marks, lw = 2, label = "phrase = 1")
    plot!(size = (900, 300))
end