Permalink
Browse files

synth_song & support updated for Jehan-derived resynthesis

  • Loading branch information...
1 parent 748d117 commit b02293dd57adf9750f6a49609ec2ee0ca12880fa @dpwe dpwe committed Nov 13, 2011
Showing with 54 additions and 32 deletions.
  1. +39 −24 MatlabSrc/recons_env_h5.m
  2. +15 −8 MatlabSrc/synth_song.m
View
@@ -1,20 +1,17 @@
-function E = recons_env_h5(A,B)
-% E = recons_env_h5(A,B)
+function E = recons_env_h5(A)
+% E = recons_env_h5(A)
% Reconstruct the time-frequency envelope of a sound from the EN
% timbre descriptors. A is an EN HDF5_Song structure including
-% segment start times and segment durations. B is a
-% matrix of N x M x 12 envelope basis functions. Reconstruct
+% segment start times and segment durations. Reconstruct
% each segment envelope from the A.timbre coefficient weights,
% resample it, insert it into E.
% 2010-05-03 Dan Ellis dpwe@ee.columbia.edu
-global ENTimbreBasis
+global ENTimbreTJ
-if nargin < 2
- if length(ENTimbreBasis)==0
- load ENTimbreBasis
- end
- B = ENTimbreBasis;
+if length(ENTimbreTJ) == 0
+ [p,n,e] = fileparts(which('recons_env_h5'));
+ load(fullfile(p,'ENTimbreTJ.mat'));
end
segments = A.get_segments_start()';
@@ -25,36 +22,54 @@
segmentduration = [segmentduration, segmentduration(end)];
maxtime = segments(end)+segmentduration(end);
-nchan = size(B,1);
-ncol = size(B,2);
-ntimb = size(B,3);
+bases = ENTimbreTJ.bases;
+bmean = ENTimbreTJ.mean;
+
+nchan = size(bases,1);
+ncols = size(bases,2);
+ntimb = size(bases,3);
-tbase = 0.010;
+%tbase = 0.010;
+tbase = 128/22050;
E = zeros(nchan,ceil(maxtime/tbase)+1);
tt = tbase * [0:(ceil(maxtime/tbase))];
timbre = A.get_segments_timbre();
+% How many uninterpolated frames
+ninitial = 10;
+% How many subsequent interpolated frames
+nfinal = ncols - ninitial;
+
for s = 1:nseg
- ei = zeros(nchan,ncol);
+ % reconstruction starts with mean + constant value of 1st element
+ ei = ENTimbreTJ.mean + timbre(1,s);
for i = 1:ntimb
- ei = ei + squeeze(timbre(i,s)*B(:,:,i));
+ ei = ei + squeeze(timbre(i+1,s)*bases(:,:,i));
end
ei2 = [ei,ei(:,end)];
- % Figure out the actual place to put it
- % Segment covers A.segment(s) to
- % A.segment(s)+A.segmentduration(s) in ncol steps
- % thus each col k of ei corresponds to time
- % A.segment(s)+k*A.segmentduration(s)/ncol
- tk = segments(s)+[0:(ncol-1)]/ncol*segmentduration(s);
+ % First ninitial (10) frames are unresampled, spaced tbase apart;
+ % the remaining nfinal frames are spread evenly over the rest of the segment
+ tk = segments(s) + [0:(ninitial-1)]*tbase;
+ tk = [tk, segments(s)+ninitial*tbase ...
+ + [1:(nfinal)]/nfinal*(segmentduration(s)-ninitial*tbase)];
+% plot(tk,'.');
+% title(['segment ', num2str(s)]);
+% pause
trix = find(tt >= tk(1), 1, 'first'):(find(tt<=tk(end), 1, 'last')+1);
tk2 = [tk tk(end)+1];
+ goodtrix = (trix <= size(E,2));
+ trix = trix(goodtrix);
trp = zeros(1,length(trix));
for j = 1:length(trix);
trp(j) = find(tk<=tt(trix(j)), 1,'last');
trp(j) = trp(j) + (tt(trix(j))-tk(trp(j)))/(tk2(trp(j)+1)-tk(trp(j)));
end
-% E(:,trix) = E(:,trix) + idB(A.segmentloudness(s))*(colinterp(ei2,trp).^(1/0.3));
- E(:,trix) = E(:,trix) + (colinterp(ei2,trp).^(1/0.3));
+% E(:,trix) = E(:,trix) +
+% idB(A.segmentloudness(s))*(colinterp(ei2,trp).^(1/0.3));
+% E(:,trix) = E(:,trix) + (colinterp(ei2,trp));
+ % just overwrite any overlap
+% E(:,trix) = 10.^(colinterp(ei2,trp)/20);
+ E(:,trix) = colinterp(ei2,trp);
end
View
@@ -1,15 +1,16 @@
function x = synth_song(M,dur,sr,donoise)
-% x = play_en(M,dur,sr,donoise)
+% x = synth_song(M,dur,sr,donoise)
% Resynthesize audio from an EN analyze structure.
% M is an HDF_Song_File_Reader structure
% x is returned as a waveform synthesized from that data, with
% max duration <dur> secs (duration of song), at sampling rate
% sr (default 22050 Hz). donoise = 1 => noise excited.
% (based on echonest/play_en.m)
-% 2009-03-11 Dan Ellis dpwe@ee.columbia.edu
+% Now with EN-provided resynthesis parameters.
+% 2011-11-13, 2009-03-11 Dan Ellis dpwe@ee.columbia.edu
if nargin < 2; dur = 0; end
-if nargin < 3; sr = 16000; end
+if nargin < 3; sr = 22050; end
if nargin < 4; donoise = 0; end
% include denormalization by loudness
@@ -33,10 +34,14 @@
%%%%% PUT ENVELOPE RECONSTRUCTION FROM TIMBRE FEATURES IN HERE %%%%%%
-E = recons_env_h5(M);
+EdB = recons_env_h5(M);
+% recons_env_h5 returns the envelope in (pseudo?) dB; convert to linear
+E = 10.^(EdB/20);
-winlen = round(sr*.025);
-hoplen = round(sr*.010);
+%hoplen = round(sr*.010);
+%winlen = round(sr*.025);
+hoplen = round(sr * 128/22050);
+winlen = round(2.5*hoplen);
fftlen = 2^ceil(log(winlen)/log(2));
if donoise
@@ -45,8 +50,10 @@
X = specgram(x,fftlen,sr,winlen,winlen-hoplen);
end
%X = specgram(randn(1,length(x)),fftlen,sr,winlen,winlen-hoplen);
-M = fft2barkmx(fftlen,sr,size(E,1),1.0);
-DE = M(:,1:(fftlen/2+1))'*E;
+M = fft2jbarkmx(fftlen,sr,size(E,1),1.0);
+M = M(:,1:(fftlen/2+1));
+SM = sum(M);
+DE = diag(1./SM)*M'*E;
if size(DE,2) < size(X,2)
DE(1,size(X,2)) = DE(1,end);
end

0 comments on commit b02293d

Please sign in to comment.