In [1]:
import sys
import numpy as np
import argparse
# Make the local CLEESE clone importable; this must run before the
# `cleese_stim` import below.
# NOTE(review): this is a machine-specific absolute path -- consider making it
# configurable. Every backslash is now escaped: the original literal contained
# `\D`, an invalid escape sequence that newer Python versions warn about. The
# resulting runtime string is unchanged.
sys.path.insert(1, 'C:\\Users\\ptut0\\Documents\\vocal_ambiguity\\cleese_clone\\')
import cleese_stim as cleese
from cleese_stim.engines import PhaseVocoder
import tomli

from IPython.display import Markdown, display, Audio
from IPython.core.interactiveshell import InteractiveShell
# Display the value of every top-level expression in a cell, not only the last.
InteractiveShell.ast_node_interactivity = "all"

import os

In [2]:
# Input recording; the word of interest is taken from the part of the file
# name after the last underscore (e.g. "bottle_feel.wav" -> "feel").
input_file = "./sounds/bottle_feel.wav"

# TOML configuration file for each transform.
config_dict = {
  'stretch': "./configs/random_stretch_profile.toml",
  'pitch': "./configs/random_pitch_profile.toml",
  'eq': "./configs/random_timbre_profile.toml"
}

# Transforms to apply to the recording.
transforms = ['stretch', 'pitch', 'eq']

# The [start, end] time points of the word of interest in each phrase.
# They are compared against `duration` (samples / sample rate) below, so they
# are expressed in seconds. An empty list means no time points are recorded.
time_points = {
  "feel" : [2.51, 2.71],
  "fill" : [2.32, 2.51],
  "beat" : [2.51, 2.69],
  "bit" : [2.41, 2.53],
  "fou" : [],
  "fut" : [0.23, 0.51],
  "brillant" : [2.49, 2.90],
  "bruyant" : [2.42, 2.90]
}

wave_in, sr, _ = PhaseVocoder.wavRead(input_file)

# Length of the recording in seconds.
duration = len(wave_in) / float(sr)

# Parse the TOML config of each transform. The `with` block closes every file
# handle as soon as it has been parsed (the handles used to be left open).
for key in config_dict:
  with open(config_dict[key], "rb") as f:
    if key == 'stretch':
      stretch_config = tomli.load(f)
    if key == 'pitch':
      pitch_config = tomli.load(f)
    if key == 'eq':
      eq_config = tomli.load(f)

# Extract the word from the file name: text after the last "_" and before
# the first "." of that remainder.
word = input_file.split("_")[-1]
word = word.split(".")[0]

# Fail early with a clear message when the word is unknown or has no usable
# [start, end] pair (e.g. "fou"), instead of an opaque KeyError/IndexError.
if len(time_points.get(word, [])) != 2:
  raise ValueError(
      "time_points needs a [start, end] pair for word '{}' (from '{}')".format(word, input_file)
  )

time_points_word = np.array(time_points[word]) # values found in audacity
# phrase before the word
pre_duration = time_points_word[0]
# phrase after the word
post_duration = duration - time_points_word[1]
# the word itself
word_duration = time_points_word[1] - time_points_word[0]

def generateCustomBP(transform, pre_duration, post_duration, word_duration, config, config_file):
  """Build a breakpoint function (BPF) laid out around a target word.

  Three time vectors are created with PhaseVocoder.create_BPF_time_vec -- one
  for the phrase before the word, one for the word and one for the phrase
  after it -- then shifted to their position in the recording, concatenated,
  and handed to PhaseVocoder.create_BPF.

  Parameters:
    transform: key of the transform ('stretch', 'pitch' or 'eq'); selects the
      sub-section `config[transform]` and is forwarded to create_BPF.
    pre_duration: length of the phrase before the word; also used as the
      start time of the word.
    post_duration: length of the phrase after the word.
    word_duration: length of the word.
    config: parsed configuration mapping for this transform.
    config_file: path of the configuration file, forwarded to create_BPF.

  Returns:
    Whatever PhaseVocoder.create_BPF returns for the combined time points.
  """
  # Segment offsets are derived from the arguments rather than from the
  # notebook-global `time_points_word`, so the function no longer depends on
  # hidden state. For the visible callers these are the same instants
  # (start == pre_duration, end == start + word_duration), up to
  # floating-point rounding in the sum.
  word_start = pre_duration
  word_end = pre_duration + word_duration

  segment_config = config[transform]
  bpf_time_pre, _, _ = PhaseVocoder.create_BPF_time_vec(
      pre_duration,
      segment_config
  )
  bpf_time_post, _, _ = PhaseVocoder.create_BPF_time_vec(
      post_duration,
      segment_config
  )
  # NOTE(review): `end_on_trans` is taken from the word segment (the last call,
  # as in the original code) although the combined vector ends with the post
  # segment -- confirm this is the intended flag.
  bpf_time_word, _, end_on_trans = PhaseVocoder.create_BPF_time_vec(
      word_duration,
      segment_config
  )

  # adjust to correct time points
  bpf_time_post = [bp + word_end for bp in bpf_time_post]
  bpf_time_word = [bp + word_start for bp in bpf_time_word]
  bpf_time_pre = list(bpf_time_pre)

  # drop duplicate time between bpfs
  # (presumably each vector starts at 0 and ends at its duration, so adjacent
  # segments share a boundary point -- TODO confirm)
  del bpf_time_post[0]
  del bpf_time_pre[-1]

  full_time_points = bpf_time_pre + bpf_time_word + bpf_time_post

  num_points = len(full_time_points)

  # Only the 'eq' transform gets a frequency vector; others pass None.
  eqFreqVec = None
  if transform == 'eq':
    eqFreqVec = PhaseVocoder.createBPFfreqs(config)

  # create a bpf for each of the transformations
  bpf_list = (PhaseVocoder.create_BPF(
      transform,
      config_file,
      full_time_points,
      num_points,
      end_on_trans,
      eqFreqVec
  ))

  return bpf_list

# Parsed configuration for each transform, keyed like `config_dict`.
transform_configs = {
  'stretch': stretch_config,
  'pitch': pitch_config,
  'eq': eq_config
}

# Start from the unmodified input so the file write below still works when
# `transforms` is empty (`wave_out` used to be undefined in that case).
wave_out = wave_in

# Apply the selected transforms in the fixed order stretch -> pitch -> eq,
# whatever their order in `transforms`; each step processes the output of the
# previous one.
# NOTE(review): pre/post/word durations are computed once from the original
# recording; presumably the stretch step changes the timing of the waveform,
# so the later BPFs may no longer line up with the word -- confirm.
for transform in ('stretch', 'pitch', 'eq'):
  if transform not in transforms:
    continue
  bpf_list = generateCustomBP(transform, pre_duration, post_duration, word_duration, transform_configs[transform], config_dict[transform])
  wave_out,bpf_out = cleese.process_data(
      PhaseVocoder,
      wave_in,
      config_dict[transform],
      sample_rate=sr,
      BPF=bpf_list
  )
  # Feed this result into the next transform.
  wave_in = wave_out

# Write the result next to the notebook as "<input stem>_out_test.wav".
file_name = os.path.splitext(os.path.basename(input_file))[0]+'_out_test.wav'
PhaseVocoder.wavWrite(wave_out,file_name,sr)

44100
stretch
None
None


WARN: stereo file detected. Reading only left channel.
stretch variation 1/1


pitch
None
None


pitch variation 1/1


eq
am I eq
[    0.           291.48552026   704.34790983  1289.12945428
  2117.41864548  3290.61398785  4952.33712269  7306.01487537
 10639.78260555 15361.75750895 22050.        ]
[    0.           291.48552026   704.34790983  1289.12945428
  2117.41864548  3290.61398785  4952.33712269  7306.01487537
 10639.78260555 15361.75750895 22050.        ]
<class 'numpy.ndarray'>
[    0.           291.48552026   704.34790983  1289.12945428
  2117.41864548  3290.61398785  4952.33712269  7306.01487537
 10639.78260555 15361.75750895 22050.        ]
<class 'numpy.ndarray'>
[    0.           291.48552026   704.34790983  1289.12945428
  2117.41864548  3290.61398785  4952.33712269  7306.01487537
 10639.78260555 15361.75750895 22050.        ]
<class 'numpy.ndarray'>
[    0.           291.48552026   704.34790983  1289.12945428
  2117.41864548  3290.61398785  4952.33712269  7306.01487537
 10639.78260555 15361.75750895 22050.        ]
<class 'numpy.ndarray'>
[    0.           291.48552026   704.34790983  1

eq variation 1/1
