In [None]:
import stft
import peaks
import phasevocoder
import os
import scipy.io.wavfile as wav
import IPython.display as ipd
import ipywidgets as ipw

In [None]:
def _parse_melody(spec):
    """Expand a string such as "E4 G4" into [("E", 4), ("G", 4)]."""
    # Each whitespace-separated token is a note name followed by a
    # single-digit octave number.
    return [(token[:-1], int(token[-1])) for token in spec.split()]


# Target melodies, keyed by display name. Each value is the ordered list of
# (note name, octave number) pairs that make up the melody.
MELODIES = {
    "7 Nation Army": _parse_melody("E4 E4 G4 E4 D4 C4 B3"),
    "7 Ds": _parse_melody("D4") * 7,
    "C Major scale": _parse_melody("C4 D4 E4 F4 G4 A4 B4 C5"),
}

# Directory holding the input recordings, relative to the working directory.
AUDIO_DIR = "../audio"

# File names offered in the UI dropdowns. os.listdir returns every directory
# entry (sub-directories, hidden files, other formats) in arbitrary order, so
# keep only WAV files -- anything else would fail in wav.read -- and sort them
# so the dropdown order is stable between runs.
wavfiles = sorted(
    name for name in os.listdir(AUDIO_DIR) if name.lower().endswith(".wav")
)

In [None]:
def detect_notes(filename, fft_length, freq_thresh, note_gap_time, min_note_length, peak_height):
    """Load a WAV file, visualise it, and report the notes found in it.

    Written as an ``ipywidgets`` callback: every result is displayed or
    printed, and nothing is returned.

    Parameters
    ----------
    filename : str
        Name of a file inside ``AUDIO_DIR``.
    fft_length : float
        FFT size passed to ``stft.analysis`` as ``N`` (truncated to ``int``).
    freq_thresh, note_gap_time, min_note_length
        Forwarded unchanged, in this order, to ``peaks.find_notes``.
    peak_height
        Forwarded as ``height=`` to ``peaks.plot_peaks`` and
        ``peaks.find_notes``.
    """
    fs, data = wav.read(os.path.join(AUDIO_DIR, filename))
    if data.ndim == 2:
        data = data[:, 0]  # get just one channel

    # wav.read yields integer arrays for PCM files and float arrays for
    # float files; only float data can be normalised to [-1, 1]. Checking the
    # dtype first keeps a silent integer recording from being mistaken for
    # normalised audio (multiplying a narrow integer array by 2**15 can
    # overflow) and avoids a slow Python-level scan of the whole array.
    if data.dtype.kind == "f" and abs(data).max() <= 1:
        data = data * 2**15  # convert [-1, 1] to [-2^15, 2^15]

    ipd.display(ipd.Audio(data, rate=fs))

    Zxx, f, t = stft.analysis(data, N=int(fft_length))
    stft.spectrogram(Zxx, f, t, title="Spectrogram of Input Signal")

    peaks.plot_peaks(Zxx, f, t, title="Peaks in Input Signal", height=peak_height)
    notes = peaks.find_notes(Zxx, f, t, freq_thresh, note_gap_time, min_note_length, height=peak_height)
    # One repr per note. Splitting repr(notes) on ", " would also break up any
    # single note whose own repr contains a comma.
    note_reprs = [repr(note) for note in notes]
    print(f"Detected {len(notes)} notes in the signal:", *note_reprs, sep="\n    ")
    peaks.plot_notes(notes, title="Notes in Input Signal")
    

In [None]:
# interact_manual variant whose run button is labelled "Detect notes!".
ui_notes = ipw.interact_manual.options(manual_name="Detect notes!")

# Calling the factory with a function builds the controls and displays them
# right away, so no separate display() call is needed (displaying the factory
# object itself would only print its repr). The call returns the wrapped
# function; the trailing semicolon stops the notebook from echoing it.
ui_notes(
    detect_notes,
    filename=wavfiles,
    # base-2 log slider: 2**8 .. 2**12
    fft_length=ipw.FloatLogSlider(value=1024, base=2, min=8, max=12, step=1),
    freq_thresh=ipw.FloatSlider(value=30, min=10, max=100),
    note_gap_time=ipw.FloatSlider(value=0.1, min=0.05, max=0.5, step=0.01),
    min_note_length=ipw.FloatSlider(value=0.5, min=0.01, max=1, step=0.01),
    # default base-10 log slider: 10**1 .. 10**3
    peak_height=ipw.FloatLogSlider(value=150, min=1, max=3, step=0.1),
);

In [None]:
def shift_pitch(filename, num_semitones, fft_length):
    """Pitch-shift a WAV file and show/play it before and after the shift.

    Written as an ``ipywidgets`` callback: every result is displayed, and
    nothing is returned.

    Parameters
    ----------
    filename : str
        Name of a file inside ``AUDIO_DIR``.
    num_semitones : int or float
        Size of the shift; converted to the frequency ratio
        ``2 ** (num_semitones / 12)`` for ``phasevocoder.pitch_shift``.
    fft_length : float
        FFT size passed to ``stft.analysis`` as ``N`` (truncated to ``int``)
        for both spectrograms.
    """
    fs, data = wav.read(os.path.join(AUDIO_DIR, filename))
    if data.ndim == 2:
        data = data[:, 0]  # get just one channel

    # wav.read yields integer arrays for PCM files and float arrays for
    # float files; only float data can be normalised to [-1, 1]. Checking the
    # dtype first keeps a silent integer recording from being mistaken for
    # normalised audio (multiplying a narrow integer array by 2**15 can
    # overflow) and avoids a slow Python-level scan of the whole array.
    if data.dtype.kind == "f" and abs(data).max() <= 1:
        data = data * 2**15  # convert [-1, 1] to [-2^15, 2^15]

    ipd.display(ipd.Audio(data, rate=fs))

    fft_size = int(fft_length)
    Zxx, f, t = stft.analysis(data, N=fft_size)
    stft.spectrogram(Zxx, f, t, title="Spectrogram of Input Signal")

    # Twelve semitones make an octave, i.e. a doubling of frequency.
    freq_ratio = 2**(num_semitones/12)
    shifted = phasevocoder.pitch_shift(data, freq_ratio)

    # Analyse the *shifted* signal here; analysing `data` again would just
    # redraw the input spectrogram under the "Shifted" title.
    Zxx_shift, f_shift, t_shift = stft.analysis(shifted, N=fft_size)
    stft.spectrogram(Zxx_shift, f_shift, t_shift, title="Spectrogram of Shifted Signal")

    ipd.display(ipd.Audio(shifted, rate=fs))
    

In [None]:
# interact_manual variant whose run button is labelled "Shift pitch!".
ui_shift = ipw.interact_manual.options(manual_name="Shift pitch!")

# Calling the factory with a function builds the controls and displays them
# right away, so no separate display() call is needed (displaying the factory
# object itself would only print its repr). The call returns the wrapped
# function; the trailing semicolon stops the notebook from echoing it.
ui_shift(
    shift_pitch,
    filename=wavfiles,
    # up to one octave (12 semitones) down or up
    num_semitones=ipw.IntSlider(value=0, min=-12, max=12),
    # base-2 log slider: 2**8 .. 2**12
    fft_length=ipw.FloatLogSlider(value=1024, base=2, min=8, max=12, step=1),
);

In [None]:
def stftune(filename, melody, fft_length, freq_thresh, note_gap_time, min_note_length, peak_height):
    """Detect the notes in a WAV file and retune them to a chosen melody.

    Written as an ``ipywidgets`` callback: every result is displayed or
    printed, and nothing is returned.

    Parameters
    ----------
    filename : str
        Name of a file inside ``AUDIO_DIR``.
    melody : str
        Key into ``MELODIES`` selecting the target note sequence.
    fft_length : float
        FFT size passed to ``stft.analysis`` as ``N`` (truncated to ``int``)
        for both spectrograms.
    freq_thresh, note_gap_time, min_note_length
        Forwarded unchanged, in this order, to ``peaks.find_notes``.
    peak_height
        Forwarded as ``height=`` to ``peaks.plot_peaks`` and
        ``peaks.find_notes``.

    Raises
    ------
    KeyError
        If ``melody`` is not a key of ``MELODIES``.
    """
    fs, data = wav.read(os.path.join(AUDIO_DIR, filename))
    if data.ndim == 2:
        data = data[:, 0]  # get just one channel

    # wav.read yields integer arrays for PCM files and float arrays for
    # float files; only float data can be normalised to [-1, 1]. Checking the
    # dtype first keeps a silent integer recording from being mistaken for
    # normalised audio (multiplying a narrow integer array by 2**15 can
    # overflow) and avoids a slow Python-level scan of the whole array.
    if data.dtype.kind == "f" and abs(data).max() <= 1:
        data = data * 2**15  # convert [-1, 1] to [-2^15, 2^15]

    ipd.display(ipd.Audio(data, rate=fs))

    fft_size = int(fft_length)
    Zxx, f, t = stft.analysis(data, N=fft_size)
    stft.spectrogram(Zxx, f, t, title="Spectrogram of Input Signal")

    peaks.plot_peaks(Zxx, f, t, title="Peaks in Input Signal", height=peak_height)
    notes = peaks.find_notes(Zxx, f, t, freq_thresh, note_gap_time, min_note_length, height=peak_height)
    # One repr per note. Splitting repr(notes) on ", " would also break up any
    # single note whose own repr contains a comma.
    note_reprs = [repr(note) for note in notes]
    print(f"Detected {len(notes)} notes in the signal:", *note_reprs, sep="\n    ")
    peaks.plot_notes(notes, title="Notes in Input Signal")

    desired_notes = MELODIES[melody]
    # Unpack each (name, octave) pair by name so the STFT time axis `t` above
    # is not shadowed.
    desired_notes_str = " ".join(f"{name}{octave}" for name, octave in desired_notes)
    retuned = phasevocoder.retune(data, notes, desired_notes)
    # Report success only once retuning has actually completed.
    print(f"Retuned signal to {melody}: {desired_notes_str}")
    ipd.display(ipd.Audio(retuned, rate=fs))

    Zxx_retune, f_retune, t_retune = stft.analysis(retuned, N=fft_size)
    stft.spectrogram(Zxx_retune, f_retune, t_retune, title="Spectrogram of Retuned Signal")

In [None]:
# interact_manual variant whose run button is labelled "Run STFTune!".
ui_stftune = ipw.interact_manual.options(manual_name="Run STFTune!")

# Calling the factory with a function builds the controls and displays them
# right away, so no separate display() call is needed (displaying the factory
# object itself would only print its repr). The call returns the wrapped
# function; the trailing semicolon stops the notebook from echoing it.
ui_stftune(
    stftune,
    filename=wavfiles,
    # An explicit list of melody names becomes a dropdown.
    melody=list(MELODIES),
    # base-2 log slider: 2**8 .. 2**12
    fft_length=ipw.FloatLogSlider(value=1024, base=2, min=8, max=12, step=1),
    freq_thresh=ipw.FloatSlider(value=30, min=10, max=100),
    note_gap_time=ipw.FloatSlider(value=0.1, min=0.05, max=0.5, step=0.01),
    min_note_length=ipw.FloatSlider(value=0.5, min=0.01, max=1, step=0.01),
    # default base-10 log slider: 10**1 .. 10**3
    peak_height=ipw.FloatLogSlider(value=150, min=1, max=3, step=0.1),
);