In [None]:
!pip3 install whisper-timestamped

In [None]:
import json
import os
import random
import wave

import IPython.display as ipd
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import soundfile as sf
import torch
import torchaudio
import whisper_timestamped as whisper
from google.colab import drive
from scipy.io.wavfile import write

import utils as u

In [None]:
# Attach Google Drive to the Colab runtime.
# All output is written beneath data_path; point it at whichever Drive folder you prefer.
drive.mount("/content/drive")
data_path = "/content/drive/MyDrive/megaphone/audio_samples"

In [None]:
# Pitch adjustments to apply, in semitones.
d_pitches = [-2, -1, 0, 1, 2]
# Slow-down amounts applied to the whole track or to single words
# (each value is handed to the helpers in utils as-is).
p_slowers = [0.15, 0.1, 0.08, 0.05, 0.03]
# Audio file to process.
file_path = "megaphone_test_audio_small.mp3"
# Prefix shared by every file and folder saved by this run.
save_prefix = "7_19_v3"

In [None]:
# Folder on Drive that receives everything produced by this run.
save_path = f"{data_path}/{save_prefix}"

In [None]:
# echo the resolved output folder so it can be checked before anything is written
save_path

In [None]:
# Decode the audio file: y holds the samples, sr the sample rate librosa reports.
y, sr = librosa.load(file_path)

In [None]:
# Transcribe the recording with Whisper to obtain every word together with
# its start and end timestamps.
audio = whisper.load_audio(file_path)
model = whisper.load_model("tiny", device="cpu")
result = whisper.transcribe(model, audio, language="en")

# Round-trip the transcript through JSON text and back into Python objects.
result_formatted = json.dumps(result, indent=2, ensure_ascii=False)
result_dict = json.loads(result_formatted)

print(result_dict)

In [None]:
# The transcript can be split into several segments, each carrying its own
# "words" list, so no single segment index is guaranteed to hold every word.
# Gather the words of all segments, in order, instead of guessing an index.
result_dict_indexed = [
  word
  for segment in result_dict.get("segments", [])
  for word in segment.get("words", [])
]
# Fail here with a clear message rather than later with empty word lists.
if not result_dict_indexed:
  raise ValueError(f"no word-level timestamps were found for {file_path!r}")

In [None]:
# Split the word entries into two parallel lists: start times and word text.
start_times = [entry["start"] for entry in result_dict_indexed]
words = [entry["text"] for entry in result_dict_indexed]

In [None]:
# Names of the per-variant output folders.
file_partial_names = [
    # slow-only variants
    "all_slowed",
    "alt_speed",
    "random_word_slower_sample",
    "chosen_word_slower_sample",
    # pitch-only variants
    "all_pitched_down",
    "alt_pitch",
    "random_word_pitched_down_sample",
    "chosen_word_pitched_down_sample",
    # combined slow + pitch variants
    "all_slow_rand_pitched",
    "all_slow_chosen_pitched",
    "all_pitched_rand_slow",
    "all_pitched_chosen_slow",
]

# Full path of every variant folder under data_path/save_prefix.
directories = [f"{data_path}/{save_prefix}/{name}" for name in file_partial_names]

In [None]:
# Write files with this naming convention:
#   <save_prefix><source file stem><parameter tag>$<variant>.wav
# The "$" separator is what move_wav_files_to_drive splits on to find the variant.
def _sample_filename(tag, variant):
  """Build the output WAV filename for one variant of the current source file.

  tag is the already-stringified parameter value(s) placed before the "$";
  variant is the name placed after it. Reads the globals save_prefix and file_path.
  """
  # Keep only the base name and strip just the final extension, so a file_path
  # that contains folders or extra dots still yields a usable stem.
  stem = os.path.splitext(os.path.basename(file_path))[0]
  return save_prefix + stem + tag + "$" + variant + ".wav"

def write1():
  """Save the four slow-only variants.

  Reads the globals p_slow, sr and the four sample arrays set by the generation loop.
  """
  tag = str(p_slow)
  write(_sample_filename(tag, "all_slowed"), sr, all_slowed)
  write(_sample_filename(tag, "alt_speed"), sr, alt_speed)
  write(_sample_filename(tag, "random_word_slower_sample"), sr, random_word_slower_sample)
  write(_sample_filename(tag, "chosen_word_slower_sample"), sr, chosen_word_slower_sample)

def write2():
  """Save the four pitch-only variants.

  Reads the globals d_pitch, sr and the four sample arrays set by the generation loop.
  """
  tag = str(d_pitch)
  write(_sample_filename(tag, "all_pitched_down"), sr, all_pitched_down)
  write(_sample_filename(tag, "alt_pitch"), sr, alt_pitch)
  write(_sample_filename(tag, "random_word_pitched_down_sample"), sr, random_word_pitched_down_sample)
  write(_sample_filename(tag, "chosen_word_pitched_down_sample"), sr, chosen_word_pitched_down_sample)

def write3():
  """Save the four combined slow + pitch variants.

  Reads the globals p_slow, d_pitch, sr and the four sample arrays set by the
  generation loop.
  """
  tag = str(p_slow) + str(d_pitch)
  write(_sample_filename(tag, "all_slow_rand_pitched"), sr, all_slow_rand_pitched)
  write(_sample_filename(tag, "all_slow_chosen_pitched"), sr, all_slow_chosen_pitched)
  write(_sample_filename(tag, "all_pitched_rand_slow"), sr, all_pitched_rand_slow)
  write(_sample_filename(tag, "all_pitched_chosen_slow"), sr, all_pitched_chosen_slow)

In [None]:
# Make a Drive folder for the run plus one sub-folder per variant.
# The paths come from save_path and file_partial_names, so they follow whatever
# data_path / save_prefix are set to and always match the variant names.
os.makedirs(save_path, exist_ok=True)
for variant_dir in file_partial_names:
  os.makedirs(os.path.join(save_path, variant_dir), exist_ok=True)

In [None]:
# Generate and save every slow-only alteration.
# write1() picks up p_slow and the four arrays below from the notebook globals.
for p_slow in p_slowers:
  all_slowed = u.all_slow(y, p_slow)
  alt_speed = u.alternate_speed(y, p_slow, start_times)
  random_word_slower_sample = u.random_word_slow(y, p_slow, start_times)
  chosen_word_slower_sample = u.chosen_word_slower(y, p_slow, start_times, words, pos="verb")
  write1()

# Generate and save every pitch-only alteration.
# write2() picks up d_pitch and the four arrays below from the notebook globals.
for d_pitch in d_pitches:
  all_pitched_down = u.all_pitched_down(y, d_pitch)
  alt_pitch = u.alternate_pitch(y, d_pitch, start_times)
  random_word_pitched_down_sample = u.random_word_pitched_down(y, d_pitch, start_times)
  chosen_word_pitched_down_sample = u.chosen_word_pitched_down(y, d_pitch, start_times, words, pos="verb")
  write2()

# Generate and save every combined slow + pitch alteration.
for p_slow in p_slowers:
  for d_pitch in d_pitches:
    all_slow_rand_pitched = u.all_slow_random_word_pitched_down(y, p_slow, d_pitch, start_times)
    all_slow_chosen_pitched = u.all_slow_chosen_word_pitched_down(y, p_slow, d_pitch, start_times, words, "verb")
    try:
      all_pitched_rand_slow = u.all_pitched_down_random_slowed(y, p_slow, d_pitch, start_times)
      all_pitched_chosen_slow = u.all_pitched_down_chosen_word_slowed(y, p_slow, d_pitch, start_times, words, "verb")
    except Exception as err:
      # write3() saves all four arrays together. If one could not be generated,
      # writing anyway would store an array left over from an earlier iteration
      # under this combination's name, so report the failure and skip it.
      print(f"Skipping p_slow={p_slow}, d_pitch={d_pitch}: {type(err).__name__}: {err}")
      continue
    write3()

In [None]:
import shutil

# move files from colab to drive
def move_wav_files_to_drive(source_folder, destination_folder):
    """Move generated WAV files into their per-variant folders.

    A file named ``<anything>$<variant>.wav`` in ``source_folder`` is moved to
    ``destination_folder/<variant>/``. WAV files without a ``$`` separator are
    ignored, and files whose variant has no matching folder are reported and
    left in place.

    Args:
        source_folder: Directory scanned (non-recursively) for ``.wav`` files.
        destination_folder: Directory holding one sub-folder per variant name.
    """
    for file_name in os.listdir(source_folder):
        if not file_name.lower().endswith(".wav"):
            continue

        # The variant name sits between the last "$" and the first following dot.
        _, separator, tail = file_name.rpartition("$")
        to_match = tail.split(".")[0]
        if not separator or not to_match:
            # Not named by this notebook's convention; leave the file alone.
            continue
        print(to_match)

        target_dir = os.path.join(destination_folder, to_match)
        if not os.path.isdir(target_dir):
            print(f"Skipped {file_name}: no folder named {to_match} in {destination_folder}")
            continue

        try:
            shutil.move(os.path.join(source_folder, file_name), os.path.join(target_dir, file_name))
            print(f"Moved {file_name} to {destination_folder}")
        except Exception as e:
            # Keep going so one failed move does not strand the remaining files.
            print(f"Error moving {file_name}: {e}")


In [None]:
# Sort the WAV files sitting in the Colab working directory into the Drive folders.
import os
current_dir = os.getcwd()
move_wav_files_to_drive(source_folder=current_dir, destination_folder=save_path)