In [None]:
import statistics
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np

def decode_audio(audio_binary):
  """Decode the bytes of a WAV file and squeeze out the last axis.

  Args:
    audio_binary: encoded WAV contents, as passed to tf.audio.decode_wav.

  Returns:
    The decoded audio tensor with its trailing axis removed.
    NOTE(review): tf.squeeze(axis=-1) only succeeds when that axis has
    size 1, so this presumably expects single-channel audio; confirm.
  """
  decoded = tf.audio.decode_wav(audio_binary)
  # decode_wav returns a pair; only the first element (the audio) is used.
  samples = decoded[0]
  return tf.squeeze(samples, axis=-1)

# Number of time frames in the fixed-length tensor returned by
# get_spectrogram_tf.
MAX_LENGTH = 150

def get_spectrogram_tf(waveform):
  """Return the magnitude over time of the dominant STFT bin, at fixed length.

  The waveform is run through a short-time Fourier transform
  (frame_length=255, frame_step=128). get_max reports the peak bin of every
  frame; the bin that is the peak most often is selected, and its magnitude
  in each frame is returned, truncated or zero-padded to MAX_LENGTH frames.

  Args:
    waveform: 1-D tensor of audio samples (cast to float32 here).

  Returns:
    A float32 tensor of shape [MAX_LENGTH, 1].
  """
  waveform = tf.cast(waveform, tf.float32)

  spectrogram = tf.abs(
      tf.signal.stft(waveform, frame_length=255, frame_step=128))

  # A clip shorter than one STFT frame produces zero frames; there is no
  # peak bin to choose, so return an all-zero column instead of failing.
  if spectrogram.shape[0] == 0:
    return tf.zeros([MAX_LENGTH, 1], dtype=tf.float32)

  # Pick the bin that is the per-frame peak most often.
  peak_bins = get_max(spectrogram)
  unique_bins = tf.unique_with_counts(peak_bins)
  dominant_bin = unique_bins.y[tf.math.argmax(unique_bins.count)]

  # tf.slice needs int32 offsets.
  dominant_bin = tf.cast(dominant_bin, tf.int32)
  column = tf.slice(spectrogram, begin=[0, dominant_bin], size=[-1, 1])

  # Force exactly MAX_LENGTH frames: drop any surplus frames, then pad the
  # tail with zeros when the clip is shorter.
  column = column[:MAX_LENGTH]
  missing_frames = MAX_LENGTH - tf.shape(column)[0]
  return tf.pad(column, [[0, missing_frames], [0, 0]])

import tensorflow as tf
# Turn on memory growth for every GPU that TensorFlow lists.
# NOTE(review): TensorFlow documents that memory growth has to be configured
# before the GPUs are initialized; confirm nothing above runs a TensorFlow op
# before this point.
physical_devices = tf.config.experimental.list_physical_devices('GPU')
for device in physical_devices:
    tf.config.experimental.set_memory_growth(device, True)

# TensorFlow compatible function for determining the peak frequency
def get_max(spectrogram):
    """Return the index of the strongest frequency bin of every frame.

    Args:
        spectrogram: 2-D tensor of magnitudes, indexed as [frame, bin].

    Returns:
        A 1-D int64 tensor with one peak-bin index per frame.
    """
    # argmax runs directly on the magnitudes. Casting them to int64 first
    # would truncate the values, creating artificial ties and mapping every
    # frame whose magnitudes are all below 1.0 to bin 0.
    return tf.math.argmax(spectrogram, axis=-1)

# Load one recording, reduce it to the fixed-length dominant-bin column and
# plot that column over time (transposed so frames run along the x axis).
# `spectrogram` stays in the notebook namespace and is read by a later cell.
audio_binary = tf.io.read_file("data/C.wav")
waveform = decode_audio(audio_binary)
spectrogram = get_spectrogram_tf(waveform)
_ = plt.pcolormesh(spectrogram.numpy().T)
    

In [None]:
from scipy.io import wavfile
# wavfile.read returns a pair; data[1] holds the samples and is what later
# cells slice from.
data = wavfile.read("data/12wpm-0snr.wav")
# Sample count divided by 128, the frame_step used for the STFT in
# get_spectrogram_tf, i.e. roughly how many STFT frames the file spans.
print(len(data[1]) / 128.0)

In [None]:
def _split_letters(levels, samples, threshold=5.0, frame_step=128,
                   max_low_frames=5):
    """Cut `samples` into one segment per burst of high `levels`.

    A segment opens on the first frame whose level exceeds `threshold` and
    closes once more than `max_low_frames` consecutive frames fall below it.
    Frame k corresponds to samples[k * frame_step:(k + 1) * frame_step].

    Args:
        levels: one magnitude per frame.
        samples: 1-D array of audio samples aligned with `levels`.
        threshold: level separating tone from silence.
        frame_step: number of samples represented by each frame.
        max_low_frames: run of low frames tolerated inside a segment.

    Returns:
        A list of 1-D arrays, one per detected segment, in order.
    """
    segments = []
    chunks = None  # None while outside a letter, else the chunks so far
    low_count = 0
    for frame_index, level in enumerate(levels):
        start = frame_index * frame_step
        chunk = samples[start:start + frame_step]

        if chunks is None:
            if level > threshold:
                print("IN_LETTER")
                # Keep the onset frame so the start of the tone is not cut.
                chunks = [chunk]
                low_count = 0
            continue

        chunks.append(chunk)
        low_count = low_count + 1 if level < threshold else 0
        if low_count > max_low_frames:
            print("OUT_OF_LETTER")
            # Join once per segment instead of re-copying on every frame.
            segments.append(np.concatenate(chunks))
            chunks = None

    # A letter still open when the frames run out would otherwise be lost.
    if chunks is not None:
        segments.append(np.concatenate(chunks))
    return segments


# NOTE(review): `spectrogram` and `data` come from earlier cells, which as
# written load different files; confirm both describe the same recording.
letter_segments = _split_letters(spectrogram.numpy()[:, 0], data[1])
for letter_index, segment in enumerate(letter_segments):
    # data[0] is the sample rate returned by wavfile.read for the source file.
    wavfile.write("output-{:04d}.wav".format(letter_index), data[0],
                  segment.astype(np.int16))
    

In [None]:
def SNR(cw, dB, rng=None):
    """Add white Gaussian noise to a signal at a given signal-to-noise ratio.

    The signal power is taken to be the variance of `cw`. A signal with zero
    variance (e.g. pure silence) is treated as having power 0.1 so that noise
    is still produced.

    Args:
        cw: 1-D array-like of signal samples.
        dB: desired signal-to-noise ratio in decibels.
        rng: optional random source exposing normal(loc, scale, size), such
            as np.random.default_rng(seed). Defaults to the global np.random
            state, so np.random.seed() keeps working as before.

    Returns:
        A NumPy array of the same length as `cw` holding signal plus noise.
    """
    cw = np.asarray(cw)  # accept plain lists as well as arrays
    if rng is None:
        rng = np.random

    SNR_linear = 10.0**(dB/10.0)
    power = cw.var()
    if power == 0:
        power = 0.1
    noise_power = power/SNR_linear
    noise = np.sqrt(noise_power)*rng.normal(0, 1, len(cw))
    return noise + cw


In [None]:
sample_rate = 8000


def generate_silence(time_units, wpm):
    """Build an all-zero signal lasting `time_units` units at speed `wpm`.

    The sample count is time_units * sample_rate / wpm, truncated to an
    integer.

    Args:
        time_units: length of the silence in timing units.
        wpm: speed divisor applied to the length.

    Returns:
        A 1-D NumPy array of zeros.
    """
    n_samples = int(time_units * sample_rate / wpm)
    silence = np.zeros(n_samples)
    return silence


In [None]:
# Quick check: add noise at -6 dB to silence and show the first 100 samples.
# Silence has zero variance, so SNR uses its 0.1 fallback power here.
SNR(generate_silence(10, 5), -6)[0:100]