In [2]:
import pyaudio
import os
os.chdir('../')
import struct
import numpy as np
import matplotlib.pyplot as plt
import time
from tkinter import TclError

# use this backend to display in separate Tk window
%matplotlib tk

# constants
CHUNK = 1024 * 2             # samples (frames) read from the stream per plot update
FORMAT = pyaudio.paInt16     # 16-bit signed integer samples, i.e. 2 bytes per sample
CHANNELS = 1                 # single channel for microphone
RATE = 44100                 # samples per second

ModuleNotFoundError: No module named 'pyaudio'

In [4]:
# create matplotlib figure and axes
fig, ax = plt.subplots(1, figsize=(15, 7))

# pyaudio class instance
p = pyaudio.PyAudio()

# stream object to get data from microphone; this cell only reads audio,
# so no output (playback) stream is requested
stream = p.open(
    format=FORMAT,
    channels=CHANNELS,
    rate=RATE,
    input=True,
    frames_per_buffer=CHUNK
)

# variable for plotting
x = np.arange(0, 2 * CHUNK, 2)

# create a line object with random data
line, = ax.plot(x, np.random.rand(CHUNK), '-', lw=2)

# basic formatting for the axes
ax.set_title('AUDIO WAVEFORM')
ax.set_xlabel('samples')
ax.set_ylabel('volume')
ax.set_ylim(0, 255)
ax.set_xlim(0, 2 * CHUNK)
plt.setp(ax, xticks=[0, CHUNK, 2 * CHUNK], yticks=[0, 128, 255])

# show the plot
plt.show(block=False)

print('stream started')

# for measuring frame rate
frame_count = 0
start_time = time.time()

try:
    while True:

        # binary data: CHUNK frames of 16-bit samples. Do not raise if the
        # input buffer overflowed while the figure was being redrawn.
        data = stream.read(CHUNK, exception_on_overflow=False)

        # decode the raw buffer as signed 16-bit samples
        samples = np.frombuffer(data, dtype=np.int16)

        # keep the most significant byte of every sample (-128..127) and
        # shift it into the 0..255 range used by the y axis
        data_np = (samples >> 8) + 128

        line.set_ydata(data_np)

        # update figure canvas
        try:
            fig.canvas.draw()
            fig.canvas.flush_events()
            frame_count += 1

        except TclError:
            # raised once the Tk window has been closed: report and stop

            # calculate average frame rate
            frame_rate = frame_count / (time.time() - start_time)

            print('stream stopped')
            print('average frame rate = {:.0f} FPS'.format(frame_rate))
            break
finally:
    # always release the audio device, even on error or kernel interrupt
    stream.stop_stream()
    stream.close()
    p.terminate()

stream started
stream stopped
average frame rate = 21 FPS


##  Training Dataset Preparation

Input to the model: an 8-second audio clip with my voice dubbed (overlaid) somewhere in it.

Label: some number of time steps in the clip are labelled.

Inference: the model listens for 8 seconds, processes that window, waits for a while, then processes the next window, and so on.

Ideally this is multithreaded, so that while one window is being processed to generate a prediction, the next waveform is already being prepared.

The computation is paced with `time.sleep`.

In [1]:
import numpy as np
from pydub import AudioSegment
import random
import sys
import io
import os
import glob
import IPython
%matplotlib inline

In [2]:
import logging

In [3]:
os.chdir('../')

In [4]:
# IPython.display.Audio("data/external/sample_internet.wav")

In [5]:
from pydub import AudioSegment
song = AudioSegment.from_wav('./data/external/podact_data/train_1.wav')

In [6]:
type(song)

pydub.audio_segment.AudioSegment

In [7]:
## load training examples

In [8]:
# Folder holding the recorded activation clips.
root_ = './data/external/podact_data/activates_robin'

# Collect every clip whose file name starts with a digit, reporting its
# loudness (dBFS) as it is loaded.
clips = []
for i in os.listdir(root_):
    if not i[0].isdigit():
        continue
    clip_path = os.path.join(root_, i)
    clip = AudioSegment.from_wav(clip_path)
    print('Traning Clip Vol : ' , clip.dBFS )
    clips.append(clip)

Traning Clip Vol :  -30.7636744506485
Traning Clip Vol :  -36.48736885673499
Traning Clip Vol :  -37.11067469779735
Traning Clip Vol :  -39.304431638092474
Traning Clip Vol :  -33.38225645659825
Traning Clip Vol :  -39.80810255845745
Traning Clip Vol :  -43.66022950088225
Traning Clip Vol :  -42.07660457992975
Traning Clip Vol :  -43.03675910135147
Traning Clip Vol :  -39.91243882367998
Traning Clip Vol :  -32.624423306542276
Traning Clip Vol :  -37.18703465893772
Traning Clip Vol :  -36.95993964139527
Traning Clip Vol :  -32.51176427403059
Traning Clip Vol :  -36.59416392714908
Traning Clip Vol :  -41.68172341601461
Traning Clip Vol :  -41.273269988708556
Traning Clip Vol :  -41.97618855242873


In [9]:
total_song_length = song.duration_seconds

In [10]:
from pydub.playback import play
import random

In [11]:
# n scales the slice bounds by 1000 (presumably seconds -> milliseconds, the
# unit AudioSegment lengths are reported in elsewhere in this notebook), so
# this plays roughly seconds 24 to 29 of the background track.
n = 1000
play( song[23.9999999 * n : 29 * n] ) 

In [12]:
def sample_from_activate( clip_list ):
    """Pick one activation clip uniformly at random from ``clip_list``."""
    chosen_clip = random.choice(clip_list)
    return chosen_clip

## experimental 
def adjust_level(sound, deviation=None , default = True):
    """Optionally apply a random gain to ``sound`` (experimental).

    Parameters
    ----------
    sound : object exposing ``dBFS`` and ``apply_gain(gain)``
        The audio to adjust.
    deviation : float or None
        Standard deviation of the zero-mean normal distribution the gain is
        drawn from. ``None`` disables the adjustment.
    default : bool
        When True (the default) the sound is returned untouched.

    Returns
    -------
    ``sound`` itself when no adjustment is requested, otherwise whatever
    ``sound.apply_gain`` returns for the sampled gain.
    """
    if default or deviation is None:
        return sound

    # Draw the gain around 0 so the resulting level is centred on the level
    # of ``sound`` itself. The level is read from the argument, not from a
    # notebook-global clip, and is not applied as if it were a gain.
    difference = np.random.normal(0.0, deviation)
    print(f'Original : {sound.dBFS}')
    print(f'Deviated by : {difference}')
    return sound.apply_gain(difference)

def sample_from_background( background , total_duration_sec , clip_size = 8 ,  multiplier = 1000 ):
    """Cut a random window of ``clip_size`` seconds out of ``background``.

    Parameters
    ----------
    background : sliceable audio object
        Sliced as ``background[start : end]`` with bounds scaled by
        ``multiplier``.
    total_duration_sec : float
        Duration of ``background`` in seconds.
    clip_size : float
        Length of the window in seconds.
    multiplier : float
        Factor converting seconds to the unit used for slicing.

    Returns
    -------
    The slice of ``background`` covering the sampled window.
    """
    # Latest start (in seconds) that still leaves room for a full window.
    # Clamped at 0 so a background shorter than the window never produces a
    # negative start offset.
    latest_start = max(total_duration_sec - clip_size, 0)
    selection_start = np.random.uniform(0, latest_start)
    selection_end = (selection_start + clip_size) * multiplier

    return background[selection_start * multiplier : selection_end]

def overlay_clip_to_bg( background , clip ):
    '''
        Dub ``clip`` onto ``background`` at a random position.

        The start is drawn so that the whole clip fits inside the background
        whenever the background is at least as long as the clip. If the clip
        is longer, it starts at 0 and the end is capped at the background
        length.

        Returns a 3-tuple: the result of ``background.overlay``, the start
        offset and the end offset of the dubbed region, in the units of
        ``len(background)``.
    '''
    # Latest start at which the complete clip still fits in the background.
    latest_start = max(len(background) - len(clip), 0)
    overlay_point_start = np.random.uniform(0, latest_start)
    overlay_point_end = min(len(background), overlay_point_start + len(clip))

    return background.overlay( clip , position =  overlay_point_start ) , overlay_point_start , overlay_point_end

In [13]:
len(clip)

662

In [14]:
%%time
## rough timing check: draw 3000 random background windows
for i in range(3000):
    _ = sample_from_background(song, total_song_length)

Wall time: 3.6 s


In [15]:
def generate_single_example( background , voice_clips , positive_prob = 0.5 ):
    """Build one training example from a random window of ``background``.

    Parameters
    ----------
    background : audio object exposing ``duration_seconds``
        Track the window is sampled from via ``sample_from_background``.
    voice_clips : sequence
        Candidate clips; one is chosen with ``sample_from_activate`` for a
        positive example.
    positive_prob : float
        Probability of producing a positive example. The default 0.5 keeps
        the original even split.

    Returns
    -------
    tuple
        ``(audio, start, end)``. For a positive example this is the result
        of ``overlay_clip_to_bg``; for a negative example it is the plain
        window with ``None`` for both offsets.
    """
    total_background_length = background.duration_seconds
    background_window = sample_from_background( background , total_background_length )

    # positive example: draw > 1 - positive_prob (draw > 0.5 by default)
    if np.random.uniform(0, 1) > 1 - positive_prob:
        print('Positive Example Added')
        clip_to_dub = sample_from_activate( voice_clips )
        overlayed_clip, time_stamp_start , time_stamp_end = overlay_clip_to_bg( background_window, clip_to_dub )
        return overlayed_clip , time_stamp_start , time_stamp_end

    print('Negative Example Added')
    return background_window , None, None

In [16]:
test_overlay , s ,e = generate_single_example( song , clips )

Negative Example Added


In [17]:
s,e

(None, None)

In [18]:
len(test_overlay)

8000

In [19]:
play(test_overlay)

In [20]:
pwd

'c:\\Users\\Robin\\Downloads\\million_dollar_projects\\podcast_research\\Podcast-Audio-Processing'

In [21]:
# build the path with os.path.join so the export is not tied to Windows separators
test_overlay.export(os.path.join('data', 'external', 'processed', 'test.wav'), format='wav')

<_io.BufferedRandom name='data\\external\\processed\\test.wav'>

In [22]:
import pandas as pd
meta_data = pd.DataFrame(columns=['filename','start_time','end_time'])

In [23]:
def prep_examples( no_of_examples = 2000 ):
    """Generate training clips, export each as WAV and return their metadata.

    Each example comes from ``generate_single_example(song, clips)``, using
    the notebook-level ``song`` and ``clips``, and is written to
    ``data/external/processed/<i>.wav``.

    Parameters
    ----------
    no_of_examples : int
        Number of clips to generate.

    Returns
    -------
    pandas.DataFrame
        One row per clip with columns ``filename``, ``start_time`` and
        ``end_time``. The two offsets are missing for negative examples.
    """
    import pandas as pd

    # os.path.join keeps the target directory portable across platforms
    save_path = os.path.join("data", "external", "processed")
    columns = ['filename', 'start_time', 'end_time']

    # collect plain records and build the frame once, not row by row
    records = []
    for i in range( no_of_examples ):
        save_name = f'{i}.wav'
        sample , s , e = generate_single_example( song , clips )
        sample.export( os.path.join( save_path , save_name ) , format='wav' )
        records.append( ( save_name , s , e ) )

    return pd.DataFrame( records , columns=columns )


In [24]:
meta = prep_examples(2000)

xample Added
Positive Example Added
Positive Example Added
Positive Example Added
Positive Example Added
Positive Example Added
Negative Example Added
Negative Example Added
Positive Example Added
Negative Example Added
Positive Example Added
Positive Example Added
Negative Example Added
Positive Example Added
Negative Example Added
Positive Example Added
Positive Example Added
Negative Example Added
Positive Example Added
Negative Example Added
Negative Example Added
Negative Example Added
Positive Example Added
Negative Example Added
Negative Example Added
Negative Example Added
Positive Example Added
Negative Example Added
Negative Example Added
Negative Example Added
Positive Example Added
Positive Example Added
Positive Example Added
Negative Example Added
Positive Example Added
Negative Example Added
Negative Example Added
Positive Example Added
Negative Example Added
Negative Example Added
Positive Example Added
Positive Example Added
Positive Example Added
Positive Example Adde

In [25]:
meta.start_time.isna().value_counts()

False    1006
True      994
Name: start_time, dtype: int64

In [26]:
# write the metadata table to CSV, leaving out the DataFrame index
meta.to_csv('data//meta_data.csv',index=False)