# MANIPULATING AUDIO FILES WITH PYDUB

## Working with audio files requires them to be in a consistent format. PyDub helps with this by letting you manipulate audio files, ensuring they are consistent to work with wherever you use them.

In [None]:
# If you do not have pydub, install it with the command below (also install ffmpeg)
# pip install pydub

In [1]:
# Import pydub's AudioSegment class, which is used to load audio files
from pydub import AudioSegment
# Load the wav file from disk into an AudioSegment
wav_file = AudioSegment.from_file(file='sounds/r1_goodAfternoon.wav')
# Check the type of the loaded object (displayed as the cell output)
type(wav_file)



pydub.audio_segment.AudioSegment

In [None]:
# Now we can even play the audio 
# To do so, first install simpleaudio: pip install simpleaudio

In [None]:
# Import the play function from pydub's playback module
from pydub.playback import play
# Pass the loaded AudioSegment to play() to hear the audio
play(wav_file)

## Remember Audio Parameters?
### You can check the frame_rate, sample width, length and number of channels

In [2]:
# Let's check out a few parameters
# len() of an AudioSegment gives the duration of the audio in milliseconds
len(wav_file)

4389

In [3]:
# Frame rate: number of frames (samples) per second of audio
wav_file.frame_rate

48000

In [4]:
# Sample width: number of bytes used to store each sample
wav_file.sample_width

2

In [5]:
# Max: the highest amplitude of any sample in the audio file (an indicator of loudness)
wav_file.max

18775

# Changing Parameters

## The audio file parameters can be changed by using the syntax  below
### changed_audio_segment = audio_segment.set_ATTRIBUTENAME(value)

In [7]:
# Change the frame rate to 16000; the converted segment is bound to a new name
changed_audio_segment = wav_file.set_frame_rate(16000)
# Check the frame rate of the converted segment
changed_audio_segment.frame_rate

16000

In [8]:
# Set sample_width to 1 byte per sample; the result is bound to a new name
wav_file_sw_1 = wav_file.set_sample_width(1)
# Check the sample_width of the converted segment
print(f"New sample width: {wav_file_sw_1.sample_width}")

New sample width: 1


# NOTE!!
### Lowering the values generally leads to lower audio quality and worse transcriptions,
### while increasing them may increase the file size without improving the quality of the transcription.

### Best to explore with different values and find out the ideal tradeoff.

In [10]:
# Speech recognition works best on clean, audible speech.
# If your audio files are too quiet or too loud, it can hinder transcription.

# Lower the volume by 60 dB: subtracting a number from an AudioSegment reduces its volume
quiet_volume_adjusted = wav_file - 60
# Uncomment to listen to the quieter audio
#play(quiet_volume_adjusted)

In [14]:
# Increase the volume by 15 dB: adding a number to an AudioSegment raises its volume.
# This is applied to the already-quietened segment (-60 dB), not to the original wav_file.
louder_volume_adjusted = quiet_volume_adjusted + 15

### Sometimes you'll have audio files where the speech is loud in some portions and quiet in others and this variance in volume can hinder transcription.

### PyDub's effects module has a function called normalize() which finds the maximum volume of an AudioSegment, then adjusts the rest of the AudioSegment to be in proportion. 
### This means the quiet parts will get a volume boost.

In [15]:
# Import normalize from pydub's effects module (AudioSegment was imported earlier)
from pydub.effects import normalize
# Import target audio file

loud_then_quiet = AudioSegment.from_file('sounds/r8_loud_to_quiet.wav')
# Normalize target audio file; the normalized audio is bound to a new name
normalized_audio = normalize(loud_then_quiet)
# Uncomment to listen to the normalized audio
#play(normalized_audio)

# SLICING AUDIO FILES


## Another feature of AudioSegment is that you can slice and combine your audio

In [16]:
# Import the audio file that has static noise at the start
static_at_start = AudioSegment.from_file('sounds/r6_static_noise_at_start.wav')

# Slice off the static noise: slice indices are in milliseconds,
# so [4000:] keeps everything after the first 4 seconds
no_static = static_at_start[4000:]
# Uncomment to listen to the audio without the static
#play(no_static)

In [None]:
# to combine audio is relatively easy
#
# combined_audio = audio_1 + audio_2

# Splitting stereo audio to mono with PyDub

## When transcribing phone calls, there's usually more than one speaker, and it's difficult to transcribe multiple speakers at once.
## PyDub's split_to_mono() function can help with this.

In [17]:
# Import the stereo audio file
stereo_phone_call = AudioSegment.from_file('sounds/ex3_stereo_call.wav')
# Check how many channels the loaded audio has
print(f"Stereo number channels: {stereo_phone_call.channels}")

Stereo number channels: 2


In [18]:
# Split the stereo phone call into separate mono segments
channels = stereo_phone_call.split_to_mono()
# channels is now a list; print the channel count of each of its two segments
print(f"Stereo number channels: {channels[0].channels},{channels[1].channels}")

Stereo number channels: 1,1


In [19]:
# Keep each split channel under its own name
phone_call_channel_1 = channels[0]
phone_call_channel_2 = channels[1]
# Uncomment to listen to the second channel
#play(phone_call_channel_2)
# Each channel can now be passed to the speech API and transcribed on its own

# Exporting and reformatting audio files

## If your files have the wrong extension, you can use PyDub to export and save them as new audio files

### You can do this by using the .export() function on any instance of an AudioSegment you've created. The export() function takes two parameters, out_f, or the destination file path of your audio file and format, the format you'd like your new audio file to be. Both of these are strings. format is "mp3" by default so be sure to change it if you need.

In [None]:
# This works if you have installed ffmpeg
# Import the .mp3 file
mp3_file = AudioSegment.from_file('sounds/r5.mp3')
# Export the audio as a new .wav file at the given destination path
mp3_file.export(out_f='sounds/r5_welcome.wav',format='wav')

# Manipulating multiple audio files with PyDub

## You've seen how to convert a single file using PyDub, but what if you had a folder with multiple different file types?

In [None]:
# Convert every file in a folder to .wav
import os

# Create the folder path and list the files inside it. 'sounds' is the
# directory used by the examples above; point it at your own folder as needed.
folder_path = 'sounds'
folder = [
    os.path.join(folder_path, file_name)
    for file_name in sorted(os.listdir(folder_path))
    # Skip sub-directories: only regular files can be read as audio
    if os.path.isfile(os.path.join(folder_path, file_name))
]

# Loop through the files in the folder
for audio_file in folder:
    # Create the new .wav filename (same base name, written to the working directory)
    wav_filename = os.path.splitext(os.path.basename(audio_file))[0] + ".wav"

    # Report progress for every file, before its conversion starts
    print(f"Creating {wav_filename}...")

    # Read audio_file and export it in wav format
    AudioSegment.from_file(audio_file).export(out_f=wav_filename, format='wav')


# An Audio Processing workflow

## You've seen how to import and manipulate a single audio file using PyDub. 
## What if you had a folder with multiple audio files you needed to convert?
## Here we'll use PyDub to format a folder with files to be ready to use with speech_recognition.
### Suppose you find that some files are quieter than others and start with static. To fix this, we'll use PyDub to cut the static, increase the sound level and convert them to the .wav extension.

In [None]:
import os

# Clean up every audio file in `folder` and save the result as .wav.
# NOTE(review): `folder` is not defined in this cell; it must already hold an
# iterable of audio file paths before this cell runs.
for audio_file in folder:
    # Load the original file, which starts with static
    file_with_static = AudioSegment.from_file(audio_file)

    # Cut the first 3 seconds of static off (slice indices are in milliseconds)
    file_without_static = file_with_static[3000:]

    # Increase the volume by 10 dB
    louder_file_without_static = file_without_static + 10

    # Create the .wav filename for export (same base name, written to the working directory)
    wav_filename = os.path.splitext(os.path.basename(audio_file))[0] + ".wav"

    # Report progress for every file, before its export starts
    print(f"Creating {wav_filename}...")

    # Export the louder file without static as .wav
    louder_file_without_static.export(wav_filename, format='wav')
