## Introduction

Workbook contains loop to convert all `.amr` files in selected folder to `.wav` files.

Adapted from [this](https://gist.github.com/Kronopath/c94c93d8279e3bac19f2) process

### Note

It is best to create a separate env and install ffmpeg as there may be existing conflicts with your base environment in anaconda.

Simply create a conda environment in the anaconda command prompt:


1. conda create -n asr

2. conda activate asr

and then install ffmpeg directly in its own env:

3. conda install -c conda-forge ffmpeg

In [10]:
import os, argparse, subprocess
from datetime import datetime

def bulk_convert(audio_src='audio_samples', verbose=True, remove_intermediate=True):
    """
    converts all .amr file is audio_src to intermediate .aud file before converting to .wav in audio_src/converted
    
    ***Args:
    - audio_src: folder containing .amr files that need conversion
               : assumes this is in same folder as workbook
    - verbose: will print progress if True
    - remove_intermediate: will remove intermediate .aud if True
    
    ***Returns:
    - folder audio_src/converted: contains converted .wav versions of the .amr files in audio_src
    
    """
    # Find the absolute file path to your audio files
    audio_src = os.path.join(os.getcwd(), audio_src)
    
    def convert_amr_file(input_dir, input_file_name, verbose=verbose, remove_intermediate=remove_intermediate):
        
        """
        converts single to intermediate .aud file before converting to .wav
        
        """
        # Create an additional folder to stored the converted .wav files
        output_dir = os.path.join(input_dir, 'converted' )
        if not os.path.isdir(output_dir): os.mkdir(output_dir)
            
        # Find the absolute file path and append the .amr file to this path
        input_file_path = os.path.join(input_dir, input_file_name)
        if verbose: print(input_file_path)
        
        # Open this input .amr file
        input_file = open(input_file_path, 'rb')
        if verbose: print(input_file)
            
        # Replace the input .amr file name with an intermediatiary .aud file
        intermediate_file_name = input_file_name.replace(".amr",".aud")
        if verbose: print(intermediate_file_name)
        
        # Create a path for the .aud file in the "/converted" folder space
        intermediate_file_path = os.path.join(output_dir, intermediate_file_name)
        if verbose: print(intermediate_file_path)
        
        # Open the intermediate file
        intermediate_file = open(intermediate_file_path, 'wb')
        if verbose: print(intermediate_file)
        
        # Write the input file to the intermediate file and close both
        intermediate_file.write(input_file.read())
        input_file.close()
        intermediate_file.close()
        
        # Replace .amr with .wav naming
        output_file_name = input_file_name.replace(".amr", ".wav")
        if verbose: print(output_file_name)
            
        # Join this .wav file path to the autput directory path
        output_file_path = os.path.join(output_dir, output_file_name)
        if verbose: print(output_file_path)

        # Create a file to dump the .aud files
        black_hole_file = open("black_hole", "w")
        
        # Convert the .aud files to .wav files with a specific sampling rate
        
        # sampling rate alteration follows this method: https://stackoverflow.com/questions/63793137/convert-mp3-to-wav-with-custom-sampling-rate
        subprocess.call([ "ffmpeg", "-i", intermediate_file_path, "-ar", "16k", output_file_path],
                        stdout = black_hole_file, stderr = black_hole_file)
        
        # Close the dump file
        black_hole_file.close()
    
        if remove_intermediate:
            # Delete the junk files
            os.remove("black_hole")
            os.remove(intermediate_file_path)
            
            ###~
    
    # Loop over each file in the directory and convert to .wav
    for dirname, dirnames, filenames in os.walk(audio_src):
        for filename in filenames:
            input_path = os.path.join(dirname, filename)
            if verbose: print(f'input_path: {input_path}')
            try: 
                convert_amr_file(dirname, filename,verbose=verbose)
                if verbose: print(f"Done converting {filename}!")
            except:
                if verbose:  print(f"ERROR on {filename}")
                    
bulk_convert(audio_src='1', verbose=True, remove_intermediate=True)

input_path: /Users/neelanpather/dev/asr/data-pre-processing/dataset/source/1/1.amr
/Users/neelanpather/dev/asr/data-pre-processing/dataset/source/1/1.amr
<_io.BufferedReader name='/Users/neelanpather/dev/asr/data-pre-processing/dataset/source/1/1.amr'>
1.aud
/Users/neelanpather/dev/asr/data-pre-processing/dataset/source/1/converted/1.aud
<_io.BufferedWriter name='/Users/neelanpather/dev/asr/data-pre-processing/dataset/source/1/converted/1.aud'>
1.wav
/Users/neelanpather/dev/asr/data-pre-processing/dataset/source/1/converted/1.wav
Done converting 1.amr!
input_path: /Users/neelanpather/dev/asr/data-pre-processing/dataset/source/1/3.amr
/Users/neelanpather/dev/asr/data-pre-processing/dataset/source/1/3.amr
<_io.BufferedReader name='/Users/neelanpather/dev/asr/data-pre-processing/dataset/source/1/3.amr'>
3.aud
/Users/neelanpather/dev/asr/data-pre-processing/dataset/source/1/converted/3.aud
<_io.BufferedWriter name='/Users/neelanpather/dev/asr/data-pre-processing/dataset/source/1/converted/

In [9]:
os.chdir('/Users/neelanpather/dev/asr/data-pre-processing/dataset/source/')