# Cutting audio mp3 files
## Imports

In [2]:
import pandas as pd
import os
import pydub

## Function definitions
### absolute_file_paths
Returns a list of the absolute paths of all files located directly inside the specified directory (subdirectories are not searched).

### cut_mp3
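Cuts the long MP3 files in the specified *mp3_dir* according to the segmenter output (tab-separated files) in *segment_dir*, exports every segment labelled `speech` as a WAV file to *target_dir*, and writes a `data.csv` index there listing each exported file and its length.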

In [3]:
def absolute_file_paths(directory):
    """Return the absolute paths of the files directly inside *directory*.

    Only the top level of *directory* is listed; files in subdirectories are
    not included.

    Args:
        directory: Path of the directory to list.

    Returns:
        A list of absolute file paths. The list is empty when *directory*
        contains no files, does not exist, or is not a directory, so callers
        can always iterate over or sort the result.
    """
    # os.walk yields the top-level directory first, and yields nothing at all
    # when the path is missing or is not a directory -- hence the default.
    dirpath, _, filenames = next(os.walk(directory), (directory, [], []))
    return [os.path.abspath(os.path.join(dirpath, f)) for f in filenames]

def cut_mp3(segment_dir, mp3_dir, target_dir):
    """Cut the speech segments out of long MP3 files and export them as WAVs.

    For every segmenter file in *segment_dir*, the MP3 with the same base
    name is loaded from *mp3_dir* and each row labelled ``speech`` is
    exported to *target_dir* as ``<name>_<NNNN>.wav``. Every cut is widened
    by 100 ms on both sides (the start is clamped at 0). An index file
    ``data.csv`` with the columns ``file,length`` is written to *target_dir*;
    ``length`` is ``len()`` of the exported cut (milliseconds for a pydub
    ``AudioSegment``).

    Args:
        segment_dir: Directory with the segmenter output: tab-separated
            files named ``<name>.csv`` with the columns ``labels``,
            ``start`` and ``stop``. ``start``/``stop`` are multiplied by
            1000 before slicing, i.e. they are assumed to be in seconds.
        mp3_dir: Directory holding the matching ``<name>.mp3`` files.
        target_dir: Output directory; created if it does not exist.

    Returns:
        None. If *segment_dir* or *mp3_dir* does not exist, a message is
        printed and nothing is written.
    """
    if not os.path.exists(segment_dir):
        print('Segmeter directory doesn\' exist.')
        return
    if not os.path.exists(mp3_dir):
        print('MP3 directory doesn\' exist.')
        return
    if not os.path.exists(target_dir):
        # makedirs also creates missing parent directories.
        os.makedirs(target_dir)

    # `or []` guards against the helper returning None when segment_dir
    # exists but cannot be walked (e.g. it is a regular file).
    csv_paths = sorted(absolute_file_paths(segment_dir) or [])

    # The context manager guarantees the index file is flushed and closed,
    # even when loading or exporting one of the audio files fails.
    with open(os.path.join(target_dir, 'data.csv'), 'w') as wav_csv:
        wav_csv.write('file,length' + '\n')

        for csv_path in csv_paths:
            base_name = csv_path.split(os.path.sep)[-1]
            filename = base_name.split('.')[0]
            audio_path = os.path.join(mp3_dir, base_name.replace('.csv', '.mp3'))
            audio = pydub.AudioSegment.from_mp3(audio_path)
            df = pd.read_csv(csv_path, delimiter='\t')
            speech_rows = df[df['labels'] == 'speech']

            # The counter restarts for each source file and numbers only the
            # exported (speech) segments.
            for count, (_, line) in enumerate(speech_rows.iterrows()):
                # Pad each segment by 100 ms on both sides; the start must
                # not become negative.
                start_time = max(int(line['start']*1000 - 100), 0)
                stop_time = int(line['stop']*1000 + 100)
                audio_cut = audio[start_time:stop_time]
                export_path = os.path.join(
                    target_dir, filename + '_' + str(count).zfill(4) + '.wav')
                audio_cut.export(export_path, format='wav')
                wav_csv.write(export_path + ',' + str(len(audio_cut)) + '\n')

In [None]:
# Cut the MP3s in cpm_mp3 using the segmenter output in cpm_mp3_seg and
# write the resulting WAV segments (plus data.csv) to cpm_wav_cut.
cut_mp3(
    segment_dir='/opt/shared_data/cpm_mp3_seg/',
    mp3_dir='/opt/shared_data/cpm_mp3/',
    target_dir='/opt/shared_data/cpm_wav_cut/',
)