# Getting metadata of audio files using PyDub 
Metadata is extracted from each audio file to obtain its loudness level, measured in dBFS, so that the loudness can later be normalised to a specified value

In [1]:
import pandas as pd
import numpy as np
import os
from os.path import join
from pydub import AudioSegment
from pydub import AudioSegment, silence



In [2]:
# Folder where the edited audio files (leading and trailing silences removed) are stored
audiofolder_path = '/Users/jannaha/Desktop/audio_files/output'

In [3]:
# Path to the Excel file describing the audio files. Its data includes the name of
# each wavefile (with .wav extension), the word and the selected token number.
excel_file = '/Users/jannaha/Desktop/audio_files/wavefiles.xlsx'

In [4]:
# Load the table of audio files from the workbook at excel_file
df = pd.read_excel(excel_file)

In [5]:
# Size of the loaded table as (rows, columns)
df.shape

(13, 5)

In [6]:
# Preview the first rows of the loaded table
df.head()

Unnamed: 0.1,Unnamed: 0,wavefile,word,token_no,new-wavefile
0,0,001 Saya 01.wav,saya,1,001 Saya 01-new.wav
1,1,002 kita 01.wav,kita,1,002 kita 01-new.wav
2,2,003 kalau 01.wav,kalau,1,003 kalau 01-new.wav
3,3,004 jadi 01.wav,jadi,1,004 jadi 01-new.wav
4,4,005 tapi 01.wav,tapi,1,005 tapi 01-new.wav


In [7]:
# Build the 'new-wavefile' column: the name of the copy of each recording whose
# leading and trailing silences were deleted. The last four characters of the
# original name (the '.wav' extension) are dropped and '-new.wav' is appended.
audiofile = df['wavefile']
df['new-wavefile'] = audiofile.str.slice(stop=-4) + '-new.wav'

In [8]:
# Preview the table again to check the new-wavefile column
df.head()

Unnamed: 0.1,Unnamed: 0,wavefile,word,token_no,new-wavefile
0,0,001 Saya 01.wav,saya,1,001 Saya 01-new.wav
1,1,002 kita 01.wav,kita,1,002 kita 01-new.wav
2,2,003 kalau 01.wav,kalau,1,003 kalau 01-new.wav
3,3,004 jadi 01.wav,jadi,1,004 jadi 01-new.wav
4,4,005 tapi 01.wav,tapi,1,005 tapi 01-new.wav


In [9]:
# Write the updated table (including the new-wavefile column) back to excel_file,
# overwriting the workbook it was read from.
# index=False keeps the row labels out of the sheet; without it every save/reload
# cycle adds another 'Unnamed: 0' column to the table.
# sheet_name is passed by keyword because passing it positionally is deprecated.
df.to_excel(excel_file, sheet_name='Sheet1', index=False)

Creating a new dataframe called df_att to store metadata of each audio file 
Metadata includes:
1. Bytes per sample (sample width)
2. Number of channels (should be 1, i.e. mono format files)
3. Frame rate (should be 44100 Hz, i.e. 44.1 kHz)
4. RMS
5. DBFS (This is the level of loudness of each audio file)
6. Peak amplitude
7. Total length of audio file in milliseconds 

In [10]:
# Empty table of sound-file attributes; one column per attribute, no rows yet
df_att = pd.DataFrame(
    columns=[
        'sample',
        'channels',
        'frames',
        'rms',
        'dbfs',
        'peak_amp',
        'total_len',
    ]
)

In [11]:
def get_info():
    """Collect the attributes of every audio file listed in ``df`` into ``df_att``.

    For each row of the notebook-level ``df``, the name in its 'new-wavefile'
    column is joined onto ``audiofolder_path`` and the file is loaded with
    pydub. One row is then written to the notebook-level ``df_att`` under the
    same index label, holding in order: bytes per sample, number of channels,
    frame rate, RMS, dBFS, peak amplitude and total length in milliseconds.

    A file that is missing from the folder is reported and skipped, so a
    single absent file does not abort the whole run; no row is written for it.

    Returns:
        None. The results are stored in ``df_att`` as a side effect.
    """
    for i in df.index:
        new_audiofile = df.loc[i, 'new-wavefile']
        wavefile_path = os.path.join(audiofolder_path, new_audiofile)

        # Report a missing file and move on to the next one; trying to load it
        # would raise and stop the loop.
        if not os.path.isfile(wavefile_path):
            print(f"{wavefile_path} does not exist.")
            continue

        sound = AudioSegment.from_file(wavefile_path)

        bytes_per_sample = sound.sample_width
        channel_count = sound.channels
        frames_per_second = sound.frame_rate
        loudness = sound.rms
        dbfs_loudness = sound.dBFS
        peak_amplitude = sound.max
        total_len = len(sound)

        # Values are listed in the same order as the columns of df_att
        df_att.loc[i] = [
            bytes_per_sample,
            channel_count,
            frames_per_second,
            loudness,
            dbfs_loudness,
            peak_amplitude,
            total_len,
        ]


In [12]:
# Fill df_att with one row of metadata per file listed in df
get_info()

In [13]:
# Preview the first rows of the collected metadata
df_att.head()

Unnamed: 0,sample,channels,frames,rms,dbfs,peak_amp,total_len
0,2.0,1.0,44100.0,874.0,-31.47877,5686.0,519.0
1,2.0,1.0,44100.0,474.0,-36.793432,3154.0,1043.0
2,2.0,1.0,44100.0,789.0,-32.367459,5335.0,397.0
3,2.0,1.0,44100.0,869.0,-31.528603,4919.0,623.0
4,2.0,1.0,44100.0,519.0,-36.005652,3045.0,200.0


In [16]:
# Path of the Excel file that will hold the collected metadata
write_to = '/Users/jannaha/Desktop/audio_files/output/metadata_delsilence.xlsx'

# Write the metadata table to that file; index=False leaves the row labels out
df_att.to_excel(write_to, index=False)