In [2]:
import matplotlib.pyplot as plt
import numpy as np
import librosa
import librosa.feature
import os
from pydub import AudioSegment
from pydub.utils import make_chunks


In [3]:
def wav_extraction(input_folder, output_folder, audio_format, chunk_size):
    """Split every audio file in ``input_folder`` into fixed-length WAV chunks.

    Each regular file in ``input_folder`` is decoded with pydub, cut into
    pieces of ``chunk_size`` and every full-length piece is exported to
    ``output_folder`` as ``chunk{i}_{n}.wav``, where ``i`` is the position of
    the piece inside its source file and ``n`` is the source-file index
    offset by the number of files already present in ``output_folder``.

    Args:
        input_folder: Directory holding the source recordings.
        output_folder: Directory receiving the WAV chunks; created if missing.
        audio_format: Format name handed to ``AudioSegment.from_file``
            (e.g. ``"mp3"``, ``"ogg"``).
        chunk_size: Chunk length handed to ``make_chunks`` and compared with
            ``len(chunk)`` (pydub expresses both in milliseconds).
    """
    os.makedirs(output_folder, exist_ok=True)

    # Files already in the output folder shift the source index, so a later
    # call that targets the same folder does not reuse earlier names.
    with os.scandir(output_folder) as entries:
        file_counter = sum(1 for entry in entries if entry.is_file())

    # Sub-directories cannot be decoded as audio, so only regular files are
    # kept; sorting makes the numbering reproducible across runs/platforms.
    source_names = sorted(
        name
        for name in os.listdir(input_folder)
        if os.path.isfile(os.path.join(input_folder, name))
    )

    for j, filename in enumerate(source_names):
        full_filename = os.path.join(input_folder, filename)
        audio = AudioSegment.from_file(full_filename, format=audio_format)

        for i, chunk in enumerate(make_chunks(audio, chunk_size)):
            # The trailing remainder is shorter than a full chunk: drop it so
            # every exported file has the same duration.
            if len(chunk) < chunk_size:
                continue

            # The underscore keeps the two indices apart; without it
            # (i=1, n=11) and (i=11, n=1) would both be "chunk111.wav" and
            # silently overwrite each other.
            chunk_name = f"chunk{i}_{j + file_counter}.wav"
            chunk.export(os.path.join(output_folder, chunk_name), format="wav")

        
def mfcc_extraction(input_folder, output_folder, category_folder,
                    sr=16000, n_mfcc=12, n_mels=128):
    """Compute MFCCs for every file in ``input_folder`` and split them 7:1:1.

    For each regular file the audio is loaded at ``sr`` Hz, converted to a
    mel spectrogram with ``n_mels`` bands, and reduced to ``n_mfcc``
    coefficients computed from the dB-scaled spectrogram. The resulting array
    is stored with ``np.save`` as ``mfcc_chunk_{n}.npy`` under
    ``output_folder/<split>/category_folder``, where ``<split>`` cycles
    through seven ``train`` items, one ``val`` item and one ``test`` item.

    Args:
        input_folder: Directory holding the audio chunks to process.
        output_folder: Root of the data set; ``train``/``val``/``test``
            sub-folders are created beneath it if missing.
        category_folder: Name of the per-class folder created inside each
            split folder.
        sr: Sampling rate passed to ``librosa.load`` and the feature calls.
        n_mfcc: Number of MFCC coefficients to keep.
        n_mels: Number of mel bands of the intermediate spectrogram.
    """
    split_folders = {
        split: os.path.join(output_folder, split, category_folder)
        for split in ("train", "val", "test")
    }
    # makedirs also creates output_folder and the split folders on the way.
    for folder in split_folders.values():
        os.makedirs(folder, exist_ok=True)

    # Offset new file names by what this category already contains. Counting
    # in output_folder itself would always give 0 because it only holds the
    # split directories, so a re-run would overwrite earlier features.
    file_counter = 0
    for folder in split_folders.values():
        with os.scandir(folder) as entries:
            file_counter += sum(1 for entry in entries if entry.is_file())

    # Regular files only, in sorted order, so the split is reproducible.
    source_names = sorted(
        name
        for name in os.listdir(input_folder)
        if os.path.isfile(os.path.join(input_folder, name))
    )

    for index, filename in enumerate(source_names):
        full_filename = os.path.join(input_folder, filename)

        audio_data, _ = librosa.load(full_filename, sr=sr)
        mels = librosa.feature.melspectrogram(y=audio_data, sr=sr, n_mels=n_mels)
        mfccs = librosa.feature.mfcc(
            S=librosa.power_to_db(mels, ref=np.max), sr=sr, n_mfcc=n_mfcc
        )

        # Position inside the 9-item cycle: 0-6 -> train, 7 -> val, 8 -> test.
        # The branches are mutually exclusive so that no sample is written to
        # both the validation and the test set.
        slot = index % 9
        if slot == 7:
            split = "val"
        elif slot == 8:
            split = "test"
        else:
            split = "train"

        mfcc_filename = f"mfcc_chunk_{index + file_counter}.npy"
        np.save(os.path.join(split_folders[split], mfcc_filename), mfccs)

In [4]:
# Show the working directory: the relative folder names used in the cells
# below are resolved against it.
print(os.getcwd())   

C:\programming\machine-learning\recognition


In [6]:
# Source folders, one per input audio format (inp2 / inp3 are consumed by the
# next cell).
inp1, inp2, inp3 = "data_mp3", "data_ogg", "to_conv_m4a"

# Cut the MP3 recordings into chunks of size 5000 and store them as WAV.
wav_extraction(
    input_folder=inp1,
    output_folder="audio_data_set",
    audio_format="mp3",
    chunk_size=5000,
)

In [7]:
# Both the OGG and the M4A recordings are chunked into the same output folder.
wav_extraction(
    input_folder=inp2,
    output_folder="my_audio_data_set",
    audio_format="ogg",
    chunk_size=5000,
)
wav_extraction(
    input_folder=inp3,
    output_folder="my_audio_data_set",
    audio_format="m4a",
    chunk_size=5000,
)

In [8]:
# Folders holding the WAV chunks produced by the previous cells.
inpch1, inpch2 = "my_audio_data_set", "audio_data_set"

# Category (class) folder names created inside each train/val/test split.
outmfcc1, outmfcc2 = "my_data_set_mfcc", "data_set_mfcc"

# Root folder of the resulting MFCC data set.
data_folder = "data_set"

mfcc_extraction(
    input_folder=inpch1,
    output_folder=data_folder,
    category_folder=outmfcc1,
)
mfcc_extraction(
    input_folder=inpch2,
    output_folder=data_folder,
    category_folder=outmfcc2,
)