In [1]:
# Standard libraries
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import librosa
from google.colab import drive
import os
import random

from librosa import load as lload
from librosa.feature import mfcc
from librosa.feature.inverse import mfcc_to_audio

# Mount Google Drive for access
drive.mount('/content/drive')

# Base directory path
base_directory_path = '/content/drive/MyDrive/207-Project/notebooks/RG/3_species/librosa_loaded_sr16000/'

# Augmentation functions
def add_noise(data):
    noise = np.random.randn(len(data))
    data_noise = data + 0.005 * noise
    return data_noise

def shift(data):
    return np.roll(data, 1600)

# def pitch(data, sample_rate):
#     return librosa.effects.pitch_shift(data, sample_rate, np.random.randint(-5, 5))

def change_volume(data):
    return data * np.random.uniform(low=0.75, high=1.25)

def augment(data, sample_rate):
    choice = np.random.choice([1, 2, 3])
    if choice == 1:
        return add_noise(data)
    elif choice == 2:
        return shift(data)
    elif choice == 3:
        return change_volume(data)
    else:
        return data

# Function to extract 5-second  chunks from audio
def extract_5sec_chunks(
    audio_array: np.ndarray,
    window_size_s: float = 5.0,
    hop_size_s: float = 2.5, #(with overlap)
    sample_rate=16000,
    augment_ratio=0.3
) -> np.ndarray:
    # Augment data if random value is less than augment_ratio
    if random.random() < augment_ratio:
        audio_array = augment(audio_array, sample_rate)

    frame_length = int(window_size_s * sample_rate)
    frame_step = int(hop_size_s * sample_rate)
    framed_audio = tf.signal.frame(audio_array, frame_length, frame_step, pad_end=False)

    return framed_audio



Mounted at /content/drive


In [2]:
# Load metadata
dataset_path = '/content/drive/MyDrive/207-Project/notebooks/RG/3_species/'
metadata_path = os.path.join(dataset_path, "train_val.csv")
metadata_df = pd.read_csv(metadata_path)

# Create a dictionary to map filenames to labels
labels_dict = metadata_df.set_index('filename_npy')['primary_label'].to_dict()

# Get unique filenames from the metadata
filenames = set(base_directory_path + '/' + metadata_df['filename_npy'].unique())

# Split train and validation data
train_df = metadata_df[metadata_df['data'] == 'train']
validate_df = metadata_df[metadata_df['data'] == 'val']

# Load all train audio data one time
train_audios = []
for filename in train_df['filename_npy']:
    audio = np.load('/content/drive/MyDrive/207-Project/data/train/librosa_loaded/' + filename)
    train_audios.append(audio)

# Load all validate audio data one time
val_audios = []
for filename in validate_df['filename_npy']:
    audio = np.load('/content/drive/MyDrive/207-Project/data/train/librosa_loaded/' + filename)
    val_audios.append(audio)

#Initialize an empty list to store the chunk data for training data
train_chunks = []
for audio in train_audios:
    train_chunks.append(extract_5sec_chunks(audio))

#Initialize an empty list to store the chunk data for VALIDATION data
val_chunks = []
for audio in val_audios:
    val_chunks.append(extract_5sec_chunks(audio))

#add chunks to training data
train_df['audio_chunks'] = train_chunks

#add chunks to validation data
validate_df['audio_chunks'] = val_chunks

#add chunks to trainig data
train_df['audio_chunks'] = train_chunks
train_df.head()
train_df = train_df.sample(frac=1, random_state=1234)

#add chunks to validation data
validate_df['audio_chunks'] = val_chunks
validate_df.head()
validate_df = validate_df.sample(frac=1, random_state=1234)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_df['audio_chunks'] = train_chunks
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  validate_df['audio_chunks'] = val_chunks
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_df['audio_chunks'] = train_chunks
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[ro

In [3]:
# drop the samples with less than 8 seconds in duration
metadata_df = metadata_df[metadata_df['duration_secs_32000'] >= 8]

In [4]:
def mfcc_chunks(dataframe: pd.DataFrame, sample_rate=16000, n_mfcc=13, n_fft=2048) -> tuple[np.ndarray, np.ndarray]:
    y = []
    X = []
    continents = []

    for i, row in dataframe.iterrows():
      label = row['primary_label']
      tensor_5sec = row['audio_chunks']
      continent = row['continent']


      for each in tensor_5sec:
        y.append(label)
        continents.append(continent)
        each = np.array(each)
        X_mfcc = mfcc(y=each, sr=sample_rate)
        X.append(X_mfcc)

    assert len(y) == len(X)
    assert len(y) == len(continents)

    y = np.array(y)
    X = np.array(X)
    continents = np.array(continents)

    return X, y, continents



In [6]:

#convert to X and y
X_train, y_train,  train_continents = mfcc_chunks(train_df)

#convert to X and y
X_val, y_val , val_continents= mfcc_chunks(validate_df)
print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)
print("X_val shape:", X_val.shape)
print("y_val shape:", y_val.shape)
print(train_df['primary_label'].unique())

# Save data to JSON files
X_train_list = X_train.tolist()
y_train_list = y_train.tolist()
X_val_list = X_val.tolist()
y_val_list = y_val.tolist()
train_cont  = train_continents.tolist()
val_cont = val_continents.tolist()



X_train shape: (7889, 20, 157)
y_train shape: (7889,)
X_val shape: (3121, 20, 157)
y_val shape: (3121,)
['comsan' 'eaywag1' 'barswa']


In [7]:
import json
# Save data to JSON files
with open('/content/drive/MyDrive/My-207/X_train_mfcc_aug.json', 'w') as file:
    json.dump(X_train_list, file)

with open('/content/drive/MyDrive/My-207/y_train_mfcc_aug.json', 'w') as file:
    json.dump(y_train_list, file)

with open('/content/drive/MyDrive/My-207/X_val_mfcc_aug.json', 'w') as file:
    json.dump(X_val_list, file)

with open('/content/drive/MyDrive/My-207/y_val_mfcc_aug.json', 'w') as file:
    json.dump(y_val_list, file)

with open('/content/drive/MyDrive/My-207/X_val_cont.json', 'w') as file:
    json.dump(train_cont, file)

with open('/content/drive/MyDrive/My-207/y_val_cont.json', 'w') as file:
    json.dump(val_cont, file)
