In [2]:
from google.colab import drive, auth
import sys
import numpy as np
import pandas as pd
import tensorflow_hub as hub
import librosa
import matplotlib.pyplot as plt
import csv
from IPython.display import Audio

#sklearn libraries
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

#tensorflow for modles
from tensorflow.keras.models import Sequential
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Dropout, LSTM, Conv2D, MaxPooling2D, Flatten, concatenate, Reshape, BatchNormalization
import tensorflow_hub as hub
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras import regularizers
from tensorflow.keras.callbacks import Callback,EarlyStopping
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
from keras import metrics
tf.get_logger().setLevel('INFO')

#mount drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
#declare sampling rate
sampling_rate = 16000
#get the audio path of all the audios
audio_path = '/content/drive/MyDrive/UCB-MIDS/SEM-2/MACHINE-LEARNING-207/207-Project/data/train/librosa_loaded/'

In [4]:
#Read the data and get the shapes of the dataframe
bird_df = pd.read_csv('/content/drive/MyDrive/UCB-MIDS/SEM-2/MACHINE-LEARNING-207/207-Project/notebooks/RG/3_species/train_val.csv')
bird_df.shape

#Gather the training data from the main dataset
bird_train_df = bird_df[bird_df['data'] == 'train']
print("train data:",bird_train_df.shape)

#Gather the separate validation set from the csv
bird_val_df =  bird_df[bird_df['data'] == 'val']
print("validation data:",bird_val_df.shape)

train data: (657, 9)
validation data: (283, 9)


In [5]:
#Function to load the audio
def load_audio(file_name):
    audio = np.load(audio_path + file_name)
    return audio

In [6]:
#chunk the audio into 5 sec frame with no overlaps
def frame_audio(
      audio_array: np.ndarray,
      window_size_s: float = 8.0,
      hop_size_s: float = 4.0,
      sample_rate = sampling_rate,
      ) -> np.ndarray:

    """Helper function for framing audio for inference."""
    """ using tf.signal """
    if window_size_s is None or window_size_s < 0:
        return audio_array[np.newaxis, :]
    frame_length = int(window_size_s * sample_rate)
    hop_length = int(hop_size_s * sample_rate)
    framed_audio = tf.signal.frame(audio_array, frame_length, hop_length, pad_end=False)
    return framed_audio

In [8]:
#extract mfcc from frames
def extract_mfcc_from_frames(framed_audio, class_label, sample_rate=16000, n_mfcc=40):
  mfcc_frames = []
  target_label = []
  for frame in framed_audio:
    frame = np.array(frame)

    #Extract mfcc from the main audio frame
    mfcc = librosa.feature.mfcc(y=frame, sr=sample_rate, n_mfcc=n_mfcc)

    #Transpose the MFCC matrix
    transposed_mfcc = mfcc.T

    #lets get the mfcc
    mfcc_frames.append(transposed_mfcc)

    #append the labels for every frame
    target_label.append(class_label)

  return mfcc_frames,target_label

In [18]:
import librosa
import numpy as np
from IPython.display import Audio

def split_audio_and_time_stretch(audio_file, duration, sampling_rate, target_duration):
    # Load the audio using librosa
    audio, _ = librosa.load(audio_file, sr=sampling_rate)

    # Calculate the time stretch factor to make the audio segments target_duration seconds long
    time_stretch_factor = target_duration / duration

    # Resample the audio to make it target_duration seconds long
    audio_stretched = librosa.resample(audio, orig_sr=sampling_rate, target_sr=int(sampling_rate * time_stretch_factor))

    # Split the audio into segments of target_duration seconds
    num_segments = len(audio_stretched) // (target_duration * sampling_rate)
    time_stretched_segments = np.array_split(audio_stretched, num_segments)

    return time_stretched_segments

In [20]:
audio_file = "/content/drive/MyDrive/UCB-MIDS/SEM-2/MACHINE-LEARNING-207/207-Project/BirdCLEF/train_audio/abethr1/XC128013.ogg"  # Replace with the path to your audio file
duration = 46  # Replace with the duration of the original audio in seconds
sampling_rate = 22050  # Replace with the sampling rate of your audio file
target_duration = 8  # Replace with the desired duration of each segment in seconds

# Call the function
time_stretched_segments = split_audio_and_time_stretch(audio_file, duration, sampling_rate, target_duration)

# Listen to each time-stretched segment
for i, segment in enumerate(time_stretched_segments):
    display(Audio(segment, rate=sampling_rate))


ValueError: ignored

In [22]:
#Count the number of audio files that are exactly 15 seconds
num_15_sec = len(bird_train_df[bird_train_df['duration_secs_32000'] > 8])

#Count the number of audio files that are less than 15 seconds
num_less_than_15_sec = len(bird_train_df[bird_train_df['duration_secs_32000'] < 8])

print("Number of audio files that are exactly 8 seconds:", num_15_sec)
print("Number of audio files that are less than 8 seconds:", num_less_than_15_sec)

Number of audio files that are exactly 8 seconds: 560
Number of audio files that are less than 8 seconds: 97


In [28]:
# Assuming bird_train_df is your DataFrame containing the audio information
# Find audio files with duration less than 6 seconds
audios_less_than_6_sec = bird_train_df[bird_train_df['duration_secs_32000'] < 6]

# Group the DataFrame by primary_label and get the count of audio files in each group
grouped_by_primary_label = audios_less_than_6_sec.groupby('primary_label').size()

# Display the count of audio files for each primary_label
print(grouped_by_primary_label)

primary_label
barswa      7
comsan     19
eaywag1     8
dtype: int64


In [30]:
# Assuming bird_train_df is your DataFrame containing the audio information
# Find audio files with duration less than 6 seconds
audios_less_than_6_sec = bird_train_df[bird_train_df['duration_secs_32000'] < 10]

# Group the DataFrame by primary_label and get the count of audio files in each group
grouped_by_primary_label = audios_less_than_6_sec.groupby('primary_label').size()

# Display the count of audio files for each primary_label
print(grouped_by_primary_label)

primary_label
barswa     30
comsan     73
eaywag1    41
dtype: int64
