<a href="https://colab.research.google.com/github/sanjeevtrivedi/pgd-dsai/blob/main/AudioEncoding.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# prompt: Mount gdrive and create two file paths My drive/PML/AudioEncoding with file Sample1.mp3 and sample2.mp3

from google.colab import drive
import os

drive.mount('/content/drive')

# Define file paths
audio_dir = "/content/drive/My Drive/PML/AudioEncoding"
file1_path = os.path.join(audio_dir, "sample1.mp3")
file2_path = os.path.join(audio_dir, "sample2.mp3")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# prompt: pip install librosa numpy matplotlib scipy scikit-learn soundfile

!pip install librosa numpy matplotlib scipy scikit-learn soundfile




In [None]:
import librosa
import numpy as np

def load_audio_files(file_paths):
  """Loads audio files into a list of NumPy arrays.

  Args:
    file_paths: A list of file paths to audio files.

  Returns:
    A list of NumPy arrays, where each element is an audio file loaded as a NumPy array.
    Returns None if any error occurs during file loading.
  """
  audio_data = []
  for file_path in file_paths:
    try:
      # Load audio file using librosa
      y, sr = librosa.load(file_path, sr=None)  # sr=None preserves original sample rate
      audio_data.append(y)
    except Exception as e:
      print(f"Error loading file {file_path}: {e}")
      return None  # Return None to indicate an error
  # Instead of creating numpy array return a list of audio data
  return audio_data


# Example usage (assuming file1_path and file2_path are defined)
file_paths = [file1_path, file2_path]
audio_array = load_audio_files(file_paths)

if audio_array is not None:
  print("Audio files loaded successfully.")
  #print(f"Shape of the audio array: {audio_array.shape}") # This won't work now
  print(f"Number of audio files loaded: {len(audio_array)}")
  print(f"Shape of the first audio file: {audio_array[0].shape}")
  print(f"Shape of the second audio file: {audio_array[1].shape}")
  # Now you can work with the audio_array (as a list)
else:
  print("Error loading audio files. Check file paths and permissions.")

Audio files loaded successfully.
Number of audio files loaded: 2
Shape of the first audio file: (5384326,)
Shape of the second audio file: (9585510,)


In [None]:
# prompt: Do the Pre-emphasis Filter

def pre_emphasis_filter(audio_data, alpha=0.97):
  """Applies pre-emphasis filter to audio data.

  Args:
    audio_data: A NumPy array representing the audio signal.
    alpha: The pre-emphasis coefficient (default is 0.97).

  Returns:
    A NumPy array representing the filtered audio signal.
  """
  if not isinstance(audio_data, np.ndarray):
    print("Error: Input audio_data must be a NumPy array.")
    return None  # Handle the case where audio_data is not a NumPy array

  emphasized_audio = np.append(audio_data[0], audio_data[1:] - alpha * audio_data[:-1])
  return emphasized_audio


# Assuming audio_array is a list of audio signals (NumPy arrays)
if audio_array is not None:
  filtered_audio_signals = []
  for audio in audio_array:
    filtered_audio = pre_emphasis_filter(audio)
    if filtered_audio is not None:
        filtered_audio_signals.append(filtered_audio)
    else:
        print("Skipping pre-emphasis for an invalid audio signal.")

  if filtered_audio_signals:
    print("Pre-emphasis applied successfully.")
    print(f"Shape of the first filtered audio file: {filtered_audio_signals[0].shape}")
    print(f"Shape of the second filtered audio file: {filtered_audio_signals[1].shape}")
    # Now you can work with the filtered_audio_signals
  else:
    print("No valid audio signals to process.")


Pre-emphasis applied successfully.
Shape of the first filtered audio file: (5384326,)
Shape of the second filtered audio file: (9585510,)


Frame the Signal and Apply Windowing and can be skipped

In [None]:
# prompt: Frame the Signal and Apply Windowing

import librosa
import numpy as np

def frame_audio(audio, frame_length, hop_length):
  """Frames the audio signal.

  Args:
    audio: A NumPy array representing the audio signal.
    frame_length: The length of each frame in samples.
    hop_length: The hop length between frames in samples.

  Returns:
    A NumPy array of shape (num_frames, frame_length) representing the framed audio.
  """
  num_frames = 1 + (len(audio) - frame_length) // hop_length
  frames = np.zeros((num_frames, frame_length))
  for i in range(num_frames):
    frames[i] = audio[i * hop_length:i * hop_length + frame_length]
  return frames

def apply_window(frames, window_type='hamming'):
  """Applies a window function to each frame.

  Args:
    frames: A NumPy array of shape (num_frames, frame_length) representing the framed audio.
    window_type: The type of window function to apply ('hamming', 'hanning', etc.).

  Returns:
    A NumPy array of shape (num_frames, frame_length) representing the windowed frames.
  """
  window = getattr(np, window_type)(frames.shape[1])
  windowed_frames = frames * window
  return windowed_frames

# Example usage with the first filtered audio signal:
if filtered_audio_signals:
  frame_length = 2048  # Adjust as needed
  hop_length = 512  # Adjust as needed
  first_audio_frames = frame_audio(filtered_audio_signals[0], frame_length, hop_length)

  windowed_frames = apply_window(first_audio_frames)

  print("Framing and windowing applied successfully.")
  print(f"Shape of the first audio frames: {first_audio_frames.shape}")
  print(f"Shape of the windowed frames: {windowed_frames.shape}")
else:
  print("No valid filtered audio signals to process.")


Framing and windowing applied successfully.
Shape of the first audio frames: (10513, 2048)
Shape of the windowed frames: (10513, 2048)


In [None]:
# prompt: Extract MFCC Features

def extract_mfccs(windowed_frames, sr, n_mfcc=20):
    """Extracts MFCC features from windowed audio frames.

    Args:
        windowed_frames: A NumPy array of shape (num_frames, frame_length) representing the windowed audio frames.
        sr: The sample rate of the audio signal.
        n_mfcc: The number of MFCCs to extract (default is 20).

    Returns:
        A NumPy array of shape (num_frames, n_mfcc) representing the MFCC features.
    """
    mfccs = librosa.feature.mfcc(S=librosa.power_to_db(librosa.stft(windowed_frames.flatten(), n_fft=2048)), n_mfcc=n_mfcc, sr=sr)
    return mfccs

# Example usage with the windowed frames:
if filtered_audio_signals:
    sr = 22050  # Replace with the actual sample rate if known
    mfccs = extract_mfccs(windowed_frames, sr=sr)

    print("MFCCs extracted successfully.")
    print(f"Shape of the MFCCs: {mfccs.shape}")
else:
    print("No valid windowed frames to process.")


  mfccs = librosa.feature.mfcc(S=librosa.power_to_db(librosa.stft(windowed_frames.flatten(), n_fft=2048)), n_mfcc=n_mfcc, sr=sr)


MFCCs extracted successfully.
Shape of the MFCCs: (20, 42053)


In [None]:
# prompt: Print the freatures head after converting into dataframes

import pandas as pd

# Assuming 'mfccs' is the NumPy array containing the MFCC features
if 'mfccs' in locals():
    # Convert MFCCs to a DataFrame
    mfccs_df = pd.DataFrame(mfccs.T)  # Transpose to have frames as rows

    # Print the head of the DataFrame
    print(mfccs_df.head())
else:
    print("mfccs variable not found. Please ensure the MFCC extraction code has been executed successfully.")


           0           1          2          3          4          5   \
0 -800.099994  346.779010  57.017684 -18.100890  -8.872198 -11.257400   
1 -725.522540  429.350888  -6.554317 -14.626890  20.103321 -23.773131   
2 -752.854691  488.010365 -37.354213 -10.981864  34.226774 -28.949057   
3 -695.523674  343.011851  30.742115  -5.646907  20.640031  -2.153186   
4 -708.530001  268.682894  49.933304  -1.528434   5.986457  -2.751555   

          6          7          8          9          10         11  \
0   4.545213   3.128058 -18.592855 -20.052229  -6.435541  10.278706   
1   1.147689  -8.395159 -22.879504   3.245004 -10.003518  -3.129512   
2   2.533276 -16.780498 -22.315548  17.361293 -14.435732  -9.474195   
3 -11.699178 -16.473867  -7.107546   6.716165 -10.788779  -6.242123   
4 -10.477893 -14.535877  -6.549513   1.583280  -5.991547   0.059249   

          12         13         14         15         16         17  \
0   5.066026 -26.236538 -18.487093  19.741057  11.433748 -15.44

In [None]:
# prompt: Save the features into csv

# Save the DataFrame to a CSV file
mfccs_df.to_csv('/content/drive/My Drive/PML/Audio Encoding/mfcc_features.csv', index=False)
print("MFCC features saved to CSV file.")


MFCC features saved to CSV file.


In [None]:
import pandas as pd
from scipy.stats import skew, kurtosis

def extract_stats(mfcc_frames,n_mfcc=20):
    """
    Extracts MFCCs from an audio file and calculates statistical summaries
    for each coefficient.

    Args:
        audio_path (str): Path to the audio file.
        n_mfcc (int): Number of MFCC coefficients to extract.

    Returns:
        tuple: A tuple containing:
            - mfcc_frames (numpy.ndarray): The MFCC frame sequence (T x n_mfcc).
            - mfcc_stats (pandas.Series): A Series containing the statistical
              summary for each MFCC coefficient. Returns None if loading fails.
    """
        # Create a fixed length feature vector by creating statistical summaries of each MFCC
        # This will create and concatenate 20 statistical values calculated
        # It should be noted that the number of columns of MFCC will vary depending
        # But the feature vector will be : 20 original coefficients + (20 x number_of_stats)
        # In the following case, the feature vector will have 20 + 20 x 9 = 200 values
    mfcc_stats = {}
    for i in range(n_mfcc):
        coefficient = mfcc_frames[i, :]
        mfcc_stats[f'mfcc_{i}_mean'] = np.mean(coefficient)
        mfcc_stats[f'mfcc_{i}_min'] = np.min(coefficient)
        mfcc_stats[f'mfcc_{i}_max'] = np.max(coefficient)
        mfcc_stats[f'mfcc_{i}_25_percentile'] = np.percentile(coefficient, 25)
        mfcc_stats[f'mfcc_{i}_median'] = np.median(coefficient)
        mfcc_stats[f'mfcc_{i}_75_percentile'] = np.percentile(coefficient, 75)
        mfcc_stats[f'mfcc_{i}_std'] = np.std(coefficient)
        mfcc_stats[f'mfcc_{i}_skew'] = skew(coefficient)
        mfcc_stats[f'mfcc_{i}_kurtosis'] = kurtosis(coefficient)

    return pd.Series(mfcc_stats)



mfcc_feature_vector = extract_stats(mfccs, n_mfcc=20)

if mfcc_feature_vector is not None:
    print("\nExtracted MFCC Feature Vector:")
    print(mfcc_feature_vector)


Extracted MFCC Feature Vector:
mfcc_0_mean              -636.547758
mfcc_0_min              -1577.476206
mfcc_0_max               -159.525734
mfcc_0_25_percentile     -749.264205
mfcc_0_median            -637.756064
                            ...     
mfcc_19_median             -2.807818
mfcc_19_75_percentile       6.678061
mfcc_19_std                14.844871
mfcc_19_skew                0.418035
mfcc_19_kurtosis            1.235179
Length: 180, dtype: float64


In [None]:
# prompt: create a function which adds in dataframe and Extracted MFCC Feature Vector

def add_to_dataframe(df, feature_vector):
    """Adds a feature vector to a DataFrame.

    Args:
        df: The DataFrame to add the feature vector to.
        feature_vector: A pandas Series representing the feature vector.

    Returns:
        A new DataFrame with the feature vector added as a new row,
        or None if there's an error.
    """
    try:
        # Convert the Series to a DataFrame to enable concatenation
        feature_df = pd.DataFrame([feature_vector])

        # Concatenate the feature DataFrame with the existing DataFrame
        new_df = pd.concat([df, feature_df], ignore_index=True)
        return new_df
    except Exception as e:
        print(f"Error adding feature vector to DataFrame: {e}")
        return None




In [None]:
# prompt: create a empty dataframe using mfcc_feature_vector

mfcc_feature_vector_df = pd.DataFrame(columns=mfcc_feature_vector.index)

mfcc_feature_vector_df  = add_to_dataframe(mfcc_feature_vector_df, mfcc_feature_vector)

if mfcc_feature_vector_df is not None:
    print("\nDataFrame after adding feature vector:")
    print(mfcc_feature_vector_df.head())


DataFrame after adding feature vector:
   mfcc_0_mean   mfcc_0_min  mfcc_0_max  mfcc_0_25_percentile  mfcc_0_median  \
0  -636.547758 -1577.476206 -159.525734           -749.264205    -637.756064   

   mfcc_0_75_percentile  mfcc_0_std  mfcc_0_skew  mfcc_0_kurtosis  \
0           -499.522957  200.531308    -0.821985         2.172317   

   mfcc_1_mean  ...  mfcc_18_kurtosis  mfcc_19_mean  mfcc_19_min  mfcc_19_max  \
0   331.271729  ...          1.158693     -1.711279   -82.482551    78.539049   

   mfcc_19_25_percentile  mfcc_19_median  mfcc_19_75_percentile  mfcc_19_std  \
0             -11.151117       -2.807818               6.678061    14.844871   

   mfcc_19_skew  mfcc_19_kurtosis  
0      0.418035          1.235179  

[1 rows x 180 columns]


  new_df = pd.concat([df, feature_df], ignore_index=True)
