# Audio Feature Extraction and Clustering

This notebook demonstrates how to:
1. Extract audio features, such as chromagrams, from audio files.
2. Cluster the audio files with K-means, based on each file's note-frequency distribution.

## Prerequisites
Install the necessary libraries before proceeding.

In [None]:
!apt install -y ffmpeg
!pip install eyed3
!pip install pydub
!pip install pyAudioAnalysis
!pip install tensorflow pandas matplotlib numpy

## Import Libraries

In [None]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from pyAudioAnalysis import audioBasicIO, audioFeatureExtraction
import matplotlib.pyplot as plt

## Define Helper Functions
These functions extract chromagrams, compute note frequencies, and visualize features.

In [None]:
def getChromagram(audioData):
    """Stacks feature rows 21 through 32 of ``audioData`` into a chromagram.

    Each selected row is reshaped into a ``(1, n)`` row vector, where ``n`` is
    that row's length, and the twelve vectors are stacked vertically in index
    order.

    Args:
        audioData: Indexable collection of 1-D NumPy arrays (for example a 2-D
            feature matrix) with at least 33 rows.
            NOTE(review): rows 21-32 are presumably the twelve chroma
            coefficients of the short-term feature matrix - confirm against
            the feature names returned by the extractor.

    Returns:
        NumPy array with 12 rows, one per selected feature row.
    """
    first_row, last_row = 21, 32
    row_vectors = [
        audioData[idx].reshape(1, audioData[idx].shape[0])
        for idx in range(first_row, last_row + 1)
    ]
    return np.vstack(row_vectors)

def getNoteFrequency(chromagram):
    """Computes how often each chromagram row is the dominant one.

    For every column (window) the index of the row with the largest value is
    taken; the result is the fraction of windows won by each row.

    Counting is done per row index. The previous ``np.histogram(..., bins=12)``
    call derived its bin edges from the smallest and largest index actually
    observed, so whenever some rows never won a window the counts were
    attributed to the wrong bins.

    Args:
        chromagram: 2-D NumPy array of shape ``(rows, windows)``; the callers
            in this notebook pass 12 rows.

    Returns:
        Float array of shape ``(1, rows)`` whose entries sum to 1. When the
        chromagram has no windows, an all-zero array is returned instead of
        dividing by zero.
    """
    numberOfClasses, numberOfWindows = chromagram.shape[0], chromagram.shape[1]
    if numberOfWindows == 0:
        # Nothing to count: avoid the 0/0 division that would produce NaN.
        return np.zeros((1, numberOfClasses), dtype=float)

    # Index of the strongest row in each window.
    dominantRow = chromagram.argmax(axis=0)
    # minlength keeps one slot per row even for rows that never dominate.
    counts = np.bincount(dominantRow, minlength=numberOfClasses)
    return counts.reshape(1, numberOfClasses).astype(float) / numberOfWindows

def plotHeatmap(chromagram, smallSample=True):
    """Renders the chromagram as a colour-mapped image with a colour bar.

    Args:
        chromagram: 2-D array to display; its rows run along the y-axis.
        smallSample: When true, only the first 25 columns are drawn;
            otherwise the whole array is drawn.
    """
    # NOTE(review): the tick labels run from G# down to A and are attached to
    # rows 0..11 in that order - confirm this matches the row order of the
    # chromagram that is passed in.
    row_names = ["G#", "G", "F#", "F", "E", "D#", "D", "C#", "C", "B", "A#", "A"]
    _, ax = plt.subplots()
    if smallSample:
        shown = chromagram[:, 0:25]
    else:
        shown = chromagram
    image = ax.imshow(shown, cmap="YlGn")
    colorbar = ax.figure.colorbar(image, ax=ax, cmap="YlGn")
    colorbar.ax.set_ylabel("Amplitude", rotation=-90, va="bottom")
    tick_positions = np.arange(len(row_names))
    ax.set_yticks(tick_positions)
    ax.set_yticklabels(row_names)
    ax.set_title("Chromagram")
    plt.show()

def noteFrequencyPlot(noteFrequency):
    """Draws the first row of ``noteFrequency`` as a line plot.

    Args:
        noteFrequency: 2-D array whose first row holds 12 values; they are
            plotted against the x positions 1 through 12.
    """
    _, ax = plt.subplots()
    positions = np.arange(1, 13)
    values = noteFrequency[0, :]
    ax.plot(positions, values)
    ax.set_title("Note Frequency Distribution")
    plt.show()

## Preprocess Audio Data
Define a function to preprocess and extract features from an audio file.

In [None]:
def preProcess(file_path):
    """Reads an audio file and extracts its short-term features.

    The signal is analysed with a window of ``0.050 * Fs`` samples and a step
    of ``0.025 * Fs`` samples (50 ms / 25 ms, assuming ``Fs`` is the sampling
    rate in Hz).

    Args:
        file_path: Path of the audio file to read.

    Returns:
        Tuple ``(feature_names, features)`` - names first, feature matrix
        second - as produced by
        ``audioFeatureExtraction.st_feature_extraction``.
    """
    [Fs, x] = audioBasicIO.read_audio_file(file_path)
    # Down-mix multi-channel audio to mono before feature extraction: the
    # short-term extractor is handed a single 1-D signal.
    # NOTE(review): assumes channels lie on axis 1, i.e. the reader returns
    # shape (samples, channels) for stereo files - confirm.
    x = np.asarray(x)
    if x.ndim == 2:
        x = x.mean(axis=1)
    features, feature_names = audioFeatureExtraction.st_feature_extraction(x, Fs, 0.050 * Fs, 0.025 * Fs)
    return feature_names, features

## Example: Extract Features for a Single Audio File

In [None]:
# Example file: point this at a real audio file before running the cell.
file_path = "../input/sample_audio.wav"  # Replace with the actual path

# Extract the features, build the chromagram from them, and plot it
# (plotHeatmap draws only the first 25 columns by default).
feature_name, features = preProcess(file_path)
chromagram = getChromagram(features)
plotHeatmap(chromagram)

# Compute and plot the note-frequency distribution of the same chromagram.
noteFrequency = getNoteFrequency(chromagram)
noteFrequencyPlot(noteFrequency)

## Generate Dataset
Iterate over multiple audio files and create a dataset of note frequency arrays.

In [None]:
# Names of the files processed by getDataset, in the same order as the rows of
# the frame it returns; the clustering cell pairs them with the cluster labels.
fileList = []

def getDataset(filePath):
    """Generates a dataset of note frequencies from the audio files in a directory.

    The directory tree under ``filePath`` is walked in sorted order so that the
    row order (and therefore anything that depends on it, such as centroid
    initialisation from the first rows) is reproducible. Every file found is
    run through ``preProcess``, ``getChromagram`` and ``getNoteFrequency``.

    Side effect: the base name of each processed file is appended to the
    module-level ``fileList``, in row order.

    Args:
        filePath: Root directory to search. Files in sub-directories are
            included; a trailing path separator is not required.

    Returns:
        DataFrame with one row per processed file and the 12 note columns.
        An empty frame with those columns is returned when no file is found.
    """
    # NOTE(review): the column names run from G# down to A - confirm this
    # matches the row order of the chromagram.
    columns = ["G#", "G", "F#", "F", "E", "D#", "D", "C#", "C", "B", "A#", "A"]
    rows = []

    for root, dirs, filenames in os.walk(filePath):
        # Sorting in place makes os.walk descend in a deterministic order.
        dirs.sort()
        for file in sorted(filenames):
            # Join with the directory actually being visited, not the walk
            # root, so files in sub-directories resolve correctly.
            _, features = preProcess(os.path.join(root, file))
            chromagram = getChromagram(features)
            noteFrequency = getNoteFrequency(chromagram)
            rows.append(noteFrequency[0, :])
            # Record the name only once the row exists, keeping both aligned.
            fileList.append(file)

    # Build the frame once instead of concatenating inside the loop.
    return pd.DataFrame(rows, columns=columns)

# Build the dataset from the files under dataset_path and preview its first rows.
dataset_path = "../input/"  # Replace with actual directory
data = getDataset(dataset_path)
print(data.head())

## K-means Clustering on Audio Features

In [None]:
# Hyperparameters
k = 3          # number of clusters
epochs = 2000  # upper bound on the number of K-means iterations

# The first k samples seed the centroids, so at least k samples are required.
if data.shape[0] < k:
    raise ValueError(
        f"K-means with k={k} needs at least {k} samples, got {data.shape[0]}"
    )

# Initialize Variables
X = tf.Variable(data.values, name="X")
X_labels = tf.Variable(tf.zeros(shape=(X.shape[0],), dtype=tf.int64), name="X_labels")
C = tf.Variable(data.values[:k], name="C")  # initial centroids: the first k samples

# Training Loop
for epoch in range(epochs):
    # Squared Euclidean distance between every centroid and every sample:
    # (1, n, d) - (k, 1, d) broadcasts to (k, n, d), reduced over d to (k, n).
    distances = tf.reduce_sum(tf.square(tf.expand_dims(X, 0) - tf.expand_dims(C, 1)), axis=2)
    new_labels = tf.argmin(distances, axis=0)

    # Once the assignment stops changing the centroids are a fixed point, so
    # further iterations would reproduce exactly the same state.
    if epoch > 0 and bool(tf.reduce_all(tf.equal(new_labels, X_labels))):
        break
    X_labels.assign(new_labels)

    sums = tf.math.unsorted_segment_sum(X, X_labels, k)
    counts = tf.math.unsorted_segment_sum(tf.ones_like(X), X_labels, k)
    # A cluster with no members has counts == 0; keep its previous centroid
    # instead of letting 0/0 turn it into NaN.
    C.assign(tf.where(counts > 0, tf.math.divide_no_nan(sums, counts), C))

# Assign Final Labels
final_labels = pd.DataFrame({"Labels": X_labels.numpy(), "File Names": fileList})
print(final_labels)

## Visualize Clustering Results

In [None]:
# Scatter the samples over the first two columns of `data`, coloured by cluster label.
first_feature, second_feature = data.iloc[:, 0], data.iloc[:, 1]
cluster_ids = final_labels["Labels"]
plt.scatter(first_feature, second_feature, c=cluster_ids, cmap="viridis")
plt.title("Clustering Results")
plt.xlabel("Feature 1")
plt.ylabel("Feature 2")
plt.show()