In [1]:
# Mount Google Drive into the Colab filesystem at /content/drive so the
# audio files referenced by the cells below can be read from it.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import cv2
import numpy as np
import librosa

def solution(audio_path, *, mean_energy_threshold=0.7, max_energy_threshold=90.0,
             sc_mean_threshold=4000.0):
    """Classify an impact-sound recording as 'metal' or 'cardboard'.

    Three features are derived from the magnitude STFT of the recording and
    each one casts a vote for 'metal'; the label is decided by majority
    (at least two of three votes).

    Parameters
    ----------
    audio_path : str or path-like
        Audio file to classify; passed directly to ``librosa.load``.
    mean_energy_threshold : float, optional
        The mean STFT magnitude must exceed this value to vote 'metal'.
    max_energy_threshold : float, optional
        The peak STFT magnitude must exceed this value to vote 'metal'.
    sc_mean_threshold : float, optional
        The mean spectral centroid (Hz) must be below this value to vote
        'metal'.

    Returns
    -------
    str
        ``'metal'`` when two or more features vote for it, otherwise
        ``'cardboard'``.
    """
    n_fft = 2048
    hop_length = 512

    # sr=None asks librosa to keep the file's native sampling rate.
    y, sr = librosa.load(audio_path, sr=None)

    # Magnitude spectrogram: computed once and shared by every feature below.
    spectrogram = np.abs(librosa.stft(y, n_fft=n_fft, hop_length=hop_length))

    # Features that help distinguish metal from cardboard.
    mean_energy = np.mean(spectrogram)
    max_energy = np.max(spectrogram)
    # Hand the precomputed magnitudes to librosa via S= so the STFT is not
    # recomputed from the waveform a second time.
    spectral_centroid = librosa.feature.spectral_centroid(
        S=spectrogram, sr=sr, n_fft=n_fft, hop_length=hop_length
    )[0]
    spectral_centroid_mean = np.mean(spectral_centroid)

    # Majority vote: each comparison contributes one vote for 'metal'.
    metal_votes = (
        int(mean_energy > mean_energy_threshold)
        + int(max_energy > max_energy_threshold)
        + int(spectral_centroid_mean < sc_mean_threshold)
    )
    return 'metal' if metal_votes >= 2 else 'cardboard'

In [6]:
# Run the classifier on the first cardboard test recording.
audio_path = '/content/drive/My Drive/Assignment 1/Assignment 1/Q2/test/cardboard1.mp3'
solution(audio_path)

'cardboard'

In [7]:
# Run the classifier on the second cardboard test recording.
audio_path = '/content/drive/My Drive/Assignment 1/Assignment 1/Q2/test/cardboard2.mp3'
solution(audio_path)

'cardboard'

In [8]:
# Run the classifier on the first metal-banging test recording.
audio_path = '/content/drive/My Drive/Assignment 1/Assignment 1/Q2/test/metal_banging1.mp3'
solution(audio_path)

'metal'

In [9]:
# Run the classifier on the second metal-banging test recording.
audio_path = '/content/drive/My Drive/Assignment 1/Assignment 1/Q2/test/metal_banging2.mp3'
solution(audio_path)

'metal'