In [1]:
! pip install tensorflow_hub



In [None]:
import os
import numpy as np
import tensorflow as tf
import librosa
import soundfile as sf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from sklearn.preprocessing import MultiLabelBinarizer
from vggish_input import waveform_to_examples
from vggish import VGGish
from vggish_params import Params
from vggish_postprocess import Postprocessor

# Module-level VGGish objects shared by the functions below: the feature
# extraction model, its parameter container and the post-processor.
# The right-hand side is evaluated left to right.
vggish, params, pproc = VGGish(), Params(), Postprocessor()

# Turn one audio file (intended to be a 5 s clip) into VGGish features
def extract_features(audio_path):
    """Load ``audio_path`` and run it through the VGGish feature extractor.

    The file is decoded with librosa at ``params.SAMPLE_RATE``, converted to
    VGGish input examples by ``waveform_to_examples``, and those examples are
    passed to the module-level ``vggish`` model, whose output is returned
    unchanged.
    """
    waveform, sample_rate = librosa.load(audio_path, sr=params.SAMPLE_RATE)
    examples = waveform_to_examples(waveform, sample_rate)
    embeddings = vggish(examples)
    return embeddings

# Load dataset and preprocess
def load_data(audio_folder, label_file):
    """Build the feature matrix and multi-hot label matrix for training.

    Args:
        audio_folder: Directory whose regular files are each passed to
            ``extract_features``. Sub-directories are skipped.
        label_file: Text file with one ``filename,label1,label2,...`` record
            per line. Blank lines and empty fields are ignored, and
            surrounding whitespace is stripped from every field.

    Returns:
        A ``(X, Y, mlb)`` tuple: ``X`` is ``np.array`` of the per-file
        features, ``Y`` is the multi-hot label matrix produced by
        ``mlb.transform``, and ``mlb`` is the fitted ``MultiLabelBinarizer``.
    """
    # Sort so the sample order (and therefore any tail-based validation
    # split done by the caller) is reproducible across runs and platforms;
    # os.listdir returns entries in arbitrary order.
    audio_files = sorted(
        name for name in os.listdir(audio_folder)
        if os.path.isfile(os.path.join(audio_folder, name))
    )

    labels = {}  # Map audio filenames to their list of labels
    with open(label_file, 'r') as f:
        for line in f:
            fields = [field.strip() for field in line.split(',')]
            if not fields[0]:
                continue  # blank line, or a record without a filename
            # Drop empty fields (e.g. from a trailing comma) so they do not
            # become a spurious '' class in the binarizer.
            labels[fields[0]] = [field for field in fields[1:] if field]

    mlb = MultiLabelBinarizer()
    mlb.fit(labels.values())

    X, Y = [], []
    for file in audio_files:
        path = os.path.join(audio_folder, file)
        X.append(extract_features(path))
        # Files missing from the label file get an empty label list,
        # i.e. no positive classes.
        Y.append(labels.get(file, []))

    Y = mlb.transform(Y)  # Convert labels to multi-hot encoding
    # NOTE(review): np.array(X) presumably needs every clip to yield features
    # of the same shape (equal-length clips) - confirm against the dataset.
    return np.array(X), np.array(Y), mlb

# Build the multi-label classification model
def build_model(input_shape, num_classes):
    """Create and compile the dense multi-label classifier.

    The network is a stack of Dense(512, relu) -> Dropout(0.5) ->
    Dense(256, relu) -> Dropout(0.3) -> Dense(num_classes, sigmoid),
    compiled with the Adam optimizer and binary cross-entropy loss.

    Args:
        input_shape: Shape tuple passed as ``input_shape`` to the first layer.
        num_classes: Number of units in the sigmoid output layer.

    Returns:
        The compiled ``Sequential`` model.
    """
    classifier = Sequential()
    classifier.add(Dense(512, activation='relu', input_shape=input_shape))
    classifier.add(Dropout(0.5))
    classifier.add(Dense(256, activation='relu'))
    classifier.add(Dropout(0.3))
    # Sigmoid (not softmax) so each class is scored independently
    classifier.add(Dense(num_classes, activation='sigmoid'))
    classifier.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return classifier

# Train the model
# Raw string: in a normal literal the "\U" of "\Users" starts a \UXXXXXXXX
# unicode escape (a SyntaxError here) and "\a" would turn into a BEL character.
audio_folder = r'C:\Users\SWAPNIL JAIN\Desktop\Audio_classification\Audio_Classification\artifacts\data_preprocessing'
label_file = 'path_to_labels.csv'  # placeholder: point this at the real label CSV
X, Y, mlb = load_data(audio_folder, label_file)
X = X.reshape(X.shape[0], -1)  # Flatten each sample's features into one vector
model = build_model((X.shape[1],), len(mlb.classes_))
model.fit(X, Y, epochs=20, batch_size=32, validation_split=0.2)
model.save('poaching_detector.h5')

# Inference Pipeline
def predict_audio(audio_path, model, mlb):
    """Predict which classes are present in an audio file of any length.

    The audio is loaded at ``params.SAMPLE_RATE`` and cut into consecutive
    5-second chunks. Each chunk goes through ``waveform_to_examples`` and the
    module-level ``vggish`` model, the same feature path ``extract_features``
    uses for training data. The flattened features are scored by ``model``,
    the per-chunk scores are averaged, and every class whose mean score
    exceeds 0.5 is reported.

    Args:
        audio_path: Path of the audio file to classify.
        model: Trained classifier exposing ``predict``.
        mlb: Fitted ``MultiLabelBinarizer`` whose ``classes_`` name the
            model's outputs.

    Returns:
        List of class labels whose averaged score is above 0.5; an empty
        list when the file contains no samples.
    """
    y, sr = librosa.load(audio_path, sr=params.SAMPLE_RATE)
    if len(y) == 0:
        return []  # nothing to score; avoids averaging an empty list

    chunk_len = 5 * sr
    chunk_scores = []
    for start in range(0, len(y), chunk_len):
        chunk = y[start:start + chunk_len]
        if len(chunk) < chunk_len:
            # Zero-pad the trailing chunk so every chunk produces a feature
            # vector of the same length. Assumes the classifier was trained
            # on 5 s clips - TODO confirm.
            chunk = np.pad(chunk, (0, chunk_len - len(chunk)))
        # NOTE(review): assumes the vggish output converts with np.asarray.
        features = np.asarray(vggish(waveform_to_examples(chunk, sr)))
        pred = model.predict(features.reshape(1, -1))  # Flatten for model input
        # predict returns one row per input sample; flatten the single row
        # so the mean below is a 1-D vector with one score per class.
        chunk_scores.append(np.ravel(pred))

    avg_preds = np.mean(chunk_scores, axis=0)
    return [label for label, score in zip(mlb.classes_, avg_preds) if score > 0.5]

# Example usage: classify a single recording with the model trained above
predictions = predict_audio(audio_path='test_audio.wav', model=model, mlb=mlb)
print("Detected Classes:", predictions)