In [2]:
! pip install pandas



In [3]:
! pip install joblib



In [6]:
import os

In [9]:
%pwd

'c:\\Users\\SWAPNIL JAIN\\Desktop\\Audio_classification\\Audio_Classification'

In [8]:
os.chdir("../")

In [15]:
import mlflow
import mlflow.tensorflow
import pandas as pd
import joblib
import os
import tensorflow as tf
import numpy as np
import tensorflow_hub as hub
import librosa
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.model_selection import train_test_split

# Set the MLflow tracking URI to the local server
mlflow.set_tracking_uri('http://127.0.0.1:5000')

# Load VGGish model
vggish = hub.load("https://tfhub.dev/google/vggish/1")

# Function to extract features from audio files
def extract_vggish_features(audio_path):
    audio, sr = librosa.load(audio_path, sr=16000, mono=True)
    audio_tensor = tf.convert_to_tensor(audio, dtype=tf.float32)
    tf.config.run_functions_eagerly(True)
    embeddings = vggish(audio_tensor)
    return tf.keras.backend.eval(embeddings)

# Load dataset from a CSV file
def load_dataset(csv_file, label_map):
    df = pd.read_csv(csv_file)
    audio_paths = df["file_path"].tolist()

    # Convert string labels to actual list of labels
    raw_labels = [eval(label_str) for label_str in df["labels_text"]]

    # Convert labels to binary format (multi-hot encoding)
    mlb = MultiLabelBinarizer(classes=list(label_map.keys()))
    encoded_labels = mlb.fit_transform(raw_labels)

    return audio_paths, encoded_labels, mlb

# Label mapping
label_map = {
    'Fire': 1, 'Rain': 2, 'Thunderstorm': 3, 'WaterDrops': 4, 'Wind': 5, 'Silence': 6, 'TreeFalling': 7, "Helicopter": 8,
   "VehicleEngine":9, "Axe":10, "Chainsaw":11, "Generator":12, "Handsaw":13,  "Firework":14, "Gunshot":15,  "WoodChop":16,
   "Whistling":17,"Speaking":18,"Footsteps":19,"Clapping":20, "Insect":21, "Frog":22,"BirdChapping":23,"WingFlapping":24,
   "Lion":25, "WolfHowl":26, "Squirrel":27
}

# Load dataset
csv_file_path = r"C:\Users\SWAPNIL JAIN\Desktop\Audio_classification\Audio_Classification\preprocessed_data.csv"
audio_paths, encoded_labels, mlb = load_dataset(csv_file_path, label_map)

# Extract features
features = np.stack([extract_vggish_features(path) for path in audio_paths], axis=0)

# Split dataset
X_train, X_val, y_train, y_val = train_test_split(features, encoded_labels, test_size=0.2, random_state=42)

# Define model
model = tf.keras.Sequential([
    tf.keras.layers.InputLayer(input_shape=(5, 128)),  
    tf.keras.layers.LSTM(128, activation="relu"),
    tf.keras.layers.Dense(64, activation="relu"),
    tf.keras.layers.Dense(len(label_map), activation="sigmoid")  
])

model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

# Train model with MLflow logging
with mlflow.start_run():
       
    history = model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=20, batch_size=32)
    mlflow.tensorflow.log_model(model, "model")
    # Log parameters
    mlflow.log_params({
        "epochs": 20,
        "batch_size": 32
    })

    # Log metrics
    for epoch in range(20):
        mlflow.log_metric("train_loss", history.history["loss"][epoch], step=epoch)
        mlflow.log_metric("val_loss", history.history["val_loss"][epoch], step=epoch)
        mlflow.log_metric("train_accuracy", history.history["accuracy"][epoch], step=epoch)
        mlflow.log_metric("val_accuracy", history.history["val_accuracy"][epoch], step=epoch)

    
    
    # Save as a pickle file and log as artifact
    joblib.dump(model, 'model.pkl')
    mlflow.log_artifact('model.pkl')

print("Model successfully logged in MLflow!")

# Prediction function
def predict_and_decode(model, audio_paths, mlb, threshold=0.5):
    features = np.array([extract_vggish_features(path) for path in audio_paths])
    predictions = model.predict(features)
    
    binary_predictions = (predictions > threshold).astype(int)
    decoded_predictions = [mlb.inverse_transform([prediction])[0] for prediction in binary_predictions]
    
    return decoded_predictions

# Example prediction
audio_sample_paths = [r"artifacts/data_preprocessing/processed_audio_data\mixed_5.wav", 
                      r"artifacts/data_preprocessing/processed_audio_data\mixed_10.wav"]
predicted_labels = predict_and_decode(model, audio_sample_paths, mlb)
print(predicted_labels)


Epoch 1/20




[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 99ms/step - accuracy: 0.0622 - loss: 0.3997 - val_accuracy: 0.2270 - val_loss: 0.2689
Epoch 2/20
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 92ms/step - accuracy: 0.2349 - loss: 0.2639 - val_accuracy: 0.2860 - val_loss: 0.2497
Epoch 3/20
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 113ms/step - accuracy: 0.2963 - loss: 0.2440 - val_accuracy: 0.3130 - val_loss: 0.2434
Epoch 4/20
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 103ms/step - accuracy: 0.3192 - loss: 0.2337 - val_accuracy: 0.3190 - val_loss: 0.2339
Epoch 5/20
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 94ms/step - accuracy: 0.3285 - loss: 0.2240 - val_accuracy: 0.3290 - val_loss: 0.2337
Epoch 6/20
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 92ms/step - accuracy: 0.3337 - loss: 0.2168 - val_accuracy: 0.3190 - val_loss: 0.2306
Epoch 7/20
[1m125/125[0



🏃 View run flawless-auk-142 at: http://127.0.0.1:5000/#/experiments/0/runs/73e8c66c1eb14780b4963d37073db96e
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/0
Model successfully logged in MLflow!
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 101ms/step


AttributeError: 'list' object has no attribute 'shape'

In [29]:
pip install json

Note: you may need to restart the kernel to use updated packages.


ERROR: Could not find a version that satisfies the requirement json (from versions: none)
ERROR: No matching distribution found for json


In [38]:
import json

# Corrected Prediction Function with JSON Formatting
def predict_and_decode(model, audio_paths, mlb, threshold=0.5):
    features = [extract_vggish_features(path) for path in audio_paths]  # List of (5, 128) arrays
    
    # Ensure features are stacked properly
    features = np.stack(features, axis=0)  # Shape: (num_samples, 5, 128)
    
    # Ensure batch dimension is correctly formatted
    predictions = model.predict(features)  # Shape: (num_samples, num_classes)
    
    # Convert probabilities to binary labels using threshold
    binary_predictions = (predictions > threshold).astype(int)  # Shape: (num_samples, num_classes)
    
    # Decode multi-hot encoded predictions into human-readable labels
    decoded_predictions = mlb.inverse_transform(binary_predictions)  # Returns list of label tuples

    # Convert the predictions into a JSON-compatible structure
    json_predictions = json.dumps({"predictions": [list(labels) for labels in decoded_predictions]})
    
    return json_predictions  # Return predictions in JSON format

# Example prediction
audio_sample_paths = [
    r"artifacts/data_preprocessing/processed_audio_data/mixed_5.wav",
    r"artifacts/data_preprocessing/processed_audio_data/mixed_10.wav"
]

predicted_labels_json = predict_and_decode(model, audio_sample_paths, mlb)
print(predicted_labels_json)




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 172ms/step
{"predictions": [["Helicopter", "Insect"], ["Firework", "Speaking"]]}
