<a href="https://colab.research.google.com/github/samuveljebakumar/fish/blob/main/wast.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import librosa
import numpy as np
import matplotlib.pyplot as plt

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

# Make code wrapped in tf.function run eagerly (op by op) instead of as a
# traced graph.
# NOTE(review): TensorFlow documents this switch as a debugging aid; it
# presumably slows down the model.fit() call further down - confirm it is
# still needed before long training runs.
tf.config.run_functions_eagerly(True)



In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive. DATASET_PATH and the model/label
# files saved later all live under /content/drive/MyDrive, so this cell has
# to run before any cell that reads or writes those paths.
drive.mount('/content/drive')

In [None]:
# Configuration
# Root folder of the dataset on the mounted Drive. The loading cell walks it
# two directory levels deep (category folder, then species folder).
DATASET_PATH = "/content/drive/MyDrive/data set"
# Sampling rate passed to librosa.load(); every file is resampled to it.
SAMPLE_RATE = 16000
# Number of MFCC coefficients requested from librosa.feature.mfcc().
N_MFCC = 40
# Fixed number of time frames per sample: extract_mfcc() zero-pads or
# truncates the frame axis to exactly this length.
# NOTE(review): with librosa's default hop length (512 samples) this would be
# roughly 5.6 s of audio at 16 kHz - confirm against the librosa version used.
MAX_LEN = 174

In [None]:
def extract_mfcc(file_path):
    """Load an audio file and return its MFCC matrix with a fixed frame count.

    The file is decoded and resampled to SAMPLE_RATE, N_MFCC coefficients are
    computed per frame, and the frame (second) axis is forced to exactly
    MAX_LEN columns: longer recordings keep only their first MAX_LEN frames,
    shorter ones are right-padded with zeros.

    Args:
        file_path: Path of the audio file to read.

    Returns:
        2-D NumPy array of MFCCs whose second axis has length MAX_LEN.
    """
    signal, rate = librosa.load(file_path, sr=SAMPLE_RATE)
    features = librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=N_MFCC)

    n_frames = features.shape[1]

    # Long enough already: keep only the leading MAX_LEN frames.
    if n_frames >= MAX_LEN:
        return features[:, :MAX_LEN]

    # Too short: append zero frames on the right of the time axis only.
    missing_frames = MAX_LEN - n_frames
    return np.pad(features, pad_width=((0, 0), (0, missing_frames)))


In [None]:
X = []
y = []

# Expected layout: DATASET_PATH/<category>/<species>/<audio file>.
# The species folder name is the label, so identically named species folders
# under different categories end up in the same class.
#
# Directory listings are sorted because os.listdir() returns entries in
# arbitrary order; without a stable sample order the seeded train/test split
# would not be reproducible between runs.
for category_folder_name in sorted(os.listdir(DATASET_PATH)):
    category_path = os.path.join(DATASET_PATH, category_folder_name)

    if not os.path.isdir(category_path):
        continue

    for species_folder_name in sorted(os.listdir(category_path)):
        species_path = os.path.join(category_path, species_folder_name)

        if not os.path.isdir(species_path):
            continue

        for file_name in sorted(os.listdir(species_path)):
            # Case-insensitive match so files such as "CALL.WAV" are not
            # silently dropped from the dataset.
            if not file_name.lower().endswith((".wav", ".mp3")):
                continue

            file_path = os.path.join(species_path, file_name)
            try:
                mfcc = extract_mfcc(file_path)
            except Exception as e:
                # A single unreadable or corrupt recording should not abort
                # the whole scan; report it and move on.
                print(f"Skipping {file_path}: {e}")
                continue

            X.append(mfcc)
            y.append(species_folder_name)

# Fail here with a clear message instead of much later inside the
# train/test split or the model when nothing was loaded.
if not X:
    raise RuntimeError(f"No usable .wav/.mp3 files found under {DATASET_PATH}")

# Stack the per-file matrices and add a trailing channel axis for Conv2D.
X = np.array(X)
X = X[..., np.newaxis]

In [None]:
# Map the species folder names collected in `y` to integer class ids.
# The position of a name in label_encoder.classes_ is its class id; the model
# below sizes its output layer from len(label_encoder.classes_).
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Show the class names in class-id order.
print("Bird Classes:", label_encoder.classes_)


In [None]:
# Hold out 20% of the samples. The split is seeded and stratified on the
# encoded labels, so it is repeatable for a given sample order and keeps the
# class proportions in both parts.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    stratify=y_encoded,
    random_state=42,
    test_size=0.2,
)


In [None]:
# Small 2-D CNN over the MFCC "image": two conv/pool/dropout stages followed
# by a dense classifier head with one softmax output per class.
model = Sequential([
    # Stage 1 - input shape is taken from the training data (batch axis dropped).
    Conv2D(32, (3, 3), activation='relu', input_shape=X_train.shape[1:]),
    MaxPooling2D((2, 2)),
    Dropout(0.3),

    # Stage 2
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Dropout(0.3),

    # Classifier head
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.4),
    Dense(len(label_encoder.classes_), activation='softmax'),
])

# Sparse categorical cross-entropy because the labels are integer class ids
# (LabelEncoder output), not one-hot vectors.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

model.summary()


In [None]:
# Train for 50 epochs in mini-batches of 32. The held-out split is passed as
# validation data, so its loss/accuracy are reported after every epoch and
# recorded in `history`.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=50,
    validation_data=(X_test, y_test),
)


In [None]:
# Score the trained model on the held-out split; evaluate() returns the loss
# followed by the compiled metric (accuracy). Accuracy is printed in percent.
loss, acc = model.evaluate(x=X_test, y=y_test)
print("Test Accuracy:", 100 * acc)

In [None]:
def predict_bird_species(audio_file_path, model, encoder):
    """Predict the species heard in a recording by averaging over windows.

    The recording is loaded at SAMPLE_RATE and converted to MFCCs with the
    same settings used for the training data (N_MFCC coefficients). The MFCC
    matrix is then cut along the time axis into consecutive windows of
    MAX_LEN frames (the last window is zero-padded), every window is
    classified, and the class probabilities are averaged before taking the
    arg-max.

    The previous version relied on ``load_audio``, ``split_audio`` and
    ``extract_mel``, which are not defined in this notebook, so every call
    ended in the ``except`` branch and returned None.

    Args:
        audio_file_path: Path of the audio file to classify.
        model: Trained model exposing ``predict``; it must accept inputs
            shaped like the training data (MFCC matrix plus channel axis).
        encoder: Fitted label encoder exposing ``inverse_transform``.

    Returns:
        The predicted species name, or None when the file holds no audio or
        any step fails (the error is printed, not raised).
    """
    try:
        # Load and preprocess the new audio file
        audio, sr = librosa.load(audio_file_path, sr=SAMPLE_RATE)

        if audio.size == 0:
            print(f"No valid audio clips found in {audio_file_path}")
            return None

        mfcc = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=N_MFCC)

        # Cut the frame axis into MAX_LEN-wide windows; the first window is
        # exactly what extract_mfcc() would produce for this file.
        clips = []
        for start in range(0, mfcc.shape[1], MAX_LEN):
            clip = mfcc[:, start:start + MAX_LEN]
            if clip.shape[1] < MAX_LEN:
                # Zero-pad the trailing window, as done for short training files.
                pad_width = MAX_LEN - clip.shape[1]
                clip = np.pad(clip, pad_width=((0, 0), (0, pad_width)))
            clips.append(clip)

        if not clips:
            print(f"No valid audio clips found in {audio_file_path}")
            return None

        # Stack the windows into a batch and add the channel dimension
        X_new = np.array(clips)
        X_new = X_new[..., np.newaxis]

        # Make predictions (one probability row per window)
        predictions = model.predict(X_new)

        # Average across windows; with a single window this is that row itself
        avg_prediction = np.mean(predictions, axis=0)

        # Get the predicted class index
        predicted_class_idx = np.argmax(avg_prediction)

        # Decode the predicted class index back to the species name
        predicted_species = encoder.inverse_transform([predicted_class_idx])

        return predicted_species[0]

    except Exception as e:
        # Best-effort helper: report the failure and signal it with None
        # rather than interrupting the notebook.
        print(f"Error during prediction for {audio_file_path}: {e}")
        return None


In [None]:
# Persist the trained model to Drive, together with the class names.
# The class names are stored in class-id order so a predicted index can be
# turned back into a species name after reloading, without the encoder.
model.save("/content/drive/MyDrive/bird_sound_model.h5")
np.save("/content/drive/MyDrive/bird_labels.npy", label_encoder.classes_)


In [None]:
from tensorflow.keras.models import load_model

# Reload the saved model and class names from Drive. This rebinds the global
# `model` and defines `labels`, both of which predict_bird() reads.
# NOTE(review): np.load() is called without allow_pickle, which presumably
# works because the saved class names form a plain string array - confirm.
model = load_model("/content/drive/MyDrive/bird_sound_model.h5")
labels = np.load("/content/drive/MyDrive/bird_labels.npy")




In [None]:
def predict_bird(audio_file):
    """Classify one audio file with the globally loaded model.

    Args:
        audio_file: Path of the audio file to classify.

    Returns:
        The entry of the global ``labels`` array at the predicted index.
    """
    features = extract_mfcc(audio_file)

    # Add a leading batch axis and a trailing channel axis.
    batch = features[np.newaxis, ..., np.newaxis]

    scores = model.predict(batch)

    # argmax runs over the flattened output; with a single-sample batch that
    # is the class index.
    best_index = np.argmax(scores)
    return labels[best_index]

# Recordings to classify - replace with the paths of your own audio files.
test_audio_files = [
    "/content/WhatsApp Audio 2026-01-15 at 7.50.18 PM.mp4",
]

# Print the bare file name followed by the predicted label for each recording.
for audio_file in test_audio_files:
    print(f"Predicting for {audio_file.rsplit('/', 1)[-1]}:")
    predicted_species = predict_bird(audio_file)
    print(f"Predicted Bird: {predicted_species}\n")

In [None]:
def predict_bird(audio_file):
    """Return the label predicted for a single audio file.

    Uses the global ``model`` for inference and the global ``labels`` array
    to translate the predicted index into a name.

    Args:
        audio_file: Path of the audio file to classify.

    Returns:
        The element of ``labels`` selected by the arg-max of the model output.
    """
    # Insert a batch axis in front and a channel axis at the end.
    model_input = np.expand_dims(extract_mfcc(audio_file), axis=(0, -1))
    probabilities = model.predict(model_input)
    # Flattened arg-max; the batch contains exactly one sample.
    return labels[np.argmax(probabilities)]

# Recordings to classify - replace with the paths of your own audio files.
# Any number of entries is fine; each one is predicted independently.
test_audio_files = [
    "/content/WhatsApp Audio 2026-01-15 at 7.50.18 PM.mp4",
    "/content/WhatsApp Audio 2026-01-15 at 8.07.00 PM.mp4",
    "/content/WhatsApp Audio 2026-01-16 at 3.03.59 PM.mp4",
]

# Print the bare file name followed by the predicted label for each recording.
for audio_file in test_audio_files:
    print(f"Predicting for {audio_file.rpartition('/')[2]}:")
    predicted_species = predict_bird(audio_file)
    print(f"Predicted Bird: {predicted_species}\n")

In [None]:
def predict_bird(audio_file):
    """Predict the label of one audio file using the global model and labels.

    Args:
        audio_file: Path of the audio file to classify.

    Returns:
        The entry of ``labels`` at the index with the highest model output.
    """
    # MFCC matrix wrapped with a batch axis (front) and a channel axis (back).
    model_input = extract_mfcc(audio_file)[np.newaxis, ..., np.newaxis]
    # The batch has one sample, so the flattened arg-max is the class index.
    return labels[np.argmax(model.predict(model_input))]

# Recordings to classify - replace with the paths of your own audio files.
# The same recording is listed three times here, so it is predicted three times.
test_audio_files = [
    "/content/WhatsApp Audio 2026-01-15 at 7.51.28 PM.mp4",
    "/content/WhatsApp Audio 2026-01-15 at 7.51.28 PM.mp4",
    "/content/WhatsApp Audio 2026-01-15 at 7.51.28 PM.mp4",
]

# Print the bare file name followed by the predicted label for each recording.
for audio_file in test_audio_files:
    print(f"Predicting for {audio_file.rsplit('/', 1)[-1]}:")
    predicted_species = predict_bird(audio_file)
    print(f"Predicted Bird: {predicted_species}\n")