<a href="https://colab.research.google.com/github/samuveljebakumar/fish/blob/main/cnn_and_lstm_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import librosa
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Reshape
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.utils import to_categorical

In [None]:
# Attach Google Drive to the Colab VM so DATASET_PATH (under /content/drive)
# can be read like a local directory.
from google.colab import drive
drive.mount(mountpoint='/content/drive')

In [None]:
# Root folder of the dataset on the mounted Drive.
DATASET_PATH = "/content/drive/MyDrive/all data set"

# Audio framing: every recording is resampled to SAMPLE_RATE Hz and cut into
# DURATION-second clips of SAMPLES samples each.
SAMPLE_RATE = 16000
DURATION = 3
SAMPLES = DURATION * SAMPLE_RATE

# Mel-spectrogram parameters passed to librosa.feature.melspectrogram.
N_MELS = 128       # number of mel bands
N_FFT = 1024       # FFT window size
HOP_LENGTH = 512   # hop between successive frames, in samples

In [None]:
def load_audio(file_path):
    """Load an audio file as a mono, peak-normalized waveform.

    The file is decoded by librosa, resampled to SAMPLE_RATE and mixed down
    to one channel; the result is then passed through
    ``librosa.util.normalize``.

    Args:
        file_path: Path of the audio file to read.

    Returns:
        The normalized waveform returned by librosa.
    """
    waveform = librosa.load(file_path, sr=SAMPLE_RATE, mono=True)[0]
    return librosa.util.normalize(waveform)

def split_audio(audio, clip_samples=None):
    """Cut a waveform into consecutive, non-overlapping fixed-length clips.

    Only full-length clips are returned; a trailing remainder shorter than
    ``clip_samples`` is discarded.

    Args:
        audio: Sliceable sequence of samples (anything supporting ``len`` and
            slicing).
        clip_samples: Length of each clip in samples. Defaults to the
            module-level ``SAMPLES`` when omitted.

    Returns:
        A list of slices of ``audio``, each exactly ``clip_samples`` long.
        Empty when ``audio`` is shorter than one clip.

    Raises:
        ValueError: If ``clip_samples`` is not positive.
    """
    if clip_samples is None:
        clip_samples = SAMPLES
    if clip_samples <= 0:
        raise ValueError("clip_samples must be a positive integer")

    # The last valid start index is len(audio) - clip_samples *inclusive*;
    # the +1 keeps the final full clip when the length is an exact multiple
    # of clip_samples (previously that clip was silently dropped).
    last_start = len(audio) - clip_samples
    return [
        audio[start:start + clip_samples]
        for start in range(0, last_start + 1, clip_samples)
    ]

def extract_mel(audio_clip):
    """Convert one audio clip into a mel spectrogram in decibels.

    Args:
        audio_clip: Waveform samples for a single clip.

    Returns:
        The mel power spectrogram of the clip converted to dB, with the
        clip's own maximum power used as the reference level.
    """
    mel_settings = dict(
        sr=SAMPLE_RATE,
        n_fft=N_FFT,
        hop_length=HOP_LENGTH,
        n_mels=N_MELS,
    )
    power_spectrogram = librosa.feature.melspectrogram(y=audio_clip, **mel_settings)
    return librosa.power_to_db(power_spectrogram, ref=np.max)


In [None]:
# Build the dataset: walk DATASET_PATH/<category>/<species>/<audio file>,
# cut every recording into fixed-length clips and store one mel spectrogram
# (X) and one species label (y) per clip.
X = []
y = []

# os.listdir() returns entries in arbitrary order. Iterating in sorted order
# makes the sample order deterministic, so the seeded train/test split that
# follows is reproducible between runs.
for category_folder in sorted(os.listdir(DATASET_PATH)):
    category_path = os.path.join(DATASET_PATH, category_folder)
    if not os.path.isdir(category_path):
        continue

    print(f"Processing category: {category_folder}") # Diagnostic print for main category (e.g., train/test)

    for species_folder in sorted(os.listdir(category_path)):
        species_path = os.path.join(category_path, species_folder)
        if not os.path.isdir(species_path):
            continue

        print(f"  Processing bird species: {species_folder}") # Diagnostic print for species

        for audio_file_name in sorted(os.listdir(species_path)):
            file_path = os.path.join(species_path, audio_file_name)
            # Skip anything that is not a regular file (e.g. a nested directory)
            if not os.path.isfile(file_path):
                continue

            # Best effort: an unreadable or corrupt file is reported and
            # skipped so that it does not abort the whole scan.
            try:
                audio = load_audio(file_path)
                clips = split_audio(audio)

                for clip in clips:
                    mel = extract_mel(clip)
                    X.append(mel)
                    y.append(species_folder) # The species folder name is the label
                print(f"    Processed file: {audio_file_name} - clips found: {len(clips)}") # Diagnostic print for file
            except Exception as e:
                print(f"    Error processing file {file_path}: {e}")

# Fail here with a clear message instead of later with an IndexError on
# X.shape[2] when nothing could be loaded.
if not X:
    raise RuntimeError(
        f"No audio clips were extracted from {DATASET_PATH!r}; "
        "check the dataset path, folder layout and the errors printed above."
    )

X = np.array(X)
X = X[..., np.newaxis]  # add a trailing channel axis for Conv2D

In [None]:
# Quick sanity check: sample and label counts should match, and the first
# labels should look like species folder names.
for caption, value in (
    ("Number of samples:", len(X)),
    ("Number of labels:", len(y)),
    ("First 10 labels:", y[:10]),
):
    print(caption, value)


In [None]:

# Turn the species-name labels into integer class ids, then one-hot encode
# them to match the 'categorical_crossentropy' loss the model is compiled with.
# `encoder` is kept so predictions can be mapped back to species names.
encoder = LabelEncoder()
y_encoded = encoder.fit_transform(y)
y_cat = to_categorical(y_encoded)


In [None]:

# Hold out 20% of the clips for testing; random_state fixes the shuffle seed.
# NOTE(review): the split is made per clip, so clips cut from the same
# recording can land in both sets, and it is not stratified by class —
# confirm this is intended before trusting the reported accuracy.
X_train, X_test, y_train, y_test = train_test_split(
    X, y_cat, test_size=0.2, random_state=42
)


In [None]:
from tensorflow.keras.layers import BatchNormalization, Bidirectional, GlobalAveragePooling2D

model = Sequential()

# CNN feature extractor: three Conv -> BatchNorm -> MaxPool -> Dropout stages
# with 64, 128 and 256 filters. Only the first convolution declares the input
# shape (mel bands, frames, 1 channel).
for stage_index, conv_filters in enumerate((64, 128, 256)):
    conv_options = {'activation': 'relu'}
    if stage_index == 0:
        conv_options['input_shape'] = (N_MELS, X.shape[2], 1)
    model.add(Conv2D(conv_filters, (3,3), **conv_options))
    model.add(BatchNormalization())
    model.add(MaxPooling2D((2,2)))
    model.add(Dropout(0.25))

# Flatten the CNN feature map from (batch, height, width, channels) to the
# 3D (batch, timesteps, features) layout the LSTM expects, reading the sizes
# from the model built so far.
# NOTE(review): axis 1 of the input is the mel-band axis (N_MELS), so the
# recurrent layers step over frequency bands rather than time frames —
# confirm this is intended.
cnn_output_shape = model.output_shape
reshape_dim_1 = cnn_output_shape[1]                        # height -> timesteps
reshape_dim_2 = cnn_output_shape[2] * cnn_output_shape[3]  # width * channels -> features
model.add(Reshape((reshape_dim_1, reshape_dim_2)))

# Recurrent head: a bidirectional LSTM that returns the full sequence,
# followed by a second LSTM that returns only its final state.
# implementation=2 was chosen by the author for TFLite compatibility.
model.add(Bidirectional(LSTM(128, return_sequences=True, implementation=2)))
model.add(Dropout(0.4))

model.add(LSTM(64, implementation=2))
model.add(Dropout(0.4))

# Classifier: one softmax unit per species known to the label encoder.
model.add(Dense(len(encoder.classes_), activation='softmax'))

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

model.summary()

In [None]:

# Train for 40 epochs with mini-batches of 16 clips. The held-out test split
# is also used as validation data, so per-epoch validation metrics are
# computed on the same clips that are evaluated afterwards.
fit_options = dict(
    epochs=40,
    batch_size=16,
    validation_data=(X_test, y_test),
)
history = model.fit(X_train, y_train, **fit_options)


In [None]:
# Score the trained model on the held-out split and report accuracy in percent.
loss, acc = model.evaluate(X_test, y_test)
accuracy_percent = acc * 100
print("Test Accuracy:", accuracy_percent)


In [None]:
def predict_bird_species(audio_file_path, model, encoder):
    """Predict the species heard in an audio file.

    The file is loaded, cut into fixed-length clips and converted to mel
    spectrograms exactly as during training. The model's class probabilities
    are averaged over all clips and the most likely class is decoded back to
    its species name.

    Args:
        audio_file_path: Path of the audio file to classify.
        model: Trained model exposing ``predict``.
        encoder: Fitted label encoder exposing ``inverse_transform``.

    Returns:
        The predicted species label, or ``None`` when the file yields no
        full-length clip or any step fails (the error is printed).
    """
    try:
        segments = split_audio(load_audio(audio_file_path))
        if not segments:
            print(f"No valid audio clips found in {audio_file_path}")
            return None

        # Stack per-clip spectrograms into one batch and append the channel axis.
        spectrogram_batch = np.array([extract_mel(segment) for segment in segments])
        spectrogram_batch = spectrogram_batch[..., np.newaxis]

        # One probability row per clip; average them into a file-level score.
        clip_probabilities = model.predict(spectrogram_batch)
        mean_probabilities = np.mean(clip_probabilities, axis=0)

        best_class = np.argmax(mean_probabilities)
        return encoder.inverse_transform([best_class])[0]

    except Exception as e:
        print(f"Error during prediction for {audio_file_path}: {e}")
        return None


In [None]:
# Example: classify a single recording (replace the path with your own audio file)
new_audio_file ='/content/WhatsApp Audio 2026-01-12 at 11.11.03 PM (2).mp4'

predicted_bird = predict_bird_species(
    audio_file_path=new_audio_file,
    model=model,
    encoder=encoder,
)

# predict_bird_species returns None on failure, in which case nothing is printed.
if predicted_bird:
    print(f"The predicted bird species is: {predicted_bird}")


In [None]:
# List every class the label encoder learned, numbered from 1.
print("Bird species in the dataset:")
for position, species in enumerate(encoder.classes_, start=1):
    print(f"{position}. {species}")

In [None]:
# Export cell: saves the trained Keras model as .h5, converts it to TFLite,
# writes the .tflite file and triggers browser downloads of both files.
import tensorflow as tf
from google.colab import files
import os  # NOTE(review): not used anywhere in this cell

print("üöÄ Starting conversion process...")

# 1. Make sure a trained `model` object exists in this notebook session
if 'model' not in locals() and 'model' not in globals():
    print("‚ùå Error: The variable 'model' is not found.")
    print("   Please run your training cell (the one with model.fit) again first!")
else:
    try:
        # ==========================================
        # STEP 1: Save the Master Backup (.h5)
        # ==========================================
        model.save('bird_model_master.h5')
        print("‚úÖ Backup model (.h5) saved.")

        # ==========================================
        # STEP 2: Configure Converter for LSTM/Bidirectional
        # ==========================================
        converter = tf.lite.TFLiteConverter.from_keras_model(model)

        # Allow both built-in TFLite ops and Select TF ops, so layers that do
        # not map onto built-ins can still be converted.
        # NOTE(review): a model converted with SELECT_TF_OPS presumably needs
        # a runtime that includes the Select TF ops (Flex) delegate on the
        # target device — confirm for the deployment target.
        converter.target_spec.supported_ops = [
            tf.lite.OpsSet.TFLITE_BUILTINS, # Standard Lite ops
            tf.lite.OpsSet.SELECT_TF_OPS    # Full TensorFlow ops
        ]
        converter.allow_custom_ops = True # Do not fail conversion on ops unknown to TFLite

        # No converter.optimizations are set, so no quantization is applied.
        # The private flag below turns off the converter's lowering of
        # tensor-list ops.
        # NOTE(review): this is an underscore-prefixed, experimental attribute;
        # presumably required for the LSTM layers — verify against the
        # installed TensorFlow version.
        converter._experimental_lower_tensor_list_ops = False

        print("‚è≥ Converting model to TFLite... (This may take 1-2 minutes)")
        tflite_model = converter.convert()

        # ==========================================
        # STEP 3: Save and Download
        # ==========================================
        tflite_filename = 'bird_model_fixed.tflite'
        with open(tflite_filename, 'wb') as f:
            f.write(tflite_model)

        print(f"‚úÖ Success! Converted model saved as '{tflite_filename}'")
        # len(tflite_model) is the serialized size in bytes; report it in MB
        print(f"üìè Model size: {len(tflite_model) / 1024 / 1024:.2f} MB")

        # Trigger browser downloads of both artifacts from the Colab VM
        print("‚¨áÔ∏è Downloading to your computer...")
        files.download(tflite_filename)
        files.download('bird_model_master.h5')

    except Exception as e:
        # Any failure in saving, converting or downloading is reported here
        # rather than raised, so the cell always finishes.
        print("\n‚ùå CONVERSION FAILED!")
        print(f"Error details: {str(e)}")
        print("\nPossible fix: If the error mentions 'static shape', re-build your model with a fixed input_shape.")