In [5]:
# Mount Google Drive so the notebook can read files under /content/drive
# (the pre-trained model files loaded further down live in MyDrive).
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [6]:
!apt install ffmpeg
!pip install spleeter

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
ffmpeg is already the newest version (7:4.4.2-0ubuntu0.22.04.1).
0 upgraded, 0 newly installed, 0 to remove and 35 not upgraded.
Collecting numpy<2.0.0 (from spleeter)
  Using cached numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
Collecting tensorflow==2.12.1 (from spleeter)
  Using cached tensorflow-2.12.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.4 kB)
Collecting keras<2.13,>=2.12.0 (from tensorflow==2.12.1->spleeter)
  Using cached keras-2.12.0-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting numpy<2.0.0 (from spleeter)
  Using cached numpy-1.24.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.6 kB)
Collecting tensorboard<2.13,>=2.12 (from tensorflow==2.12.1->spleeter)
  Using cached tensorboard-2.12.3-py3-none-any.whl.metadata (1.8 kB)
Collecting typing-extensions<4.6.0,>=3.6.6 (from tensorfl

In [1]:
!pip install pydub



In [2]:
!pip install pyTelegramBotAPI




In [None]:
import getpass
import os
import tempfile
import time

import nest_asyncio
import numpy as np
import telebot
import tensorflow as tf
from pydub import AudioSegment
from spleeter.separator import Separator

# Applied once at start-up, before the bot is created and polling begins.
nest_asyncio.apply()

# Load trained models
MODEL_PATH_SINGER = "/content/drive/MyDrive/pre_trained_model/final_model_singer.h5"
model_singer = tf.keras.models.load_model(MODEL_PATH_SINGER)

MODEL_PATH_CLASSMATE = "/content/drive/MyDrive/pre_trained_model/final_model_classmate.h5"
model_classmate = tf.keras.models.load_model(MODEL_PATH_CLASSMATE, compile=False)

# Initialize bot
# The bot token is a credential and must never be stored in the notebook.
# It is read from the TELEGRAM_BOT_TOKEN environment variable; when that is
# unset the user is prompted for it without echoing the value.
# SECURITY: a token that was previously committed in this cell must be
# treated as leaked and revoked via BotFather.
TOKEN = (os.environ.get("TELEGRAM_BOT_TOKEN") or getpass.getpass("Telegram bot token: ")).strip()
if not TOKEN:
    raise RuntimeError(
        "No Telegram bot token provided; set TELEGRAM_BOT_TOKEN or enter it when prompted."
    )
bot = telebot.TeleBot(TOKEN, parse_mode="Markdown")

# Track recognition mode: maps chat id -> "singer" or "friend"
USER_MODE = {}

@bot.message_handler(commands=["singer"])
def singer_command(message):
    """Handle /singer: put the sending chat into singer-recognition mode.

    Records the choice in USER_MODE under the chat id and sends a
    confirmation message back to the same chat.
    """
    chat_id = message.chat.id
    USER_MODE[chat_id] = "singer"
    confirmation = "✅ Mode set to *Singer recognition*. Send a **song** 🎶 to analyze!"
    bot.send_message(chat_id, confirmation)

@bot.message_handler(commands=["friend"])
def friend_command(message):
    """Handle /friend: put the sending chat into friend-recognition mode.

    Records the choice in USER_MODE under the chat id and sends a
    confirmation message back to the same chat.
    """
    chat_id = message.chat.id
    USER_MODE[chat_id] = "friend"
    confirmation = "✅ Mode set to *Friend recognition*. Send a voice message 🎙️ to identify!"
    bot.send_message(chat_id, confirmation)

# Both models are fed one second of mono audio: 48000 samples at 48 kHz.
_SAMPLE_RATE = 48000
_NUM_SAMPLES = 48000


def _escape_markdown(text):
    """Backslash-escape the characters legacy Telegram Markdown treats as markup.

    The bot sends with parse_mode="Markdown" by default, so an unbalanced
    '_' or '*' inside a path or exception text would make Telegram reject
    the whole message.
    """
    for special in ("_", "*", "`", "["):
        text = text.replace(special, "\\" + special)
    return text


def _friend_input(audio):
    """Convert a pydub segment into a (1, 48000, 1) float32 array.

    The samples are zero-padded or truncated to exactly _NUM_SAMPLES; an
    empty segment therefore becomes all zeros.
    """
    samples = np.array(audio.get_array_of_samples(), dtype=np.float32)
    # NOTE(review): these are raw integer PCM amplitudes, whereas the singer
    # path goes through tf.audio.decode_wav. Confirm this scaling matches
    # what the classmate model was trained on.
    if len(samples) < _NUM_SAMPLES:
        samples = np.pad(samples, (0, _NUM_SAMPLES - len(samples)), mode="constant", constant_values=0.0)
    else:
        samples = samples[:_NUM_SAMPLES]
    return samples.reshape((1, _NUM_SAMPLES, 1))


def _singer_input(vocal_file_path, work_dir):
    """Load the separated vocals and return a (1, 48000, 1) tensor.

    The vocals are re-exported as 16-bit mono 48 kHz WAV inside work_dir and
    decoded with tf.audio.decode_wav, keeping the first _NUM_SAMPLES samples.
    """
    vocals = AudioSegment.from_file(vocal_file_path)
    vocals = vocals.set_channels(1).set_frame_rate(_SAMPLE_RATE).set_sample_width(2)
    final_audio_path = os.path.join(work_dir, "processed_vocals.wav")
    vocals.export(final_audio_path, format="wav")

    audio_binary = tf.io.read_file(final_audio_path)
    audio_tensor, _ = tf.audio.decode_wav(audio_binary, desired_channels=1, desired_samples=_NUM_SAMPLES)
    return tf.expand_dims(audio_tensor, axis=0)


@bot.message_handler(content_types=["audio", "voice"])
def handle_audio(message):
    """Classify an incoming audio or voice message with the chat's selected model.

    Flow: verify that a mode (/singer or /friend) was chosen, download the
    file, convert it to mono 48 kHz, build the model input (via Spleeter
    vocal extraction in singer mode, directly from the samples in friend
    mode), run the prediction and reply with the arg-max class index.

    All intermediate files live in a per-request temporary directory so
    overlapping requests cannot overwrite each other's files and nothing is
    left behind afterwards.

    Args:
        message: the incoming Telegram message carrying ``audio`` or ``voice``.
    """
    chat_id = message.chat.id
    voice = message.audio if message.audio else message.voice

    if not voice:
        bot.send_message(chat_id, "❌ Please send a valid audio file.")
        return

    # Validate the mode first so no download/conversion work is wasted.
    mode = USER_MODE.get(chat_id)
    print(f"User mode selected: {mode}")
    if mode not in ("singer", "friend"):
        bot.send_message(chat_id, "❌ Error: Please select either /singer or /friend before sending audio.")
        return

    bot.send_message(chat_id, "⏳ Waiting for processing...")

    try:
        with tempfile.TemporaryDirectory() as work_dir:
            # Download the audio file
            audio_file = bot.get_file(voice.file_id)
            audio_path = os.path.join(work_dir, "audio.ogg")
            with open(audio_path, "wb") as new_file:
                new_file.write(bot.download_file(audio_file.file_path))

            # Normalise to mono / 48 kHz
            audio = AudioSegment.from_file(audio_path)
            audio = audio.set_channels(1).set_frame_rate(_SAMPLE_RATE)

            if mode == "singer":
                model = model_singer

                processed_audio_path = os.path.join(work_dir, "processed_audio.wav")
                audio.export(processed_audio_path, format="wav")

                # Extract vocals using Spleeter. The default filename_format is
                # "{filename}/{instrument}.{codec}", which nests the stems in a
                # sub-folder named after the input file; overriding it places
                # vocals.wav directly in vocals_dir. synchronous=True means the
                # files are on disk when the call returns, so no sleep is needed.
                vocals_dir = os.path.join(work_dir, "spleeter_output")
                os.makedirs(vocals_dir, exist_ok=True)
                separator = Separator("spleeter:2stems")
                separator.separate_to_file(
                    processed_audio_path,
                    vocals_dir,
                    filename_format="{instrument}.{codec}",
                    synchronous=True,
                )

                vocal_file_path = os.path.join(vocals_dir, "vocals.wav")
                if not os.path.exists(vocal_file_path):
                    print(f"❌ Debug: File not found at {vocal_file_path}")
                    bot.send_message(
                        chat_id,
                        f"❌ Error: Extracted vocal file not found at {_escape_markdown(vocal_file_path)}",
                    )
                    return

                audio_tensor = _singer_input(vocal_file_path, work_dir)
            else:
                model = model_classmate
                audio_tensor = _friend_input(audio)
    except Exception as e:
        # Report download/conversion/separation failures instead of going silent.
        bot.send_message(chat_id, f"⚠️ Error during audio processing: {_escape_markdown(str(e))}")
        return

    try:
        # Perform prediction
        print(f"Shape of input to model: {audio_tensor.shape}")
        prediction = model.predict(audio_tensor)
        predicted_class = np.argmax(prediction, axis=1)[0]
        bot.send_message(chat_id, f"✅ Recognized result: {predicted_class}")

    except Exception as e:
        # Escape the error text so Markdown parsing cannot reject the reply.
        bot.send_message(chat_id, f"⚠️ Error during prediction: {_escape_markdown(str(e))}")

def main():
    """Start the bot by calling bot.infinity_polling()."""
    bot.infinity_polling()


# Start polling only when this code runs as the main module.
if __name__ == "__main__":
    main()
