In [None]:
# =============================================================
# 1) Install Necessary Libraries
# =============================================================

# Install system-level dependencies
!sudo apt-get update && sudo apt-get install -y ffmpeg

# Upgrade pip to the latest version
!pip install --upgrade pip

# Install PyTorch with CUDA support
!pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu118

# Install other Python packages with specific versions where needed
!pip install --upgrade \
    vosk \
    pydub \
    TTS \
    transformers \
    accelerate \
    sentencepiece \
    bitsandbytes \
    pandas \
    jedi \
    networkx \
    decorator \
    scipy==1.10.1 \
    numba \
    numpy

# =============================================================
# 2) Import Libraries Incrementally and Verify GPU Availability
# =============================================================

import os
import glob
import json
import torch
from vosk import Model, KaldiRecognizer
from pydub import AudioSegment

# Import TTS library
try:
    from TTS.api import TTS
except ModuleNotFoundError:
    !pip install TTS
    from TTS.api import TTS

from IPython.display import Audio, display
from huggingface_hub import login
from transformers import pipeline
import accelerate
import sentencepiece
import bitsandbytes
import librosa
import numba

# Prefer the CUDA GPU when PyTorch can see one; otherwise fall back to the CPU.
# The chosen device is printed so the selection is visible in the cell output.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")

# =============================================================
# 3A) Log in to Hugging Face Hub Securely
# =============================================================
# getpass prompts without echoing the typed value, so the token is not shown
# in the notebook output.
from getpass import getpass

# Ask for the token interactively and hand it to huggingface_hub's login().
hf_token = getpass("Enter your Hugging Face token: ")
login(token=hf_token)


0% [Working]            Hit:1 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease
Hit:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease
Get:3 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]
Hit:4 http://archive.ubuntu.com/ubuntu jammy InRelease
Get:5 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [128 kB]
Hit:6 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Hit:7 https://r2u.stat.illinois.edu/ubuntu jammy InRelease
Hit:8 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease
Hit:9 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
Hit:10 http://archive.ubuntu.com/ubuntu jammy-backports InRelease
Fetched 257 kB in 2s (160 kB/s)
Reading package lists... Done
W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry miss

In [None]:
# =============================================================
# 3B) Remove Old Vosk Model Zip Files
# =============================================================
def remove_old_vosk_files():
    """Keep only the most recently modified small-en-us Vosk zip.

    Looks in the current working directory for files matching
    ``vosk-model-small-en-us-*.zip``. When more than one is found, every
    archive except the one with the newest modification time is deleted.
    Each deletion (or deletion failure) is reported with ``print``; a
    failure on one file does not stop the remaining deletions.

    Returns:
        None.
    """
    archives = glob.glob("vosk-model-small-en-us-*.zip")

    # Nothing to clean up with zero or one archive present.
    if len(archives) <= 1:
        print("No old Vosk model zip files to delete or only one file is present.")
        return

    # Oldest first: everything before the final entry is stale.
    *stale_archives, _newest = sorted(archives, key=os.path.getmtime)
    for stale_zip in stale_archives:
        try:
            os.remove(stale_zip)
            print(f"Deleted old Vosk model zip: {stale_zip}")
        except Exception as e:
            print(f"Error deleting {stale_zip}: {e}")

remove_old_vosk_files()

# =============================================================
# 3C) Download and Unzip the Latest Vosk Model
# =============================================================
# Fetch the model archive only when it is not already in the working directory.
if not os.path.exists("vosk-model-small-en-us-0.15.zip"):
    !wget -q https://alphacephei.com/vosk/models/vosk-model-small-en-us-0.15.zip
    # NOTE(review): this message is printed unconditionally after the wget
    # line; it does not confirm that the download actually succeeded.
    print("Downloaded Vosk model zip.")
else:
    print("Vosk model zip already downloaded.")

# The archive is unpacked on every run of this cell, replacing earlier output.
!unzip -o vosk-model-small-en-us-0.15.zip  # Overwrite if it exists

# Name of the directory the archive extracts to; the verification step and
# unified_workflow() both read this constant.
MODEL_PATH = "vosk-model-small-en-us-0.15"

# =============================================================
# 3D) Verify Vosk Model Path and Required Files
# =============================================================
# Fail fast when the unpacked model directory is missing altogether.
if not os.path.exists(MODEL_PATH):
    raise Exception(f"Model directory {MODEL_PATH} does not exist")

# Entries that must exist directly under MODEL_PATH before loading the model.
required_files = ["am", "conf", "ivector", "graph"]
for file_ in required_files:
    if os.path.exists(os.path.join(MODEL_PATH, file_)):
        continue
    raise Exception(f"Required model file {file_} not found in {MODEL_PATH}")

# =============================================================
# 3E) Load the Vosk Model
# =============================================================
def load_vosk_model(model_path):
    """Construct a Vosk ``Model`` from *model_path*, printing progress.

    Args:
        model_path: Path passed straight to ``Model(...)``.

    Returns:
        The constructed ``Model`` instance.

    Raises:
        Exception: Whatever ``Model(...)`` raised; the error is printed
            first and then re-raised unchanged.
    """
    print("✅ Loading Vosk model...")
    try:
        vosk_model = Model(model_path)
        print("✅ Vosk model loaded successfully!")
    except Exception as load_error:
        print(f"❌ Error loading Vosk model: {load_error}")
        raise
    return vosk_model

# =============================================================
# 3F) Transcribe Audio Using Vosk
# =============================================================
def transcribe_audio(model, audio_file_path, sample_rate=16000, chunk_size=4000):
    """Transcribe an audio file to plain text with a Vosk model.

    The file is decoded with pydub, converted in memory to mono 16-bit PCM
    at ``sample_rate`` and streamed to a ``KaldiRecognizer`` in fixed-size
    chunks. Text from every completed utterance plus the final partial
    result is joined with single spaces.

    Args:
        model: A loaded Vosk ``Model``.
        audio_file_path: Path of the audio file to decode.
        sample_rate: Rate (Hz) the audio is resampled to; the same value is
            given to the recognizer. Defaults to 16000.
        chunk_size: Number of raw PCM bytes sent per ``AcceptWaveform``
            call. Keep it even so 16-bit samples are never split across
            chunks. Defaults to 4000.

    Returns:
        The recognized text, or an empty string if nothing was recognized.
    """
    print(f"▶️ Starting transcription for {audio_file_path}...")
    recognizer = KaldiRecognizer(model, sample_rate)
    recognizer.SetWords(True)

    # Normalize to mono, 16-bit PCM at the recognizer's rate. The sample
    # width is forced explicitly: without it, 8/24/32-bit sources would be
    # handed to the recognizer with the wrong byte layout.
    audio = (
        AudioSegment.from_file(audio_file_path)
        .set_frame_rate(sample_rate)
        .set_channels(1)
        .set_sample_width(2)
    )
    audio_data = audio.raw_data

    segments = []
    for start in range(0, len(audio_data), chunk_size):
        chunk = audio_data[start:start + chunk_size]
        # AcceptWaveform returning true means a full result is ready.
        if recognizer.AcceptWaveform(chunk):
            segments.append(json.loads(recognizer.Result()).get('text', ''))

    # Flush whatever the recognizer is still holding after the last chunk.
    segments.append(json.loads(recognizer.FinalResult()).get('text', ''))

    # Skip empty segments so silent stretches do not leave double spaces.
    transcription = " ".join(segment for segment in segments if segment).strip()
    print(f"🔹 Final Transcription:\n{transcription}\n")
    return transcription

# =============================================================
# 3G) Load the Summarization Model (BART)
# =============================================================
def load_summarization_model_bart(model_name="facebook/bart-large-cnn"):
    """Create a ``transformers`` summarization pipeline for *model_name*.

    The pipeline is placed on device ``0`` when CUDA is available and on
    ``-1`` otherwise.

    Args:
        model_name: Model identifier passed to ``pipeline(...)``.

    Returns:
        The summarization pipeline, or ``None`` when anything in the loading
        step raises (the error is printed instead of propagated).
    """
    print(f"✅ Loading Summarization Model: {model_name}")
    try:
        if torch.cuda.is_available():
            device_index = 0
        else:
            device_index = -1
        bart_pipeline = pipeline("summarization", model=model_name, device=device_index)
        print("✅ Summarization model (BART) loaded successfully!")
    except Exception as load_error:
        print(f"❌ Error loading BART summarization model: {load_error}")
        return None
    return bart_pipeline

# Initialize BART summarizer
summarizer = load_summarization_model_bart()

# =============================================================
# 3H) Generate a Summary Using BART
# =============================================================
def generate_summary(transcription, summarizer, max_length=150, min_length=40):
    """Summarize a transcription with the given summarization callable.

    Args:
        transcription: Text to summarize. ``None``, empty or
            whitespace-only input is treated as "nothing to summarize".
        summarizer: Callable invoked as
            ``summarizer(text, max_length=..., min_length=...,
            do_sample=False, truncation=True)`` and expected to return a
            list whose first item has a ``'summary_text'`` key.
        max_length: Upper bound forwarded to the summarizer.
        min_length: Lower bound forwarded to the summarizer.

    Returns:
        The summary text. Fallback sentences are returned (never raised)
        when there is no input, the summarizer yields blank text, or the
        summarizer raises.
    """
    # Guard against None as well as blank strings; calling .strip() on None
    # would otherwise raise before the try block below.
    if not transcription or not transcription.strip():
        print("❌ No valid transcription provided. Skipping summarization.")
        return "No meaningful transcription found."

    print("▶️ Generating summary...")

    try:
        summary_list = summarizer(
            transcription,
            max_length=max_length,
            min_length=min_length,
            do_sample=False,
            # Long transcripts can exceed the model's input limit; truncate
            # rather than fail, so a summary is still produced.
            truncation=True,
        )
        summary = summary_list[0]['summary_text']

        if not summary.strip():
            summary = "I couldn't generate a meaningful summary."
            print("⚠️ The model returned an empty or invalid summary.")
        else:
            print(f"✅ Summary generated:\n{summary}\n")

        return summary
    except Exception as e:
        # Best-effort boundary: report the failure and return a fallback
        # sentence so the caller always receives a string.
        print(f"❌ Error in generate_summary: {e}")
        return "Sorry, I couldn't generate a response."

# =============================================================
# 3I) Text-to-Speech with Coqui TTS
# =============================================================
def text_to_speech_coqui(
    text,
    output_audio_path="response.wav",
    model_name="tts_models/en/ljspeech/tacotron2-DDC",
):
    """Synthesize *text* to an audio file with Coqui TTS.

    Args:
        text: Text to speak. ``None``, empty or whitespace-only text is
            rejected without loading the TTS model.
        output_audio_path: File the audio is written to. Defaults to
            ``"response.wav"``, the path the function always used before.
        model_name: Coqui model identifier passed to ``TTS(...)``.

    Returns:
        ``output_audio_path`` on success, or ``None`` when the text is empty
        or synthesis fails (the error is printed, not raised).
    """
    # Check the input first so an empty request never pays for model loading.
    if not text or not text.strip():
        print("⚠️ Cannot generate TTS from empty text!")
        return None

    try:
        print("✅ Initializing Coqui TTS...")
        tts = TTS(model_name)

        print(f"🔹 Text for TTS: {text}")  # This line ensures visibility
        tts.tts_to_file(text=text, file_path=output_audio_path)

        print(f"✅ TTS audio saved to: {output_audio_path}")
        return output_audio_path

    except Exception as e:
        # Best-effort boundary: the caller treats None as "no audio".
        print(f"❌ Error in text_to_speech_coqui: {e}")
        return None

# =============================================================
# 3J) Helper Function to Play Audio
# =============================================================
def play_audio(audio_file_path):
    """Return an IPython ``Audio`` widget for the file, set to autoplay."""
    player = Audio(audio_file_path, autoplay=True)
    return player

# =============================================================
# 3K) Unified Workflow
# =============================================================
def unified_workflow(audio_file_path):
    """Run the whole pipeline on one audio file: transcribe, summarize, speak.

    Steps: validate the file extension, load the Vosk model from
    ``MODEL_PATH``, transcribe the audio, summarize the transcription with
    the module-level ``summarizer``, convert the summary to speech and
    display an autoplaying audio widget.

    Args:
        audio_file_path: Path to a ``.wav`` or ``.mp3`` file. The extension
            check ignores case, so ``clip.MP3`` is accepted.

    Returns:
        None. Progress and every failure are reported with ``print``; no
        exception escapes this function.
    """
    try:
        # Basic format check; lower-cased so upper/mixed-case extensions
        # such as ".MP3" or ".Wav" are not rejected.
        if not audio_file_path.lower().endswith((".wav", ".mp3")):
            raise ValueError("Unsupported audio format. Use a .wav or .mp3 file.")

        if summarizer is None:
            raise EnvironmentError("Summarization model not loaded. Check your model loading step.")

        print("\n🚀 Starting AI Meeting Processing...")

        # Step 1: Load Vosk model
        vosk_model = load_vosk_model(MODEL_PATH)

        # Step 2: Transcribe audio
        transcription = transcribe_audio(vosk_model, audio_file_path)
        if not transcription:
            print("⚠️ No transcription returned. Exiting.")
            return

        # Step 3: Generate Summary
        summary = generate_summary(transcription, summarizer)
        if not summary:
            print("⚠️ No summary generated. Exiting.")
            return

        # Step 4: Convert summary to speech
        print("▶️ Converting summary to TTS audio...")
        tts_file = text_to_speech_coqui(summary)
        if tts_file:
            print("🔊 Playing generated audio...")
            display(play_audio(tts_file))
        else:
            print("❌ TTS failed to generate an audio response.")

    except Exception as e:
        # Top-level boundary for the notebook: report instead of raising.
        print(f"❌ Error in unified_workflow: {e}")

# Replace 'ElevenLabs_ProdigyAI.mp3' with the name of your own uploaded audio file (.wav or .mp3)
unified_workflow("ElevenLabs_ProdigyAI.mp3")

No old Vosk model zip files to delete or only one file is present.
Downloaded Vosk model zip.
Archive:  vosk-model-small-en-us-0.15.zip
   creating: vosk-model-small-en-us-0.15/
   creating: vosk-model-small-en-us-0.15/am/
  inflating: vosk-model-small-en-us-0.15/am/final.mdl  
   creating: vosk-model-small-en-us-0.15/graph/
  inflating: vosk-model-small-en-us-0.15/graph/disambig_tid.int  
  inflating: vosk-model-small-en-us-0.15/graph/HCLr.fst  
  inflating: vosk-model-small-en-us-0.15/graph/Gr.fst  
   creating: vosk-model-small-en-us-0.15/graph/phones/
  inflating: vosk-model-small-en-us-0.15/graph/phones/word_boundary.int  
   creating: vosk-model-small-en-us-0.15/conf/
  inflating: vosk-model-small-en-us-0.15/conf/model.conf  
  inflating: vosk-model-small-en-us-0.15/conf/mfcc.conf  
   creating: vosk-model-small-en-us-0.15/ivector/
  inflating: vosk-model-small-en-us-0.15/ivector/splice.conf  
  inflating: vosk-model-small-en-us-0.15/ivector/final.dubm  
  inflating: vosk-model-s

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/1.58k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Device set to use cuda:0


✅ Summarization model (BART) loaded successfully!

🚀 Starting AI Meeting Processing...
✅ Loading Vosk model...
✅ Vosk model loaded successfully!
▶️ Starting transcription for ElevenLabs_ProdigyAI.mp3...
🔹 Final Transcription:
hello my name is a alina and i am the cofounder and marketing and grant acquisition specialist at prodigy ai solutions our company is dedicated to creating innovative machine learning solutions tailored specifically for small and medium sized businesses or smbs at prodigy i solutions we offer a suite of tools designed to address critical business needs such as cyber security data analysis customer relationship manage meant crm and financial management by leveraging advanced ai frameworks like tensor flow care us and pie torch we provide smbs with powerful ready to use solutions that optimize operations improve decision making and safeguard digital assets our goal is to democratize access to sophisticated ai technologies enabling smbs to compete effectively in the 

 93%|█████████▎| 105M/113M [00:03<00:00, 27.0MiB/s] 

 > Model's license - apache 2.0
 > Check https://choosealicense.com/licenses/apache-2.0/ for more info.
 > Downloading model to /root/.local/share/tts/vocoder_models--en--ljspeech--hifigan_v2



100%|██████████| 113M/113M [00:05<00:00, 20.3MiB/s]


 > Model's license - apache 2.0
 > Check https://choosealicense.com/licenses/apache-2.0/ for more info.
 > Using model: Tacotron2
 > Setting up Audio Processor...
 | > sample_rate:22050
 | > resample:False
 | > num_mels:80
 | > log_func:np.log
 | > min_level_db:-100
 | > frame_shift_ms:None
 | > frame_length_ms:None
 | > ref_level_db:20
 | > fft_size:1024
 | > power:1.5
 | > preemphasis:0.0
 | > griffin_lim_iters:60
 | > signal_norm:False
 | > symmetric_norm:True
 | > mel_fmin:0
 | > mel_fmax:8000.0
 | > pitch_fmin:1.0
 | > pitch_fmax:640.0
 | > spec_gain:1.0
 | > stft_pad_mode:reflect
 | > max_norm:4.0
 | > clip_norm:True
 | > do_trim_silence:True
 | > trim_db:60
 | > do_sound_norm:False
 | > do_amp_to_db_linear:True
 | > do_amp_to_db_mel:True
 | > do_rms_norm:False
 | > db_level:None
 | > stats_path:None
 | > base:2.718281828459045
 | > hop_length:256
 | > win_length:1024


  return torch.load(f, map_location=map_location, **kwargs)


 > Model's reduction rate `r` is set to: 1
 > Vocoder Model: hifigan
 > Setting up Audio Processor...
 | > sample_rate:22050
 | > resample:False
 | > num_mels:80
 | > log_func:np.log
 | > min_level_db:-100
 | > frame_shift_ms:None
 | > frame_length_ms:None
 | > ref_level_db:20
 | > fft_size:1024
 | > power:1.5
 | > preemphasis:0.0
 | > griffin_lim_iters:60
 | > signal_norm:False
 | > symmetric_norm:True
 | > mel_fmin:0
 | > mel_fmax:8000.0
 | > pitch_fmin:1.0
 | > pitch_fmax:640.0
 | > spec_gain:1.0
 | > stft_pad_mode:reflect
 | > max_norm:4.0
 | > clip_norm:True
 | > do_trim_silence:False
 | > trim_db:60
 | > do_sound_norm:False
 | > do_amp_to_db_linear:True
 | > do_amp_to_db_mel:True
 | > do_rms_norm:False
 | > db_level:None
 | > stats_path:None
 | > base:2.718281828459045
 | > hop_length:256
 | > win_length:1024
 > Generator Model: hifigan_generator
 > Discriminator Model: hifigan_discriminator
Removing weight norm...
🔹 Text for TTS: Prodigy i solutions is a company dedicated to cre