In [4]:
# Install PyTorch with GPU support
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

# Install Transformers for Whisper and related utilities
!pip install transformers

# Install LangChain for the embeddings and vector store functionalities
!pip install langchain

# Install FAISS for vector search
!pip install faiss-gpu

# Install NumPy for numerical computations
!pip install numpy

# Optional: Install Pickle if not pre-installed (most environments include it by default)
!pip install pickle-mixin

Looking in indexes: https://download.pytorch.org/whl/cu118
Collecting faiss-gpu
  Downloading faiss_gpu-1.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.4 kB)
Downloading faiss_gpu-1.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (85.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.5/85.5 MB[0m [31m11.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-gpu
Successfully installed faiss-gpu-1.7.2
Collecting pickle-mixin
  Downloading pickle-mixin-1.0.2.tar.gz (5.1 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: pickle-mixin
  Building wheel for pickle-mixin (setup.py) ... [?25l[?25hdone
  Created wheel for pickle-mixin: filename=pickle_mixin-1.0.2-py3-none-any.whl size=5990 sha256=4c221412418b29d56cb29379636c4f496e6808dc00cb03dfb3afb400179842bc
  Stored in directory: /root/.cache/pip/wheels/3e/c6/e9/d1b0a34e1efc6c3ec9c086623972c6de6317faddb2af0a619

In [2]:
!pip install -U langchain-community

Collecting langchain-community
  Downloading langchain_community-0.3.15-py3-none-any.whl.metadata (2.9 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting httpx-sse<0.5.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting langchain<0.4.0,>=0.3.15 (from langchain-community)
  Downloading langchain-0.3.15-py3-none-any.whl.metadata (7.1 kB)
Collecting langchain-core<0.4.0,>=0.3.31 (from langchain-community)
  Downloading langchain_core-0.3.31-py3-none-any.whl.metadata (6.3 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.7.1-py3-none-any.whl.metadata (3.5 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading marshmallow-3.25.1-py3-none-any.whl.metadata (7.3 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-

In [5]:
import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.docstore import InMemoryDocstore
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
import numpy as np
import faiss
import pickle

In [7]:
# Step 1: Initialize Whisper model and processor
# Ensure the T4 GPU is utilized effectively
device = "cuda:0" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if device == "cuda:0" else torch.float32

# Use the Whisper model ID
model_id = "openai/whisper-large-v3-turbo"

# Load the Whisper model with efficient memory management
whisper_model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id,
    torch_dtype=torch_dtype,
    low_cpu_mem_usage=True
).to(device)

# Load the processor for Whisper
whisper_processor = AutoProcessor.from_pretrained(model_id)

# Set up the Whisper pipeline for automatic speech recognition
whisper_pipeline = pipeline(
    "automatic-speech-recognition",
    model=whisper_model,
    tokenizer=whisper_processor.tokenizer,
    feature_extractor=whisper_processor.feature_extractor,
    torch_dtype=torch_dtype,
    device=device,
)

# Print device and model details for confirmation
print(f"Device: {device}")
print(f"Model loaded: {model_id}")

model.safetensors:  10%|#         | 168M/1.62G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/3.77k [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/340 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/283k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.71M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/494k [00:00<?, ?B/s]

normalizer.json:   0%|          | 0.00/52.7k [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/34.6k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.19k [00:00<?, ?B/s]

Device set to use cuda:0


Device: cuda:0
Model loaded: openai/whisper-large-v3-turbo


In [10]:
# Function to transcribe audio
def transcribe_audio(audio_file_path):
    try:
        transcription_result = whisper_pipeline(
            audio_file_path,
            return_timestamps=True  # Enables long-form transcription with timestamps
        )
        # Extract the transcription text
        transcription_text = transcription_result["text"]
        return transcription_text
    except Exception as e:
        print(f"Error during transcription: {e}")
        return None

# Path to your audio file
audio_file_path = "/content/audio1.mp3"

# Transcribe the audio
transcription = transcribe_audio(audio_file_path)
print(f"Transcription: {transcription}")


Due to a bug fix in https://github.com/huggingface/transformers/pull/28687 transcription using a multilingual Whisper will default to language detection followed by transcription instead of translation to English.This might be a breaking change for your use case. If you want to instead always translate your audio to English, make sure to pass `language='en'`.
Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.43.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.
Whisper did not predict an ending timestamp, which can happen if audio is cut off in the middle of a word. Also make sure WhisperTimeStampLogitsProcessor was used during generation.


Transcription:  Empowerment is authority. It is a sign permission slip to actually seize the day. It's the process of getting stronger and more confident and more engaged. And to be empowered is to move through the world without any kind of fear or any kind of apology. And with these gifts comes an even deeper privilege, I believe. And that is the ability to take charge of your own life, to own yourself and claim your right.


In [11]:
# Step 3: Split the transcription into chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
texts = text_splitter.create_documents([transcription])

In [12]:
# Step 4: Create embeddings and FAISS index
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [13]:
# Get the actual vector size from the embeddings
sample_vector = embeddings.embed_query("test")
vector_size = len(sample_vector)
print(f"Embedding vector size: {vector_size}")

Embedding vector size: 384


In [14]:
# Create FAISS index with the correct vector size
faiss_index = faiss.IndexFlatL2(vector_size)

In [15]:
# Step 5: Add vectors to FAISS index
vectors = [embeddings.embed_query(doc.page_content) for doc in texts]
vectors_np = np.array(vectors, dtype='float32')  # Ensure float32 type for FAISS
print(f"Shape of vectors_np: {vectors_np.shape}")

Shape of vectors_np: (1, 384)


In [16]:
# Add vectors to the index
faiss_index.add(vectors_np)
print("Vectors added to FAISS index successfully!")

Vectors added to FAISS index successfully!


In [17]:
# Step 6: Create a docstore and index_to_docstore_id mapping
docstore = InMemoryDocstore({str(i): texts[i] for i in range(len(texts))})
index_to_docstore_id = {i: str(i) for i in range(len(texts))}

In [18]:
# Step 7: Initialize the FAISS vector store
vectordb = FAISS(
    index=faiss_index,
    docstore=docstore,
    index_to_docstore_id=index_to_docstore_id,
    embedding_function=embeddings.embed_query
)

print("FAISS vector store initialized successfully!")
faiss_index_file = "faiss_index.index"
embeddings_file = "embeddings.pkl"



FAISS vector store initialized successfully!


In [19]:
# Save the FAISS index
faiss.write_index(faiss_index, faiss_index_file)
print(f"FAISS index saved to {faiss_index_file}")

FAISS index saved to faiss_index.index


In [20]:
# Save the metadata
with open(embeddings_file, "wb") as f:
    pickle.dump(texts, f)
print(f"Metadata saved to {embeddings_file}")

Metadata saved to embeddings.pkl


In [21]:
# Step 8: Create the QA retrieval chain
groq_api_key = "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"

In [23]:
!pip install langchain-groq

Collecting langchain-groq
  Downloading langchain_groq-0.2.3-py3-none-any.whl.metadata (3.0 kB)
Collecting groq<1,>=0.4.1 (from langchain-groq)
  Downloading groq-0.15.0-py3-none-any.whl.metadata (14 kB)
Downloading langchain_groq-0.2.3-py3-none-any.whl (14 kB)
Downloading groq-0.15.0-py3-none-any.whl (109 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m109.6/109.6 kB[0m [31m7.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: groq, langchain-groq
Successfully installed groq-0.15.0 langchain-groq-0.2.3


In [24]:
from langchain_groq import ChatGroq

llm = ChatGroq(
    model="llama-3.1-70b-versatile",
    temperature=0.3,
    api_key=groq_api_key,
)

In [25]:
template = """Use the following context to answer the question at the end. If the answer is unknown, state that clearly, and avoid speculating. Keep the tone direct, futuristic, and visionary. Use no more than three sentences.
Always conclude with a thought-provoking or motivational remark, as Elon Musk might.
{context}
Question: {question}
Visionary Answer:"""

In [26]:
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)

qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=vectordb.as_retriever(),
    return_source_documents=True,
    chain_type_kwargs={'prompt': QA_CHAIN_PROMPT}
)

In [31]:
# Step 9: Test the QA system
query = "Hey Elon! I am sad"
def process_qa_retrieval_chain(chain, query):
    response = chain.invoke({'query': query})

    result_str = f'Query: {response["query"]}\n\n'
    result_str += f'Result: {response["result"]}\n\n'

    relevant_docs = response['source_documents']
    for i in range(len(relevant_docs)):
        result_str += f'Relevant Doc {i+1}:\n'
        result_str += relevant_docs[i].page_content + '\n'
        result_str += str(relevant_docs[i].metadata) + '\n\n'

    return result_str

In [32]:
result = process_qa_retrieval_chain(qa_chain, query)
print(result)

Query: Hey Elon! I am sad

Result: Your emotional state is acknowledged, but it's unknown what's causing your sadness. As you navigate this feeling, remember that empowerment is the key to unlocking your true potential. Now, go out there and seize the day, for the future belongs to those who dare to own themselves and claim their right to a brighter tomorrow.

Relevant Doc 1:
Empowerment is authority. It is a sign permission slip to actually seize the day. It's the process of getting stronger and more confident and more engaged. And to be empowered is to move through the world without any kind of fear or any kind of apology. And with these gifts comes an even deeper privilege, I believe. And that is the ability to take charge of your own life, to own yourself and claim your right.
{}




In [33]:
!pip install fastapi uvicorn torch torchvision torchaudio transformers langchain faiss-gpu pickle-mixin langchain-community langchain-groq

Collecting fastapi
  Downloading fastapi-0.115.6-py3-none-any.whl.metadata (27 kB)
Collecting uvicorn
  Downloading uvicorn-0.34.0-py3-none-any.whl.metadata (6.5 kB)
Collecting starlette<0.42.0,>=0.40.0 (from fastapi)
  Downloading starlette-0.41.3-py3-none-any.whl.metadata (6.0 kB)
Downloading fastapi-0.115.6-py3-none-any.whl (94 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m94.8/94.8 kB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading uvicorn-0.34.0-py3-none-any.whl (62 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.3/62.3 kB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading starlette-0.41.3-py3-none-any.whl (73 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m73.2/73.2 kB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: uvicorn, starlette, fastapi
Successfully installed fastapi-0.115.6 starlette-0.41.3 uvicorn-0.34.0


In [34]:
!uvicorn main:app --host 0.0.0.0 --port 8000 --reload

[32mINFO[0m:     Will watch for changes in these directories: ['/content']
[32mINFO[0m:     Uvicorn running on [1mhttp://0.0.0.0:8000[0m (Press CTRL+C to quit)
[32mINFO[0m:     Started reloader process [[36m[1m15363[0m] using [36m[1mStatReload[0m
2025-01-22 19:22:17.537916: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-01-22 19:22:17.576129: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-01-22 19:22:17.586360: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered

>> from langchain.embeddings import HuggingFaceEmbeddings

with new imports of:

>> from langchain_comm

In [35]:
!pip install pyngrok

Collecting pyngrok
  Downloading pyngrok-7.2.3-py3-none-any.whl.metadata (8.7 kB)
Downloading pyngrok-7.2.3-py3-none-any.whl (23 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.2.3


In [37]:
from pyngrok import ngrok

# Start the server using Uvicorn in the background
!uvicorn main:app --host 0.0.0.0 --port 8000 &

# Expose the port using ngrok
public_url = ngrok.connect(8000)
print(f"Public URL: {public_url}")

2025-01-22 19:27:29.875850: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-01-22 19:27:29.895433: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-01-22 19:27:29.901450: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered

>> from langchain.embeddings import HuggingFaceEmbeddings

with new imports of:

>> from langchain_community.embeddings import HuggingFaceEmbeddings
You can use the langchain cli to **automatically** upgrade many imports. Please see documentation here <https://python.langchain.com/docs/versions/v0_2/>
  from langchain.embeddings import HuggingFaceEmbeddings

>> 

ERROR:pyngrok.process.ngrok:t=2025-01-22T19:29:17+0000 lvl=eror msg="failed to reconnect session" obj=tunnels.session err="authentication failed: Usage of ngrok requires a verified account and authtoken.\n\nSign up for an account: https://dashboard.ngrok.com/signup\nInstall your authtoken: https://dashboard.ngrok.com/get-started/your-authtoken\r\n\r\nERR_NGROK_4018\r\n"


PyngrokNgrokError: The ngrok process errored on start: authentication failed: Usage of ngrok requires a verified account and authtoken.\n\nSign up for an account: https://dashboard.ngrok.com/signup\nInstall your authtoken: https://dashboard.ngrok.com/get-started/your-authtoken\r\n\r\nERR_NGROK_4018\r\n.