## Task 1: Retrieval-Augmented Generation (RAG) Model for QA Bot


### Set up the Environment

Install Required Packages:

*   ```pinecone-client``` for the Pinecone vector database.

*   ```transformers``` for handling embeddings.
*   ```datasets``` if you plan to use a dataset for testing.
*   ```cohere``` (the Cohere API client) for the generative model.

In [None]:
!pip install pinecone-client transformers datasets cohere


### Set Up Pinecone:

In [None]:
import os
from pinecone import Pinecone, ServerlessSpec

# Initialize Pinecone.
# The API key is read from the PINECONE_API_KEY environment variable so the
# secret is never stored in the notebook; a missing variable raises KeyError.
pc = Pinecone(
    api_key=os.environ["PINECONE_API_KEY"]
)

index_name = "qa-bot-index"
# Connect to the index
index = pc.Index(index_name)

### Set Up Cohere API:

In [None]:
import cohere

# Initialize Cohere client.
# The API key is read from the COHERE_API_KEY environment variable so the
# secret is never stored in the notebook; a missing variable raises KeyError.
import os

co = cohere.Client(os.environ["COHERE_API_KEY"])


### Load the SQuAD Dataset


In [None]:
from datasets import load_dataset
# Fetch SQuAD (Stanford Question Answering Dataset) and show the train split.
squad_dataset = load_dataset("squad")
print(squad_dataset['train'])

# Collect the passage and the question of every training example, in order.
contexts = []
questions = []
for entry in squad_dataset['train']:
    contexts.append(entry['context'])
    questions.append(entry['question'])

Dataset({
    features: ['id', 'title', 'context', 'question', 'answers'],
    num_rows: 87599
})


### Chunking Contexts

In [None]:
import nltk
from nltk.tokenize import sent_tokenize

nltk.download('punkt')
# Recent NLTK releases load the sentence tokenizer from the 'punkt_tab'
# resource instead of 'punkt'; fetch both so sent_tokenize works either way.
nltk.download('punkt_tab')

# Function to split contexts into smaller chunks
def split_into_chunks(context, max_length=100):
    """Split ``context`` into chunks made of whole sentences.

    Sentences produced by ``sent_tokenize`` are packed greedily: a sentence is
    added to the current chunk while the chunk's length plus the sentence's
    length stays within ``max_length`` characters; otherwise the current chunk
    is closed and the sentence starts a new one. Sentences are never split, so
    a single sentence longer than ``max_length`` becomes a chunk of its own.

    Args:
        context: The text to split.
        max_length: Soft character limit for a chunk.

    Returns:
        A list of whitespace-stripped chunk strings in document order. No
        empty chunk is emitted, even when the first sentence already exceeds
        ``max_length``.
    """
    chunks = []
    parts = []        # sentences belonging to the chunk being built
    current_len = 0   # length of those sentences, one separator space each

    for sentence in sent_tokenize(context):
        if current_len + len(sentence) <= max_length:
            parts.append(sentence)
            current_len += len(sentence) + 1
            continue

        # Close the running chunk only if it holds something; otherwise an
        # over-long first sentence would produce an empty chunk.
        if parts:
            chunks.append(" ".join(parts).strip())
        parts = [sentence]
        current_len = len(sentence) + 1

    if parts:
        chunks.append(" ".join(parts).strip())

    return chunks

# Chunk every context and gather all chunks into one flat list, keeping the
# order of the contexts and of the chunks within each context.
document_chunks = []
for context in contexts:
    document_chunks += split_into_chunks(context)


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


### Generating Embeddings


In [None]:
import torch
from transformers import AutoTokenizer, AutoModel
from tqdm import tqdm

# Load the tokenizer and encoder of the same Sentence-BERT checkpoint.
MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModel.from_pretrained(MODEL_NAME)

# Generate embeddings in batches
def generate_embeddings_batch(texts, batch_size=16):
    """Embed ``texts`` with the module-level ``tokenizer`` and ``model``.

    Texts are processed ``batch_size`` at a time. Each embedding is the mean
    of the text's token vectors from ``last_hidden_state``, weighted by the
    attention mask so that padding tokens added to align a batch do not
    contribute. This keeps an embedding independent of which other texts
    share its batch.

    Args:
        texts: List of strings to embed.
        batch_size: Number of texts per forward pass.

    Returns:
        A CPU ``torch.Tensor`` with one row per text, in input order. An
        empty ``texts`` yields a tensor with zero rows.
    """
    # Pick the device and move the model once rather than on every batch.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    embeddings = []
    for i in tqdm(range(0, len(texts), batch_size)):
        batch_texts = texts[i:i+batch_size]
        inputs = tokenizer(batch_texts, return_tensors="pt", padding=True, truncation=True, max_length=512)
        inputs = {key: val.to(device) for key, val in inputs.items()}

        with torch.no_grad():
            outputs = model(**inputs)

        # Masked mean pooling: zero out padded positions, then divide by the
        # number of real tokens in each text.
        token_vectors = outputs.last_hidden_state
        mask = inputs["attention_mask"].unsqueeze(-1).to(token_vectors.dtype)
        summed = (token_vectors * mask).sum(dim=1)
        token_counts = mask.sum(dim=1).clamp(min=1e-9)
        embeddings.append((summed / token_counts).cpu())

    if not embeddings:
        # torch.cat rejects an empty list; return a zero-row tensor instead.
        return torch.empty((0, model.config.hidden_size))

    return torch.cat(embeddings)

# Generate embeddings for every document chunk, 16 chunks per forward pass.
# NOTE: the progress bar captured below this cell shows the run was still in
# progress after ~31 minutes, so expect this cell to take a long time.
# NOTE(review): no cell visible here upserts doc_embeddings into the Pinecone
# index — confirm the index is populated elsewhere before querying.
batch_size = 16
doc_embeddings = generate_embeddings_batch(document_chunks, batch_size=batch_size)


 14%|█▎        | 4326/31876 [31:03<3:37:33,  2.11it/s]

### Querying Pinecone

In [None]:
# Generate query embedding
def generate_query_embedding(query):
    """Embed ``query`` with the module-level ``tokenizer`` and ``model``.

    The embedding is the mean of the token vectors in ``last_hidden_state``,
    weighted by the attention mask so padding tokens are ignored. For a
    single query string there is no padding, so this equals a plain mean over
    tokens; the mask only matters when several queries are passed at once.

    Args:
        query: The query text (anything the tokenizer accepts).

    Returns:
        A NumPy array with one embedding row per query.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    inputs = tokenizer(query, return_tensors="pt", padding=True, truncation=True, max_length=512)
    inputs = {key: val.to(device) for key, val in inputs.items()}

    with torch.no_grad():
        outputs = model(**inputs)

    # Masked mean pooling over the token dimension.
    token_vectors = outputs.last_hidden_state
    mask = inputs["attention_mask"].unsqueeze(-1).to(token_vectors.dtype)
    summed = (token_vectors * mask).sum(dim=1)
    token_counts = mask.sum(dim=1).clamp(min=1e-9)
    query_embedding = summed / token_counts

    return query_embedding.cpu().numpy()


In [None]:
# Query Pinecone with the query embedding
def query_pinecone(query, top_k=5):
    """Return the IDs of the ``top_k`` index entries closest to ``query``.

    Args:
        query: The query text; embedded with ``generate_query_embedding``.
        top_k: Number of matches to request from the index.

    Returns:
        A list with the ``'id'`` of each match, in the order the index
        returned them.
    """
    query_embedding = generate_query_embedding(query)

    # The embedding comes back with a leading batch dimension; the index
    # expects one flat list of floats, so flatten before converting.
    vector = query_embedding.reshape(-1).tolist()

    results = index.query(vector=vector, top_k=top_k)

    return [match['id'] for match in results['matches']]

### Generating the Final Answer

In [None]:
# Generate answer based on relevant chunks and user query
def generate_answer(contexts, question):
    """Ask the Cohere ``command`` model to answer ``question`` from ``contexts``.

    Args:
        contexts: Iterable of text chunks; they are joined with single spaces
            to form the context part of the prompt.
        question: The user's question.

    Returns:
        The text of the first generation, stripped of surrounding whitespace.
    """
    joined_context = " ".join(contexts)
    prompt = f"Context: {joined_context}\n\nQuestion: {question}\n\nAnswer:"

    generation_args = dict(
        model="command",
        prompt=prompt,
        max_tokens=150,
        temperature=0.5,
    )
    response = co.generate(**generation_args)

    first_generation = response.generations[0]
    return first_generation.text.strip()

In [None]:
# Retrieve answer based on query
def get_answer_from_query(query, top_k=5):
    """Answer ``query`` from the ``top_k`` most relevant document chunks.

    Args:
        query: The user's question.
        top_k: Number of chunks to retrieve from the index.

    Returns:
        The answer text produced by ``generate_answer``.
    """
    relevant_chunks = []
    for chunk_id in query_pinecone(query, top_k=top_k):
        # Assumes each index ID is the chunk's position in document_chunks.
        relevant_chunks.append(document_chunks[int(chunk_id)])

    return generate_answer(relevant_chunks, query)

### Running the QA Bot

In [None]:
# Example query: answer it from the 5 best-matching chunks and print the result.
user_query = "What is the role of AI in healthcare?"
answer = get_answer_from_query(user_query, top_k=5)
print(f"Answer: {answer}")

In [None]:
# Example query: answer it from the 5 best-matching chunks and print the result.
user_query = "How does machine learning improve customer experience?"
answer = get_answer_from_query(user_query, top_k=5)
print(f"Answer: {answer}")

In [None]:
# Example query: answer it from the 5 best-matching chunks and print the result.
user_query = "What are the key components of a neural network?"
answer = get_answer_from_query(user_query, top_k=5)
print(f"Answer: {answer}")

In [None]:
# Example query: answer it from the 5 best-matching chunks and print the result.
user_query = "How is natural language processing (NLP) used in sentiment analysis?"
answer = get_answer_from_query(user_query, top_k=5)
print(f"Answer: {answer}")

In [None]:
# Example query: answer it from the 5 best-matching chunks and print the result.
user_query = "What are the advantages of edge computing in IoT?"
answer = get_answer_from_query(user_query, top_k=5)
print(f"Answer: {answer}")