In [1]:
!pip install langchain langchain_fireworks langchain_community beautifulsoup4 google-search-results chromadb langchainhub sentence-transformers langchain-chroma gradio aiolimiter lxml faiss-cpu flashrank

In [2]:
import asyncio
import aiohttp
import os
from langchain import hub
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_fireworks import FireworksEmbeddings, ChatFireworks
from langchain_community.utilities import GoogleSerperAPIWrapper
from langchain.prompts import PromptTemplate
from langchain.schema import Document
import requests
from bs4 import BeautifulSoup
import nltk
from nltk.tokenize import sent_tokenize
import re
import io
import time
import sys
import gradio as gr
import asyncio
from typing import List, Tuple, Any
from langchain_community.vectorstores import FAISS
from langchain.schema import Document
import numpy as np
from functools import lru_cache
import faiss
import httpx
from urllib.parse import urlparse
from langchain_core.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.retrievers import ContextualCompressionRetriever
from flashrank import Ranker, RerankRequest
import math
from langchain.output_parsers import PydanticOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field, validator
from langchain.output_parsers import RegexParser

In [3]:
# Set up API clients
os.environ['FIREWORKS_API_KEY'] = 'API_KEY'
os.environ["SERPER_API_KEY"] = 'API_KEY'


# Download NLTK data for sentence tokenization
nltk.download('punkt', quiet=True)

True

In [16]:
from langchain.output_parsers import PydanticOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field, validator


# Initialize components
search = GoogleSerperAPIWrapper(k=3)
embeddings = FireworksEmbeddings(model="nomic-ai/nomic-embed-text-v1.5")
llm = ChatFireworks(model="accounts/fireworks/models/llama-v3p1-8b-instruct", temperature=0)
llm_8b = ChatFireworks(model="accounts/fireworks/models/llama-v3p1-8b-instruct", temperature=0)
llm_70b = ChatFireworks(model="accounts/fireworks/models/llama-v3p1-70b-instruct", temperature=0)

# Initialize Flashrank Rankers
ranker_nano = Ranker()

class ChunkIndices(BaseModel):
    indices: list[int] = Field(description="List of character indices where the content should be split")

    @validator('indices')
    def check_indices(cls, indices):
        if not all(isinstance(i, int) for i in indices):
            raise ValueError("All elements must be integers")
        return indices


def get_chunk_indices(content, target_chunk_size=250):
    template = """
    Analyze the following content and insert '[chunk_here]' at appropriate points to split it into chunks of approximately {target_chunk_size} characters each, maintaining context and coherence. Follow these guidelines:
    1. Keep function definitions and code blocks intact.
    2. Don't split in the middle of a sentence or a line of code.
    3. Try to split at logical breaks in the content.
    4. For prose, prefer splitting at paragraph boundaries.
    5. For code, prefer splitting at function or class boundaries.
    6. Do not modify the original content other than inserting '[chunk_here]'.

    Content length: {content_length} characters

    Content:
    {content}

    Provide the content with '[chunk_here]' insertions:
    """

    prompt = PromptTemplate(
        input_variables=["target_chunk_size", "content_length", "content"],
        template=template
    )

    full_prompt = prompt.format(
        target_chunk_size=target_chunk_size,
        content_length=len(content),
        content=content
    )

    response = llm_8b.invoke(full_prompt)
    response_text = response.content if hasattr(response, 'content') else str(response)

    chunks = response_text.split('[chunk_here]')
    indices = [0]
    for chunk in chunks[:-1]:
        indices.append(indices[-1] + len(chunk))

    return indices

def chunk_content(content, chunk_indices):
    chunks = []
    start = 0
    for end in chunk_indices:
        chunks.append(content[start:end].strip())
        start = end
    chunks.append(content[start:].strip())  # Add the last chunk
    return chunks

async def scrape_webpage(client, url):
    try:
        response = await client.get(url, timeout=3.0)
        response.raise_for_status()
        text = response.text
        soup = BeautifulSoup(text, 'lxml')
        content = ' '.join(soup.stripped_strings)
        return content[:5000], len(content[:5000])
    except (httpx.RequestError, httpx.TimeoutException) as exc:
        print(f"An error occurred while requesting {url}: {exc}")
    except httpx.HTTPStatusError as exc:
        print(f"Error response {exc.response.status_code} while requesting {url}")
    except Exception as e:
        print(f"Error scraping {url}: {e}")
    return "", 0

async def search_and_scrape(query, num_urls):
    search_results = search.results(query)
    scraped_urls = set()
    full_texts = []

    async with httpx.AsyncClient(timeout=httpx.Timeout(10.0, connect=3.0)) as client:
        tasks = []
        if 'organic' in search_results:
            for result in search_results['organic']:
                url = result.get('link')
                domain = urlparse(url).netloc if url else None
                if url and domain not in scraped_urls and len(tasks) < num_urls:
                    tasks.append(scrape_webpage(client, url))
                    scraped_urls.add(domain)

        results = await asyncio.gather(*tasks, return_exceptions=True)
        for result in results:
            if isinstance(result, tuple) and result[1] > 0:
                full_texts.append(result[0])

    return " ".join(full_texts)

def query_expansion(query, num_expansions):
    expansion_prompt = f"""
    Given the following search query, generate {num_expansions} additional related queries that could help find more comprehensive information on the topic. The queries should be different from each other and explore various aspects of the main query. Provide only the additional queries, numbered 1-{num_expansions}.

    Main query: {query}

    Additional queries:
    """

    response = llm.invoke(expansion_prompt)
    response_text = response.content if hasattr(response, 'content') else str(response)

    expanded_queries = [query]
    for line in response_text.split('\n'):
        if line.strip() and line[0].isdigit():
            expanded_queries.append(line.split('. ', 1)[1].strip())

    return expanded_queries[:num_expansions + 1]

def generate_hypothetical_document(query):
    hyde_prompt = f"""
    Given the search query below, generate a hypothetical document that would be a perfect match for this query. The document should be concise, containing only 3 sentences of relevant information that directly addresses the query.

    Query: {query}

    Hypothetical Document (3 sentences):
    """

    response = llm.invoke(hyde_prompt)
    return response.content if hasattr(response, 'content') else str(response)

def batch_rerank(ranker, query, documents, batch_size=32):
    all_reranked = []
    for i in range(0, len(documents), batch_size):
        batch = documents[i:i + batch_size]
        passages = [{"id": j, "text": doc.page_content} for j, doc in enumerate(batch, start=i)]
        rerank_request = RerankRequest(query=query, passages=passages)
        reranked_batch = ranker.rerank(rerank_request)
        all_reranked.extend(reranked_batch)
    return all_reranked

def get_hyde_retriever(vectorstores, hyde_embedding, num_docs_to_rerank, num_docs_to_use, rerank_option):
    def retriever(query):
        all_docs = []
        for vectorstore in vectorstores:
            docs = vectorstore.similarity_search_by_vector(hyde_embedding, k=num_docs_to_rerank)
            all_docs.extend(docs)

        unique_docs = []
        seen_content = set()
        for doc in all_docs:
            content = doc.page_content
            if content not in seen_content:
                unique_docs.append(Document(page_content=content))
                seen_content.add(content)

        if rerank_option != "No Rerank":
            if rerank_option == "Nano Cross-Encoder":
                ranker = ranker_nano
            else:
                raise ValueError(f"Invalid rerank option: {rerank_option}")

            reranked_results = batch_rerank(ranker, query, unique_docs)
            reranked_results.sort(key=lambda x: x["score"], reverse=True)
            reranked_docs = [Document(page_content=result["text"]) for result in reranked_results[:num_docs_to_use]]
            return reranked_docs
        else:
            return unique_docs[:num_docs_to_use]
    return retriever

def batch_embed_documents(documents, batch_size=128):
    batched_embeddings = []
    for i in range(0, len(documents), batch_size):
        batch = documents[i:i + batch_size]
        texts = [doc.page_content for doc in batch]
        embeddings_batch = embeddings.embed_documents(texts)
        batched_embeddings.extend(embeddings_batch)
    return batched_embeddings

async def process_query(query, num_expansions, num_urls, num_docs_to_rerank, num_docs_to_use, rerank_option, use_70b_model):
    try:
        start_time = time.time()

        hyde_start = time.time()
        hypothetical_doc = generate_hypothetical_document(query)
        hyde_time = time.time() - hyde_start
        print(f"hypothetical_doc length: {len(hypothetical_doc)}")
        print(f"-----HyDE generation time: {hyde_time:.2f} seconds")

        embed_start = time.time()
        hyde_embedding = embeddings.embed_query(hypothetical_doc)
        embed_time = time.time() - embed_start
        print(f"-----Embedding time: {embed_time:.2f} seconds")

        ext_start = time.time()
        expanded_queries = query_expansion(query, num_expansions)
        ext_time = time.time() - embed_start
        print(f"-----Query expansion time: {embed_time:.2f} seconds")

        scrape_start = time.time()
        all_texts = await asyncio.gather(*[search_and_scrape(eq, num_urls) for eq in expanded_queries])
        scrape_time = time.time() - scrape_start
        print(f"-----Web scraping time: {scrape_time:.2f} seconds")

        combined_text = " ".join(all_texts)
        print(f"Combined text length: {len(combined_text)} characters")

        chunk_start = time.time()
        chunk_indices = get_chunk_indices(combined_text)
        chunks = chunk_content(combined_text, chunk_indices)
        chunk_time = time.time() - chunk_start
        print(f"-----Chunking time: {chunk_time:.2f} seconds")
        print(f"Number of chunks: {len(chunks)}")

        index_documents = [Document(page_content=chunk) for chunk in chunks]

        vectorstore_start = time.time()
        vectorstores = []
        for i in range(0, len(index_documents), 256):
            batch = index_documents[i:i + 256]
            batch_embeddings = batch_embed_documents(batch)
            texts = [doc.page_content for doc in batch]
            vectorstore = FAISS.from_embeddings(
                embedding=embeddings,
                text_embeddings=list(zip(texts, batch_embeddings))
            )
            vectorstores.append(vectorstore)

        vectorstore_time = time.time() - vectorstore_start
        print(f"-----Vectorstore creation time: {vectorstore_time:.2f} seconds")

        retrieval_start = time.time()
        retriever = get_hyde_retriever(vectorstores, hyde_embedding, num_docs_to_rerank, num_docs_to_use, rerank_option)
        retrieved_docs = retriever(query)
        retrieval_time = time.time() - retrieval_start
        print(f"-----Retrieval{' and reranking' if rerank_option != 'No Rerank' else ''} time: {retrieval_time:.2f} seconds")

        print(f"Number of retrieved{' and reranked' if rerank_option != 'No Rerank' else ''} documents: {len(retrieved_docs)}")

        context_docs = [doc.page_content for doc in retrieved_docs]
        context = "\n\n".join(context_docs)

        total_processing_time = hyde_time + embed_time + scrape_time + chunk_time + vectorstore_time + retrieval_time
        print(f"-----Total processing time before answer generation: {total_processing_time:.2f} seconds")

        answer_start = time.time()
        prompt_template = """
        Use the following context to answer the question. Before answering the question generate a reasoning step. then answer.
        If you cannot answer based on the context, say "I don't have enough information to answer that question."

        Context:
        {context}

        Question: {question}

        Answer:
        """
        prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])

        chosen_llm = llm_70b if use_70b_model else llm_8b

        rag_chain = prompt | chosen_llm | StrOutputParser()
        answer = rag_chain.invoke({"context": context, "question": query})
        answer_time = time.time() - answer_start
        print(f"-----Answer generation time: {answer_time:.2f} seconds")

        print("\n")
        print("-"*120)
        print("Final Answer:\n", answer)
        print("-"*120)

        return answer, context_docs

    except Exception as e:
        print(f"An error occurred: {e}")
        import traceback
        traceback.print_exc()
        return "I'm sorry, but I encountered an error while processing your query. Please try again.", []

def gradio_interface(query, num_expansions, num_urls, num_docs_to_rerank, num_docs_to_use, rerank_option, use_70b_model):
    old_stdout = sys.stdout
    sys.stdout = buffer = io.StringIO()

    answer, context_docs = asyncio.run(process_query(query, num_expansions, num_urls, num_docs_to_rerank, num_docs_to_use, rerank_option, use_70b_model))

    sys.stdout = old_stdout
    captured_output = buffer.getvalue()

    truncated_docs = [f"Document {i+1}: {doc[:150]}..." for i, doc in enumerate(context_docs)]
    truncated_context = "\n\n".join(truncated_docs)

    captured_output += f"\n\nContext used for answer generation (first 150 characters of each document, {len(context_docs)} documents in total):\n" + truncated_context

    return captured_output

# Create Gradio interface
iface = gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.Textbox(label="Enter your query"),
        gr.Slider(minimum=0, maximum=3, value=1, step=1, label="Number of query expansions"),
        gr.Slider(minimum=1, maximum=10, value=3, step=1, label="Number of URLs to scrape per extended query"),
        gr.Slider(minimum=20, maximum=100, value=80, step=1, label="Number of documents to rerank"),
        gr.Slider(minimum=10, maximum=100, value=50, step=1, label="Number of reranked documents to use"),
        gr.Radio(["No Rerank", "Nano Cross-Encoder"], label="Reranking Option", value="Nano Cross-Encoder"),
        gr.Radio(["LLaMA3.1 8B", "LLaMA3.1 70B"], label="Question Answering Option", value="LLaMA3.1 8B")
    ],
    outputs="text",
    title="Advanced RAG Query Processing with Flashrank",
    description="Enter a query and adjust parameters to get a detailed answer based on web search and document analysis. Choose from different reranking options."
)

if __name__ == "__main__":
    iface.launch(share=True, debug=True)

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
Running on public URL: https://50829bda5c883f5291.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


Traceback (most recent call last):
  File "<ipython-input-16-b2bdc174d304>", line 211, in process_query
    all_texts = await asyncio.gather(*[search_and_scrape(eq, num_urls) for eq in expanded_queries])
  File "<ipython-input-16-b2bdc174d304>", line 90, in search_and_scrape
    search_results = search.results(query)
  File "/usr/local/lib/python3.10/dist-packages/langchain_community/utilities/google_serper.py", line 62, in results
    return self._google_serper_api_results(
  File "/usr/local/lib/python3.10/dist-packages/langchain_community/utilities/google_serper.py", line 164, in _google_serper_api_results
    response.raise_for_status()
  File "/usr/local/lib/python3.10/dist-packages/requests/models.py", line 1021, in raise_for_status
    raise HTTPError(http_error_msg, response=self)
requests.exceptions.HTTPError: 400 Client Error: Bad Request for url: https://google.serper.dev/search?q=how+to+take+care+of+my+dog%3F&gl=us&hl=en&num=3


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://50829bda5c883f5291.gradio.live


In [18]:
# Define the content (the 5000-character document we created earlier)
content = """
Natural Language Processing (NLP) is a fascinating field at the intersection of computer science, artificial intelligence, and linguistics. It focuses on the interaction between computers and human language, enabling machines to understand, interpret, and generate human-readable text. One of the fundamental tasks in NLP is text classification, which involves categorizing text documents into predefined classes or categories.

Let's explore a simple example of text classification using Python and the popular machine learning library scikit-learn. We'll build a basic sentiment analysis model that can classify movie reviews as either positive or negative.

First, we need to import the necessary libraries:

import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report

Next, let's define a small dataset of movie reviews with their corresponding sentiments:

reviews = [
    "This movie was fantastic! I loved every minute of it.",
    "Terrible acting and a boring plot. Waste of time.",
    "Great performances and an engaging story. Highly recommended!",
    "I fell asleep halfway through. Extremely dull.",
    "A masterpiece of modern cinema. Absolutely brilliant!",
    "Poorly written and predictable. Don't bother watching.",
    "Visually stunning with excellent character development.",
    "A complete disaster. One of the worst films I've ever seen.",
    "Captivating from start to finish. A must-watch!",
    "Disappointing and confusing. I expected much better."
]

sentiments = [1, 0, 1, 0, 1, 0, 1, 0, 1, 0]  # 1 for positive, 0 for negative

Now that we have our data, we need to preprocess it. We'll use the CountVectorizer to convert our text data into numerical features:

vectorizer = CountVectorizer()
X = vectorizer.fit_transform(reviews)
y = np.array(sentiments)

Let's split our data into training and testing sets:

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

Now we can train our Naive Bayes classifier:

clf = MultinomialNB()
clf.fit(X_train, y_train)

With our model trained, we can make predictions on the test set and evaluate its performance:

y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")
print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=['Negative', 'Positive']))

This simple example demonstrates the basics of text classification using machine learning. However, in real-world applications, we often deal with much larger datasets and more complex models.

One popular approach for more advanced NLP tasks is to use pre-trained language models like BERT (Bidirectional Encoder Representations from Transformers). BERT and its variants have revolutionized the field of NLP by achieving state-of-the-art results on various tasks.

Let's take a look at how we can use the Transformers library to implement a BERT-based text classification model:

from transformers import BertTokenizer, BertForSequenceClassification
import torch

# Load pre-trained BERT model and tokenizer
model_name = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertForSequenceClassification.from_pretrained(model_name, num_labels=2)

# Tokenize and encode the input text
def encode_text(text):
    return tokenizer.encode_plus(
        text,
        add_special_tokens=True,
        max_length=128,
        padding='max_length',
        truncation=True,
        return_attention_mask=True,
        return_tensors='pt'
    )

# Function to make predictions
def predict_sentiment(text):
    encoding = encode_text(text)
    input_ids = encoding['input_ids']
    attention_mask = encoding['attention_mask']

    with torch.no_grad():
        outputs = model(input_ids, attention_mask=attention_mask)
        logits = outputs.logits
        probabilities = torch.softmax(logits, dim=1)
        predicted_class = torch.argmax(probabilities, dim=1).item()

    return 'Positive' if predicted_class == 1 else 'Negative'

# Example usage
review = "This movie exceeded all my expectations. The plot was engaging, and the actors delivered stellar performances."
sentiment = predict_sentiment(review)
print(f"Sentiment: {sentiment}")

This BERT-based approach can provide more accurate results, especially for more nuanced text classification tasks. However, it's important to note that using pre-trained models like BERT requires more computational resources and may not be suitable for all use cases.

As you dive deeper into NLP, you'll encounter many other fascinating topics and techniques, such as:

1. Named Entity Recognition (NER)
2. Part-of-Speech (POS) Tagging
3. Text Summarization
4. Machine Translation
5. Question Answering Systems
6. Topic Modeling
7. Sentiment Analysis at scale
8. Chatbots and Conversational AI

Each of these areas presents unique challenges and opportunities for innovation. As the field of NLP continues to evolve, new models and techniques are constantly being developed, pushing the boundaries of what's possible in natural language understanding and generation.

In conclusion, NLP is a dynamic and rapidly growing field with numerous practical applications across various industries. From improving search engines and recommendation systems to enabling more natural human-computer interactions, NLP is transforming the way we interact with technology and process information. As you continue to explore this exciting field, remember that the key to success lies in not only understanding the underlying algorithms and models but also in developing a deep appreciation for the nuances and complexities of human language.
"""

In [5]:
# Initialize the Fireworks LLM
llm = ChatFireworks(model="accounts/fireworks/models/llama-v3p1-8b-instruct", temperature=0)

# Define the chunking template
template = """
Analyze the following content and insert '[CHUNK]' (in all caps) at appropriate points to split it into chunks of
approximately {target_chunk_size} characters each, maintaining context and coherence.

Follow these guidelines:
1. Keep related information together.
2. Keep function definitions and code blocks intact as possible.
3. Don't split in the middle of a sentence or a line of code.
4. Try to split at logical breaks in the content.
5. For prose, prefer splitting at paragraph boundaries.
6. For code, prefer splitting at function or class boundaries.
5. Do not modify the original content other than inserting '[CHUNK]'.
6. Do not add any additional text or formatting.
7. The first chunk should start immediately, without any preceding text.
8. Most importantly you should keep the chunk size around {target_chunk_size} characters.

Content length: {content_length} characters

Content:
{content}

Return only the chunked content, with no additional text before or after:
"""

# Create a PromptTemplate
prompt = PromptTemplate(
    input_variables=["target_chunk_size", "content_length", "content"],
    template=template
)

# The content variable remains the same as in your original code

# Function to chunk the content
def chunk_content(content, target_chunk_size):
    content_length = len(content)
    formatted_prompt = prompt.format(
        target_chunk_size=target_chunk_size,
        content_length=content_length,
        content=content
    )

    response = llm.invoke(formatted_prompt)
    return response.content.strip()

# Function to split the content into chunks
def split_into_chunks(chunked_content):
    chunks = chunked_content.split('[CHUNK]')
    return [chunk.strip() for chunk in chunks if chunk.strip()]

# Chunk the content
target_chunk_size =200
start_time = time.time()
chunked_content = chunk_content(content, target_chunk_size)
total_time = time.time() - start_time
print(f"Total time: {total_time:.2f}\n")

# Split the content into chunks
chunks = split_into_chunks(chunked_content)

# Print the chunks
for i, chunk in enumerate(chunks, 1):
    print(f"Chunk {i} (Length: {len(chunk)} characters):")
    print(chunk)
    print("-" * 80)

# Print the total number of chunks
print(f"Total number of chunks: {len(chunks)}")

Total time: 5.46

Chunk 1 (Length: 427 characters):
Natural Language Processing (NLP) is a fascinating field at the intersection of computer science, artificial intelligence, and linguistics. It focuses on the interaction between computers and human language, enabling machines to understand, interpret, and generate human-readable text. One of the fundamental tasks in NLP is text classification, which involves categorizing text documents into predefined classes or categories.
--------------------------------------------------------------------------------
Chunk 2 (Length: 230 characters):
Let's explore a simple example of text classification using Python and the popular machine learning library scikit-learn. We'll build a basic sentiment analysis model that can classify movie reviews as either positive or negative.
--------------------------------------------------------------------------------
Chunk 3 (Length: 294 characters):
First, we need to import the necessary libraries:

import n

In [22]:
# Initialize the Fireworks LLM
llm = ChatFireworks(model="accounts/fireworks/models/llama-v3p1-8b-instruct", temperature=0)

# Define the improved chunking template
template = """
Analyze the following content and insert '[CHUNK]' (in all caps) at appropriate points to split it into chunks of
EXACTLY {target_chunk_size} characters each (with a maximum deviation of 10%), maintaining context and coherence as much as possible within this strict size constraint.

Follow these guidelines in order of priority:
1. Maintain chunk size: Each chunk MUST be between {min_chunk_size} and {max_chunk_size} characters. This is the most important rule.
2. Keep function definitions and code blocks intact when possible, but split them if necessary to meet the chunk size requirement.
3. For prose, prefer splitting at paragraph or sentence boundaries.
4. For code, prefer splitting at function, class, or logical block boundaries.
5. If necessary to meet the size requirement, split in the middle of a paragraph or code block.
6. Do not modify the original content other than inserting '[CHUNK]'.
7. Do not add any additional text or formatting.
8. The first chunk should start immediately, without any preceding text.

Remember: Maintaining the correct chunk size is more important than keeping related information together. If you need to split in a less ideal place to meet the size requirement, do so.

Content length: {content_length} characters
Target chunk size: {target_chunk_size} characters
Minimum chunk size: {min_chunk_size} characters
Maximum chunk size: {max_chunk_size} characters

Content:
{content}

Return only the chunked content, with no additional text before or after:
"""

# Create a PromptTemplate
prompt = PromptTemplate(
    input_variables=["target_chunk_size", "min_chunk_size", "max_chunk_size", "content_length", "content"],
    template=template
)

# Function to chunk the content
def chunk_content(content, target_chunk_size):
    content_length = len(content)
    min_chunk_size = int(target_chunk_size * 0.5)
    max_chunk_size = int(target_chunk_size * 1.5)
    formatted_prompt = prompt.format(
        target_chunk_size=target_chunk_size,
        min_chunk_size=min_chunk_size,
        max_chunk_size=max_chunk_size,
        content_length=content_length,
        content=content
    )

    llm_start_time = time.time()
    response = llm.invoke(formatted_prompt)
    llm_time = time.time() - llm_start_time
    return response.content.strip(), llm_time

# The rest of the code remains the same as in the previous version

# Function to split the content into chunks
def split_into_chunks(chunked_content):
    chunks = chunked_content.split('[CHUNK]')
    return [chunk.strip() for chunk in chunks if chunk.strip()]


# Gradio interface function
def process_text(input_text, chunk_size):
    start_time = time.time()
    input_length = len(input_text)
    chunked_content, llm_time = chunk_content(input_text, chunk_size)
    chunks = split_into_chunks(chunked_content)
    total_time = time.time() - start_time

    output = f"Input length: {input_length} characters\n"
    output += f"LLM processing time: {llm_time:.2f} seconds\n"
    output += f"Total processing time: {total_time:.2f} seconds\n\n"

    for i, chunk in enumerate(chunks, 1):
        output += f"Chunk {i} (Length: {len(chunk)} characters):\n{chunk}\n{'=' * 80}\n\n"

    output += f"Total number of chunks: {len(chunks)}"
    return output

# Create Gradio interface
iface = gr.Interface(
    fn=process_text,
    inputs=[
        gr.Textbox(label="Input Text", value=content, lines=10),
        gr.Slider(minimum=100, maximum=1000, value=200, step=50, label="Target Chunk Size")
    ],
    outputs=gr.Textbox(label="Chunked Output", lines=20),
    title="Text Chunker",
    description="Enter your text or use the default content, set the target chunk size, and see the resulting chunks along with input length and processing times."
)

# Launch the app
iface.launch()

Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://489a4dc38b2caff207.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


