In [1]:
from llama_index.core import SimpleDirectoryReader
from llama_index.core import SummaryIndex
from llama_index.llms.groq import Groq
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core import Document
from llama_index.core import Settings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
import nest_asyncio
import os
from dotenv import load_dotenv
import tempfile 


  from .autonotebook import tqdm as notebook_tqdm


In [9]:
# Load variables from a local .env file (if present) into the process
# environment, then read the key this notebook passes to the Groq client.
load_dotenv()
API_KEY = os.environ.get("API_KEY")

# Build the Groq-backed LLM once and register it on the global Settings object.
llm = Groq(api_key=API_KEY, model="llama-3.1-8b-instant")
Settings.llm = llm
# Embedding-model override is currently disabled:
# Settings.embed_model = HuggingFaceEmbedding()

In [19]:
async def summarize_text(data: str) -> str:
    """
    Summarize the given text with a LlamaIndex ``SummaryIndex``.

    The text is wrapped in a single ``Document``, split into nodes with a
    ``SentenceSplitter(chunk_size=1024)``, indexed, and queried asynchronously
    with a fixed summarization prompt in ``tree_summarize`` response mode.
    No LLM is passed explicitly here; presumably the globally configured
    ``Settings.llm`` is used (confirm against the configuration cell).

    Args:
        data (str): Text content to summarize.

    Returns:
        str: The summary, i.e. the query response converted with ``str()``.

    Raises:
        Exception: If any step fails. The message is prefixed with
            "Error generating summary: " and the original exception is
            attached as ``__cause__``.
    """
    try:
        # Create document directly from text
        document = Document(text=data)

        # Split into nodes
        splitter = SentenceSplitter(chunk_size=1024)
        nodes = splitter.get_nodes_from_documents([document])

        # Create the index and an async-capable query engine
        summary_index = SummaryIndex(nodes)
        summary_query_engine = summary_index.as_query_engine(
            response_mode="tree_summarize",
            use_async=True,
        )

        # `query()` is the synchronous entry point and its result is not
        # awaitable; `aquery()` is the coroutine counterpart and is what must
        # be awaited inside this async function.
        response = await summary_query_engine.aquery(
            "Please provide a comprehensive summary of the main points and key details from the text."
        )

        # Convert response to string explicitly and hand it back to the
        # caller, as the signature and docstring promise.
        return str(response)

    except Exception as e:
        print(f"Error in summarization: {str(e)}")
        # Chain the original exception so its traceback is not lost.
        raise Exception(f"Error generating summary: {str(e)}") from e


In [20]:
# ArticleScraper comes from the project-local `scrape` module.
from scrape import ArticleScraper
# Article to fetch and summarize.
url = "https://www.hindustantimes.com/india-news/article-370-electoral-bonds-cji-designate-sanjiv-khanna-was-part-of-landmark-verdicts-101729789006951.html"
scrapper = ArticleScraper()
# The result is indexed with the 'content' key by the summarization cell below.
article_data = scrapper.scrape_article(url)

In [21]:
resp = summarize_text(article_data['content'])


  resp = summarize_text(article_data['content'])


In [22]:
# Show the value bound to `resp` by the summarization cell.
print(resp)

<coroutine object summarize_text at 0x176235a40>
