In [3]:
import os
import requests
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize, sent_tokenize

# Download once
nltk.download("punkt")
nltk.download("stopwords")

# -------------------------------------
# 1. Extractive Summarization
# -------------------------------------
def summarize_text(text: str, ratio: float = 1.2):
    words = word_tokenize(text)
    stop_words = set(stopwords.words("english"))
    filtered_words = [w.lower() for w in words if w.lower() not in stop_words]

    freq_table = {}
    for word in filtered_words:
        freq_table[word] = freq_table.get(word, 0) + 1

    sentences = sent_tokenize(text)
    sentence_value = {}

    for sentence in sentences:
        for word, freq in freq_table.items():
            if word in sentence.lower():
                sentence_value[sentence] = sentence_value.get(sentence, 0) + freq

    if not sentence_value:
        return text

    avg_score = sum(sentence_value.values()) / len(sentence_value)

    summary = " ".join(
        [sentence for sentence in sentences
         if sentence_value.get(sentence, 0) > ratio * avg_score]
    )

    return summary if summary.strip() else text


# -------------------------------------
# 2. BART Length Presets
# -------------------------------------
# Very short: 10–30 words
# Others: percentage ranges of original text word count
LENGTH_PRESETS = {
    "very_short": (10, 30),        # fixed word count
    "short": (0.05, 0.15),         # 5–15% of original text
    "medium": (0.15, 0.25),        # 15–25%
    "normal": (0.25, 0.35),
    "good":(0.35,0.45),        # 25–35%
}

API_URL = "https://router.huggingface.co/hf-inference/models/google/pegasus-cnn_dailymail"

headers = {"Authorization": f"Bearer {os.environ['HF_TOKEN']}"}


# -------------------------------------
# 3. Abstractive Summarization
# -------------------------------------
def bart_summarize(text, original_text, level="normal"):
    total_words = len(original_text.split())
    preset = LENGTH_PRESETS.get(level, (0.25, 0.35))

    if level == "very_short":
        min_length, max_length = preset
    else:
        min_length = max(int(total_words * preset[0]), 5)
        max_length = max(int(total_words * preset[1]), min_length + 5)

    payload = {
        "inputs": text,
        "parameters": {
            "max_length": max_length,
            "min_length": min_length,
            "do_sample": False
        }
    }

    response = requests.post(API_URL, headers=headers, json=payload, timeout=60)

    try:
        return response.json()[0]["summary_text"]
    except:
        return text


# -------------------------------------
# 4. Hybrid Summarizer
# -------------------------------------
def hybrid_summarize(text, ratio=1.2, level="normal"):
    extractive_output = summarize_text(text, ratio=ratio)
    final_output = bart_summarize(extractive_output, text, level=level)
    # final_output = bart_summarize(text, text, level=level)

    return final_output


# -------------------------------------
# Example Usage
# -------------------------------------
if __name__ == "__main__":
    input_text = """
In recent decades, globalization has transformed the nature of economic activity, creating highly interconnected markets that operate across national boundaries. Multinational corporations have leveraged technological advancements to optimize production, distribution, and supply chain management, thereby achieving unprecedented efficiency. However, this interdependence has also increased systemic vulnerabilities, as evidenced by the economic shocks caused by events such as the 2008 financial crisis and the COVID-19 pandemic. The sudden disruption of supply chains exposed the fragility of just-in-time inventory systems, which, while cost-efficient, offer little resilience to unexpected global events. In addition to logistical vulnerabilities, geopolitical tensions between major powers have introduced new risks to trade stability, with tariffs, sanctions, and export controls influencing the flow of goods, capital, and technology. Meanwhile, environmental considerations have become increasingly central to economic planning, as governments and organizations confront the pressing challenges of climate change, resource scarcity, and sustainable development. Companies are now pressured to balance efficiency with ethical and environmental responsibility, leading to the adoption of green technologies, renewable energy solutions, and circular economy models. Advanced technologies such as artificial intelligence, blockchain, and IoT are being integrated into supply chain management to improve transparency, forecast demand, and automate decision-making processes. While these innovations hold the promise of enhanced operational performance, they also introduce challenges related to cybersecurity, data privacy, and workforce adaptation. As AI systems increasingly influence strategic business decisions, companies must grapple with ethical considerations regarding bias, accountability, and governance. Concurrently, the rapid pace of technological disruption creates a competitive environment where organizations that fail to adapt risk obsolescence, while those that leverage new tools effectively can gain market advantage. The global labor market is also experiencing profound shifts, with automation displacing routine tasks while generating demand for highly skilled professionals capable of managing complex technological infrastructures. This evolving landscape necessitates adaptive policies, strategic foresight, and continuous investment in human capital. Ultimately, the intersection of globalization, technological innovation, and ethical imperatives is redefining the parameters of modern economic activity, highlighting the need for resilience, innovation, and sustainable growth in an increasingly complex and uncertain world."""
    summary = hybrid_summarize(input_text, ratio=1.2, level="good")
    print(summary)


[nltk_data] Downloading package punkt to /home/sandeep/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /home/sandeep/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Globalization has transformed the nature of economic activity, creating highly interconnected markets that operate across national boundaries .<n>This interdependence has also increased systemic vulnerabilities, as evidenced by the 2008 financial crisis and the COVID-19 pandemic .<n>As AI systems increasingly influence strategic business decisions, companies must grapple with ethical considerations regarding bias, accountability, and governance .<n>The global labor market is also experiencing profound shifts, with automation displacing routine tasks while generating demand for highly skilled professionals .<n>Ultimately, the intersection of globalization, technological innovation, and ethical imperatives is redefining the parameters of modern economic activity .


In [27]:
text = """
    In today’s rapidly evolving business environment, organizations are constantly seeking ways to gain a competitive advantage by improving operational efficiency and making data-driven decisions. One of the major challenges faced by business analysts, like Janani, is dealing with the overwhelming amount of textual information available from various sources such as market reports, industry publications, customer reviews, financial statements, and competitor updates. Analyzing such massive volumes of text manually can be time-consuming and prone to human error. As a result, there is a growing need for automated tools that can quickly process and summarize large datasets into actionable insights.

    Industry trends change at a fast pace, and businesses must stay informed to adapt their strategies accordingly. Analysts need to identify emerging technologies, shifting consumer preferences, regulatory changes, and global market fluctuations. However, manually reading through dozens of lengthy articles every day reduces productivity and delays decision-making. Automated summarization provides a fast and efficient solution by condensing these large documents into short, meaningful summaries that highlight only the most important information.

    Competitor analysis is another crucial area where automated text summarization proves beneficial. Companies frequently release press announcements, product updates, financial results, and marketing campaigns. Tracking these activities manually is nearly impossible on a daily basis. With summarization tools, analysts can instantly extract key competitor insights, enabling businesses to respond quickly with strategic planning, product improvements, or targeted marketing efforts.

    Additionally, the widespread use of social media and online platforms has led to an explosion of user-generated content. Customer feedback, reviews, and public sentiment often contain valuable insights that can shape business decisions. Yet, the sheer volume of this data makes manual review impractical. Automated summarization allows analysts to distill customer opinions, identify recurring issues, and uncover positive trends that can guide product development and enhance customer satisfaction.

    Artificial intelligence and natural language processing technologies play a central role in building effective summarization tools. By leveraging algorithms that analyze word frequency, sentence importance, and contextual meaning, these systems can produce accurate summaries that retain the essence of the original text. This not only saves time for analysts but also ensures consistency and reduces the risk of missing critical information.

    In conclusion, automated text summarization is becoming an essential component of modern business intelligence. It empowers analysts like Janani to navigate the vast amount of textual data efficiently, extract meaningful insights, and make informed decisions quickly. As organizations continue to embrace digital transformation, tools like SummAI will play a vital role in enhancing productivity, improving strategic planning, and maintaining a competitive edge in the marketplace.

"""

In [5]:
import os
import requests
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize, sent_tokenize

# Download once
nltk.download("punkt")
nltk.download("stopwords")

# -------------------------------------
# 1. Extractive Summarization
# -------------------------------------
def summarize_text(text: str, ratio: float = 1.2):
    words = word_tokenize(text)
    stop_words = set(stopwords.words("english"))
    filtered_words = [w.lower() for w in words if w.lower() not in stop_words]

    freq_table = {}
    for word in filtered_words:
        freq_table[word] = freq_table.get(word, 0) + 1

    sentences = sent_tokenize(text)
    sentence_value = {}

    for sentence in sentences:
        for word, freq in freq_table.items():
            if word in sentence.lower():
                sentence_value[sentence] = sentence_value.get(sentence, 0) + freq

    if not sentence_value:
        return text

    avg_score = sum(sentence_value.values()) / len(sentence_value)

    summary = " ".join(
        [sentence for sentence in sentences
         if sentence_value.get(sentence, 0) > ratio * avg_score]
    )

    return summary if summary.strip() else text


# -------------------------------------
# Removed all LEVEL logic completely
# -------------------------------------

API_URL = "https://router.huggingface.co/hf-inference/models/google/pegasus-cnn_dailymail"
headers = {"Authorization": f"Bearer {os.environ['HF_TOKEN']}"}


# -------------------------------------
# 2. Abstractive Summarization (simple, no level)
# -------------------------------------
def bart_summarize(text):
    payload = {
        "inputs": text,
        "parameters": {
            "max_length": 250,
            "min_length": 100,
            "do_sample": False
        }
    }

    response = requests.post(API_URL, headers=headers, json=payload, timeout=60)

    try:
        return response.json()[0]["summary_text"]
    except:
        return text


# -------------------------------------
# 3. Hybrid Summarizer (no level param)
# -------------------------------------
def hybrid_summarize(text, ratio=1.2):
    extractive_output = summarize_text(text, ratio=ratio)
    final_output = bart_summarize(extractive_output)
    return final_output


# -------------------------------------
# Example Usage
# -------------------------------------
text = """
    In today’s rapidly evolving business environment, organizations are constantly seeking ways to gain a competitive advantage by improving operational efficiency and making data-driven decisions. One of the major challenges faced by business analysts, like Janani, is dealing with the overwhelming amount of textual information available from various sources such as market reports, industry publications, customer reviews, financial statements, and competitor updates. Analyzing such massive volumes of text manually can be time-consuming and prone to human error. As a result, there is a growing need for automated tools that can quickly process and summarize large datasets into actionable insights.

    Industry trends change at a fast pace, and businesses must stay informed to adapt their strategies accordingly. Analysts need to identify emerging technologies, shifting consumer preferences, regulatory changes, and global market fluctuations. However, manually reading through dozens of lengthy articles every day reduces productivity and delays decision-making. Automated summarization provides a fast and efficient solution by condensing these large documents into short, meaningful summaries that highlight only the most important information.

    Competitor analysis is another crucial area where automated text summarization proves beneficial. Companies frequently release press announcements, product updates, financial results, and marketing campaigns. Tracking these activities manually is nearly impossible on a daily basis. With summarization tools, analysts can instantly extract key competitor insights, enabling businesses to respond quickly with strategic planning, product improvements, or targeted marketing efforts.

    Additionally, the widespread use of social media and online platforms has led to an explosion of user-generated content. Customer feedback, reviews, and public sentiment often contain valuable insights that can shape business decisions. Yet, the sheer volume of this data makes manual review impractical. Automated summarization allows analysts to distill customer opinions, identify recurring issues, and uncover positive trends that can guide product development and enhance customer satisfaction.

    Artificial intelligence and natural language processing technologies play a central role in building effective summarization tools. By leveraging algorithms that analyze word frequency, sentence importance, and contextual meaning, these systems can produce accurate summaries that retain the essence of the original text. This not only saves time for analysts but also ensures consistency and reduces the risk of missing critical information.

    In conclusion, automated text summarization is becoming an essential component of modern business intelligence. It empowers analysts like Janani to navigate the vast amount of textual data efficiently, extract meaningful insights, and make informed decisions quickly. As organizations continue to embrace digital transformation, tools like SummAI will play a vital role in enhancing productivity, improving strategic planning, and maintaining a competitive edge in the marketplace.

"""
if __name__ == "__main__":
    input_text = text
    summary = hybrid_summarize(input_text, ratio=1.2)
    print(summary)


[nltk_data] Downloading package punkt to /home/sandeep/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /home/sandeep/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Automated summarization allows analysts to distill customer opinions, identify recurring issues, and uncover positive trends .<n>It empowers analysts like Janani to navigate the vast amount of textual data efficiently, extract meaningful insights, and make informed decisions quickly .<n>As organizations continue to embrace digital transformation, tools like SummAI will play a vital role in enhancing productivity, improving strategic planning, and maintaining a competitive edge in the marketplace .<n>As organizations continue to embrace digital transformation, tools like SummAI will play a vital role in enhancing productivity, improving strategic planning, and maintaining a competitive edge in the marketplace .
