<a href="https://colab.research.google.com/github/kathulavigneshwari96-art/Summarize-News-Articles-with-ML-Internship-Project-/blob/main/Summarize_News_Articles_with_ML3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Step 1: Install required libraries
!pip install newspaper3k
!pip install sumy
!pip install transformers
!pip install rouge-score
!pip install nltk
!pip install lxml[html_clean]
!pip install requests beautifulsoup4

# Step 2: Import libraries
from newspaper import Article
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lex_rank import LexRankSummarizer
from transformers import pipeline
import nltk
import pandas as pd
import requests
from bs4 import BeautifulSoup

# Download the NLTK 'punkt' resources up front (presumably required by the
# sumy Tokenizer used for extractive summarization -- confirm if trimming).
for _nltk_resource in ('punkt', 'punkt_tab'):
    nltk.download(_nltk_resource)

# Step 3: Function to fetch article
def fetch_article(url):
    """Download ``url`` and return the article's body text.

    The page is fetched with ``requests`` and the text of its ``<p>``
    elements is joined, one paragraph per line. If that yields no text,
    newspaper3k's ``Article`` is used as a fallback extractor.

    Args:
        url: Address of the article page.

    Returns:
        The extracted text as a string, or ``None`` if fetching or parsing
        raised an exception (the error is printed).
    """
    try:
        # Without a timeout requests waits indefinitely on a stalled server.
        response = requests.get(url, timeout=15)
        response.raise_for_status()  # Raise an exception for bad status codes
        soup = BeautifulSoup(response.content, 'html.parser')

        # Read <p> elements only. Container tags (div, article, main,
        # section) enclose those same paragraphs, so calling get_text() on
        # them as well repeats the article text once per nesting level.
        paragraphs = (node.get_text().strip() for node in soup.find_all('p'))
        article_text = "\n".join(chunk for chunk in paragraphs if chunk)

        if not article_text:
            # Fallback to newspaper3k when the page has no usable <p> text.
            article = Article(url)
            article.download()
            article.parse()
            article_text = article.text

        return article_text
    except Exception as e:
        # Deliberately broad: callers rely on "None means the fetch failed"
        # regardless of whether requests, the parser or newspaper3k raised.
        print(f"Error fetching article: {e}")
        return None


# Step 4: Extractive summarization function (LexRank)
def extractive_summary(text, num_sentences=5):
    """Return an extractive summary of ``text`` built with LexRank.

    ``text`` is parsed as English plain text, the LexRank summarizer is asked
    for ``num_sentences`` sentences, and the sentences it returns are joined
    with single spaces into one string.
    """
    document = PlaintextParser.from_string(text, Tokenizer("english")).document
    lex_rank = LexRankSummarizer()
    picked_sentences = lex_rank(document, num_sentences)
    return " ".join(map(str, picked_sentences))

# Step 5: Abstractive summarization function (Transformer model)
# Checkpoint the bare "summarization" pipeline falls back to; naming it
# explicitly keeps results stable if the library's default ever changes.
_SUMMARIZATION_MODEL = "sshleifer/distilbart-cnn-12-6"

# Lazily created pipeline, shared by every abstractive_summary() call.
_summarizer = None


def _get_summarizer():
    """Return the shared summarization pipeline, creating it on first use."""
    global _summarizer
    if _summarizer is None:
        _summarizer = pipeline("summarization", model=_SUMMARIZATION_MODEL)
    return _summarizer


def abstractive_summary(text, max_len=150, min_len=50):
    """Return an abstractive summary of ``text`` from a transformer model.

    Args:
        text: Article text to summarize.
        max_len: Passed to the pipeline as ``max_length``.
        min_len: Passed to the pipeline as ``min_length``.

    Returns:
        The generated summary string, or ``""`` when ``text`` is empty or
        contains only whitespace (there is nothing to summarize, so the model
        is not loaded or called).
    """
    if not text or not text.strip():
        return ""

    # The pipeline is built once and reused; constructing it reloads the
    # model, which is far too slow to repeat on every call.
    summarizer = _get_summarizer()
    summary_text = summarizer(
        text,
        max_length=max_len,
        min_length=min_len,
        do_sample=False,
        # Clip over-long articles to the model's input limit instead of
        # failing on them.
        truncation=True,
    )
    return summary_text[0]['summary_text']

# Step 6: Replace with your ISRO news article URL
url = "PLACEHOLDER_URL" # Added a placeholder URL
# Manual article text (since MSN News URL doesn't work with newspaper3k)
article_text = """
ISRO Chief V Narayanan has announced that the AI-powered humanoid robot Vyommitra will be part of the uncrewed Gaganyaan mission scheduled for December 2025. Vyommitra, developed by the Indian Space Research Organisation (ISRO), is designed to simulate human functions in space environments. The robot will assist in monitoring module parameters, issuing alerts, and performing life support operations during the mission.

Vyommitra is equipped with capabilities to operate switch panels, recognize and respond to astronauts' queries, and mimic human activities in space. This mission marks a significant step in India's human spaceflight program, demonstrating the country's advancements in space technology and AI integration.

The Gaganyaan mission aims to send Indian astronauts into space, and the inclusion of Vyommitra in the uncrewed mission will provide valuable data and insights to ensure the safety and success of future crewed missions.
"""

# The article body is supplied manually above, so that same string is what
# gets printed, summarized and saved. Binding it to `text` here is required:
# the code below reads `text`, which would otherwise be undefined.
text = article_text

if text and text.strip():
    print("Original Article (first 500 characters):\n", text[:500], "...\n")
    print("Full Extracted Text:\n", text, "\n") # Print full text

    # Extractive Summary
    ext_summary = extractive_summary(text, 5)
    print("Extractive Summary:\n", ext_summary, "\n")

    # Abstractive Summary
    abs_summary = abstractive_summary(text)
    print("Abstractive Summary:\n", abs_summary, "\n")

    # Save summaries to CSV (one row: source URL, full text, both summaries)
    data = {
        "URL": [url],
        "Original Article": [text],
        "Extractive Summary": [ext_summary],
        "Abstractive Summary": [abs_summary],
    }
    df = pd.DataFrame(data)
    df.to_csv("ISRO_news_summary.csv", index=False)
    print("Summaries saved to 'ISRO_news_summary.csv'")

else:
    print(" Error fetching article. Please check the URL.")



[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
No model was supplied, defaulted to sshleifer/distilbart-cnn-12-6 and revision a4f8f3e (https://huggingface.co/sshleifer/distilbart-cnn-12-6).
Using a pipeline without specifying a model name and revision in production is not recommended.


Original Article:
 
ISRO Chief V Narayanan has announced that the AI-powered humanoid robot Vyommitra will be part of the uncrewed Gaganyaan mission scheduled for December 2025. Vyommitra, developed by the Indian Space Research Organisation (ISRO), is designed to simulate human functions in space environments. The robot will assist in monitoring module parameters, issuing alerts, and performing life support operations during the mission.

Vyommitra is equipped with capabilities to operate switch panels, recognize and respond to astronauts' queries, and mimic human activities in space. This mission marks a significant step in India's human spaceflight program, demonstrating the country's advancements in space technology and AI integration.

The Gaganyaan mission aims to send Indian astronauts into space, and the inclusion of Vyommitra in the uncrewed mission will provide valuable data and insights to ensure the safety and success of future crewed missions.

Full Extracted Text:
 
ISRO C

Device set to use cpu


Abstractive Summary:
  Vyommitra will be part of the uncrewed Gaganyaan mission scheduled for December 2025 . The robot is designed to simulate human functions in space environments . It is equipped with capabilities to operate switch panels, recognize and respond to astronauts' queries . 

Summaries saved to 'ISRO_news_summary.csv'
