In [None]:
pip install requests beautifulsoup4 googletrans==4.0.0-rc1 transformers



### Importing Libraries

In [None]:
import requests
from bs4 import BeautifulSoup
from googletrans import Translator  # For translation
from transformers import BartForConditionalGeneration, BartTokenizer

### Web Scraping Function
This function scrapes the news article's title, date, and content from the provided URL.

In [None]:
def scrape_news_articles(url, timeout=30):
    """
    Scrape a news article's title, date, and body text from ``url``.

    The page is fetched with ``requests`` and parsed with BeautifulSoup's
    ``html.parser``. Elements are located heuristically:

    * title   -> first ``<h1>``
    * date    -> first ``<time>``, falling back to the first ``<span>``
    * content -> first ``<article>``, falling back to the first ``<div>``

    These are generic guesses; a site-specific selector will be more precise.

    Args:
        url: Address of the article page.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        dict with keys ``'title'``, ``'date'`` and ``'content'``. A field whose
        element is missing or has no text holds the placeholder
        ``"Title not found"``, ``"Date not found"`` or ``"Content not found"``.

    Raises:
        requests.exceptions.RequestException: on connection failure, timeout,
            or a 4xx/5xx response.
    """
    # A timeout keeps a stalled server from hanging the notebook indefinitely.
    response = requests.get(url, timeout=timeout)
    # Fail loudly on HTTP errors instead of parsing an error page as the article.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    def first_match(*tag_names):
        """Return the first element found among ``tag_names``, tried in order."""
        for tag_name in tag_names:
            element = soup.find(tag_name)
            if element is not None:
                return element
        return None

    def text_or(element, placeholder):
        """Return the element's text, or ``placeholder`` when absent or empty."""
        if element is None:
            return placeholder
        # A space separator stops text from adjacent tags running together.
        return element.get_text(separator=' ', strip=True) or placeholder

    return {
        'title': text_or(first_match('h1'), "Title not found"),
        # The first <span>/<div> on a page is usually navigation chrome, so the
        # semantic <time>/<article> elements are preferred when they exist.
        'date': text_or(first_match('time', 'span'), "Date not found"),
        'content': text_or(first_match('article', 'div'), "Content not found"),
    }

### Translation Function
This function translates the input text into the specified target language (English by default).

In [None]:
def translate_text(text, target_lang='en'):
    """
    Translate ``text`` into ``target_lang`` using the googletrans library.

    Args:
        text: Text to translate; the source language is left for the
            translator to detect.
        target_lang: Language code of the desired output (default ``'en'``).

    Returns:
        The translated string. Empty or whitespace-only input (or ``None``)
        is returned unchanged without contacting the translation service.
    """
    # Nothing to translate: skip the network round-trip entirely.
    if not text or not text.strip():
        return text

    translator = Translator()
    return translator.translate(text, dest=target_lang).text

### Summarization Function
This function summarizes the input text using a BART model from the Hugging Face Transformers library.

In [None]:
# Loaded (model, tokenizer) pairs keyed by checkpoint name, so repeated calls
# to summarize_text() do not reload the weights every time.
_BART_CACHE = {}


def _load_bart(model_name):
    """Return the ``(model, tokenizer)`` pair for ``model_name``, loading it on first use."""
    if model_name not in _BART_CACHE:
        _BART_CACHE[model_name] = (
            BartForConditionalGeneration.from_pretrained(model_name),
            BartTokenizer.from_pretrained(model_name),
        )
    return _BART_CACHE[model_name]


def summarize_text(text, model_name='facebook/bart-large-cnn'):
    """
    Summarize ``text`` with a pre-trained BART checkpoint.

    Args:
        text: Text to summarize. Input longer than 1024 tokens is truncated.
        model_name: Hugging Face checkpoint to load
            (default ``'facebook/bart-large-cnn'``).

    Returns:
        The generated summary as a string, or ``""`` when ``text`` is empty
        or whitespace-only.
    """
    # No input means nothing to summarize; avoid loading the model for it.
    if not text or not text.strip():
        return ""

    model, tokenizer = _load_bart(model_name)

    # The raw article is passed as-is: a "summarize: " task prefix is a T5
    # convention and would be treated by BART as part of the article.
    inputs = tokenizer(text, return_tensors="pt", max_length=1024, truncation=True)

    # Beam search with 4 beams; summary length is bounded to 50-150 tokens.
    summary_ids = model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_length=150,
        min_length=50,
        length_penalty=2.0,
        num_beams=4,
        early_stopping=True,
    )

    # Decode the first (only) generated sequence back to text.
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

In [None]:
# Hindi-language article used as the worked example for the whole pipeline.
url = 'https://hindi.asianetnews.com/entertainment/tv/salman-khan-bigg-boss-18-contestants-list-premiere-date-and-all/articleshow-yzbd8ld'
# Fetch and parse the page into a {'title', 'date', 'content'} dict.
article_data = scrape_news_articles(url=url)

In [None]:
# The scraper signals a missing body with the "Content not found" placeholder.
if article_data['content'] == "Content not found":
    print("Failed to retrieve article content.")
else:
    # Translate the scraped body, then summarize the translation.
    translated_text = translate_text(article_data['content'])
    summary = summarize_text(translated_text)

    # Report the metadata first, then the summary and the full translation.
    print('Title:', article_data['title'])
    print('Date:', article_data['date'])
    print('Summary:', summary)
    print('Full Article:', translated_text)

Title: Bigg Boss 18 बनाएगा अब तक का सबसे बड़ा रिकॉर्ड! जानिए कैसे रचेगा इतिहास?
Date: Home
Summary: According to the latest reports, the 18th season of 'Bigg Boss' is going to make the biggest record in the history of this show so far. Last season, it started with 17 contestants and 4 wild card entry. This time, stars like Dheeraj Dhupar, Anita Hasanandani can be seen.
Full Article: The National West Story Rajyaranojanlaifstylifsochechalphotogmsvidiosavidiyosyvioral homeenteanmenttvbigg boss 18 will make the biggest record so far!Know how history will create? Bigg Boss 18 will make the biggest record ever!Know how will history be created? Gagan Gurjar |Published: Sep 07 2024, 07:22 PM Istbigg Boss 18 will make the biggest record so far!Know how will history be created?The 18th season of 'Bigg Boss' can begin with 20 contestants.Last season, it started with 17 contestants and 4 wild card entry.This time, stars like Dheeraj Dhupar, Anita Hasanandani can be seen.His fans are eagerly waitin

In [None]:
!pip install streamlit

Collecting streamlit
  Downloading streamlit-1.39.0-py2.py3-none-any.whl.metadata (8.5 kB)
Collecting gitpython!=3.1.19,<4,>=3.0.7 (from streamlit)
  Downloading GitPython-3.1.43-py3-none-any.whl.metadata (13 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Collecting watchdog<6,>=2.1.5 (from streamlit)
  Downloading watchdog-5.0.3-py3-none-manylinux2014_x86_64.whl.metadata (41 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.9/41.9 kB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m
Collecting gitdb<5,>=4.0.1 (from gitpython!=3.1.19,<4,>=3.0.7->streamlit)
  Downloading gitdb-4.0.11-py3-none-any.whl.metadata (1.2 kB)
Collecting smmap<6,>=3.0.1 (from gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit)
  Downloading smmap-5.0.1-py3-none-any.whl.metadata (4.3 kB)
Downloading streamlit-1.39.0-py2.py3-none-any.whl (8.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.7/8.7 MB[0m [3

In [None]:
import html

import requests
import streamlit as st
from bs4 import BeautifulSoup
from googletrans import Translator
from transformers import BartForConditionalGeneration, BartTokenizer

# Web Scraping Function
def scrape_news_articles(url, timeout=30):
    """
    Scrape a news article's title, date, and body text from ``url``.

    The page is fetched with ``requests`` and parsed with BeautifulSoup's
    ``html.parser``. Elements are located heuristically:

    * title   -> first ``<h1>``
    * date    -> first ``<time>``, falling back to the first ``<span>``
    * content -> first ``<article>``, falling back to the first ``<div>``

    These are generic guesses; a site-specific selector will be more precise.

    Args:
        url: Address of the article page.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        dict with keys ``'title'``, ``'date'`` and ``'content'``. A field whose
        element is missing or has no text holds the placeholder
        ``"Title not found"``, ``"Date not found"`` or ``"Content not found"``.

    Raises:
        requests.exceptions.RequestException: on connection failure, timeout,
            or a 4xx/5xx response.
    """
    # A timeout keeps a stalled server from hanging the app indefinitely.
    response = requests.get(url, timeout=timeout)
    # Fail loudly on HTTP errors instead of parsing an error page as the article.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    def first_match(*tag_names):
        """Return the first element found among ``tag_names``, tried in order."""
        for tag_name in tag_names:
            element = soup.find(tag_name)
            if element is not None:
                return element
        return None

    def text_or(element, placeholder):
        """Return the element's text, or ``placeholder`` when absent or empty."""
        if element is None:
            return placeholder
        # A space separator stops text from adjacent tags running together.
        return element.get_text(separator=' ', strip=True) or placeholder

    return {
        'title': text_or(first_match('h1'), "Title not found"),
        # The first <span>/<div> on a page is usually navigation chrome, so the
        # semantic <time>/<article> elements are preferred when they exist.
        'date': text_or(first_match('time', 'span'), "Date not found"),
        'content': text_or(first_match('article', 'div'), "Content not found"),
    }

# Translation Function
def translate_text(text, target_lang='en'):
    """
    Translate ``text`` into ``target_lang`` using the googletrans library.

    Args:
        text: Text to translate; the source language is left for the
            translator to detect.
        target_lang: Language code of the desired output (default ``'en'``).

    Returns:
        The translated string. Empty or whitespace-only input (or ``None``)
        is returned unchanged without contacting the translation service.
    """
    # Nothing to translate: skip the network round-trip entirely.
    if not text or not text.strip():
        return text

    translator = Translator()
    return translator.translate(text, dest=target_lang).text

# Summarization Function
# Loaded (model, tokenizer) pairs keyed by checkpoint name, so repeated calls
# to summarize_text() do not reload the weights every time.
_BART_CACHE = {}


def _load_bart(model_name):
    """Return the ``(model, tokenizer)`` pair for ``model_name``, loading it on first use."""
    if model_name not in _BART_CACHE:
        _BART_CACHE[model_name] = (
            BartForConditionalGeneration.from_pretrained(model_name),
            BartTokenizer.from_pretrained(model_name),
        )
    return _BART_CACHE[model_name]


def summarize_text(text, model_name='facebook/bart-large-cnn'):
    """
    Summarize ``text`` with a pre-trained BART checkpoint.

    Args:
        text: Text to summarize. Input longer than 1024 tokens is truncated.
        model_name: Hugging Face checkpoint to load
            (default ``'facebook/bart-large-cnn'``).

    Returns:
        The generated summary as a string, or ``""`` when ``text`` is empty
        or whitespace-only.
    """
    # No input means nothing to summarize; avoid loading the model for it.
    if not text or not text.strip():
        return ""

    model, tokenizer = _load_bart(model_name)

    # The raw article is passed as-is: a "summarize: " task prefix is a T5
    # convention and would be treated by BART as part of the article.
    inputs = tokenizer(text, return_tensors="pt", max_length=1024, truncation=True)

    # Beam search with 4 beams; summary length is bounded to 50-150 tokens.
    summary_ids = model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_length=150,
        min_length=50,
        length_penalty=2.0,
        num_beams=4,
        early_stopping=True,
    )

    # Decode the first (only) generated sequence back to text.
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

# Streamlit UI
def display_news_summary(article_data, translated_title, translated_text, summary):
    """
    Render the article, its translation, and its summary as a Streamlit page.

    Args:
        article_data: dict with ``'title'``, ``'date'`` and ``'content'`` keys
            holding the scraped (original-language) values.
        translated_title: English translation of the title.
        translated_text: English translation of the article body.
        summary: English summary of the translated body.

    Returns:
        None. Output is written to the Streamlit page.
    """
    st.title("News Scraper and Summarizer")

    # Title in the original language and in English
    st.subheader("Title (Hindi):")
    st.write(f"**{article_data['title']}**")

    st.subheader("Title (English):")
    st.write(f"**{translated_title}**")

    # Publication date as scraped
    st.subheader("Published Date:")
    st.write(f"**{article_data['date']}**")

    # Original article body
    st.subheader("Full Article (Hindi):")
    st.write(article_data['content'])

    # Translated article body
    st.subheader("Full Article (English):")
    st.write(translated_text)

    # Summary, rendered as raw HTML to control the font size
    st.subheader("Summary (English):")
    # The summary is derived from scraped page text and is rendered with
    # unsafe_allow_html=True, so escape it to keep stray markup from being
    # interpreted by the browser.
    safe_summary = html.escape(str(summary))
    st.markdown(f"<h3 style='font-size:20px;'>{safe_summary}</h3>", unsafe_allow_html=True)

# Example usage
url = 'https://hindi.asianetnews.com/entertainment/tv/salman-khan-bigg-boss-18-contestants-list-premiere-date-and-all/articleshow-yzbd8ld'

# Fetch and parse the article page
article_data = scrape_news_articles(url)

# The scraper signals a missing body with the "Content not found" placeholder.
if article_data['content'] == "Content not found":
    st.write("Failed to retrieve article content.")
else:
    # Translate the title and the body separately
    translated_title = translate_text(article_data['title'])
    translated_text = translate_text(article_data['content'])

    # Summarize the English translation of the body
    summary = summarize_text(translated_text)

    # Render original, translation, and summary on the Streamlit page
    display_news_summary(article_data, translated_title, translated_text, summary)


2024-10-02 17:35:58.852 
  command:

    streamlit run /usr/local/lib/python3.10/dist-packages/colab_kernel_launcher.py [ARGUMENTS]
