In [1]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [2]:
def to_chunks(data):
    text_splitter=RecursiveCharacterTextSplitter(
        chunk_size=3000,
        chunk_overlap=50
    )
    docs=text_splitter.split_text(data)
    return docs

In [3]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM


In [4]:
def load_distilbert_model(model_name="sshleifer/distilbart-cnn-12-6"):
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    return tokenizer, model

In [5]:
def summarize_text(tokenizer, model, text, max_chunk_length, summary_max_length):
    inputs = tokenizer(text, return_tensors="pt", max_length=max_chunk_length, truncation=True)
    summary_ids = model.generate(inputs["input_ids"], max_length=summary_max_length, min_length=200, length_penalty=2.0, num_beams=4, early_stopping=True)
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    return summary


In [6]:
def summarize_article(article, model_name="sshleifer/distilbart-cnn-12-6"):
    data = article
    chunks = to_chunks(data)
    tokenizer, model = load_distilbert_model(model_name)
    summaries = []
    for chunk in chunks:
        chunk_text = chunk
        summary = summarize_text(tokenizer, model, chunk_text,3000,800)
        summaries.append(summary)
    concatenated_summaries = " ".join(summaries)
    #  Second summarization pass: Summarize the concatenated summaries
    intermediate_chunks = [concatenated_summaries[i:i+3000] for i in range(0, len(concatenated_summaries), 3000)]
    final_summaries = []
    for intermediate_chunk in intermediate_chunks:
        final_summary = summarize_text(tokenizer, model, intermediate_chunk,3000,800)
        final_summaries.append(final_summary)
    
    final_summary_text = " ".join(final_summaries)
    
    return final_summary_text

In [7]:
import requests
from bs4 import BeautifulSoup


In [33]:
url = "https://www.hindustantimes.com/business/elon-musk-s-x-wants-laid-off-employees-to-give-money-back-they-were-overpaid-101718355497670.html"
headers = {
    'User-Agent': 'Your User Agent String',
}
r = requests.get(url, headers=headers)

soup=BeautifulSoup(r.text,'html.parser')
paragraphs=soup.find_all('p')

In [34]:
paragraphs

[<p>Subscribe Now! Get features like</p>,
 <p>Elon Musk's company X (formerly Twitter) is asking at least six former Australian employees to repay the money which they have been accidentally given. The company is threatening legal action to recover these overpayments, Sydney Morning Herald reported. The issue come after an error in currency conversion from US dollars to Australian dollars. Emails from the company's Asia Pacific HR department show that overpayments occurred in January 2023, ranging from $1,500 to $70,000 per employee.</p>,
 <p>As per the report, these payments were part of a 'deferred cash compensation' tied to employee shares which were originally valued at $54.20 USD each which is the price Elon Musk paid when he acquired Twitter in 2022. None of the former employees have returned the money, the report claimed. The overpayments happened because Elon Musk's company used an incorrect conversion rate which was 2.5 times the actual value.</p>,
 <p>As per Australian law, o

In [10]:
paragraphs[2].text

'The social media platform, formerly known as Twitter, acknowledged its error in currency conversion from the US to Australian dollars on the payments and asked its former employees to repay amounts of up to $70,000 in some cases, the report said on Wednesday.\xa0'

In [22]:
text= [paragraph.text for paragraph in paragraphs]
words=' '.join(text).split(' ')
article = ' '.join(words)

In [23]:
print(article)

File photo Billionaire Elon Musk's X has asked its sacked employees in Australia to return the money it claims was accidentally overpaid to them, the Sydney Morning Herald reported. The social media platform, formerly known as Twitter, acknowledged its error in currency conversion from the US to Australian dollars on the payments and asked its former employees to repay amounts of up to $70,000 in some cases, the report said on Wednesday.  The currency conversion errors made by X when employees were paid their entitlements after they were fired had reportedly led to overpayments of between $1,500 and $70,000. According to the report, at least six former X staff have received legal notices.  "It has come to our attention that you received a significant overpayment in error in January 2023," the Sydney Morning Herald quoted X's Asia Pacific human resources department as saying, citing an email to several former employees this year. "We would be grateful if you could arrange the repayment 

In [24]:
summary = summarize_article(article)


In [14]:
print(summary)

 The social media platform, formerly known as Twitter, acknowledged its error in currency conversion from the US to Australian dollars on the payments. X has been accused in multiple suits of numerous labor and workplace violations. Thousands of former Twitter employees said they were cheated of severance pay when the billionaire laid them off after acquiring the social media site. X also said that the overpayment was related to "deferred cash compensation" in the form of employee shares issued to the staff when they joined Twitter. At least six former X staff have received legal notices, according to the Sydney Morning Herald report, the report said. The billionaire's $44 billion acquisition of Twitter in 2022 is due to take place in 2022. Back to Mail Online home to go to the page you came from: http://www.dailymailonline.com/news/sundestribune/dailymail/dailydailymail/. Back to the original version of this article. Back to The Newsquiz.com: Please contact us on


In [52]:
import os
from dotenv import load_dotenv

In [56]:
load_dotenv()
api_key_news=os.getenv('NEWSAPI_ORG')


In [59]:
# import requests

# # Replace with your API key
# api_key = api_key_news
# base_url = 'https://newsapi.org/v2/everything'

# # Example query parameters
# params = {
#     'q': 'apple',  # Example keyword (replace with your specific keyword)
#     'domains': 'techcrunch.com,theverge.com',  # Domains related to technology
#     'sortBy': 'publishedAt',  # Sort by published date
#     'pageSize': 5,  # Number of results per page (max 100)
#     'apiKey': api_key
# }

# response = requests.get(base_url, params=params)

# if response.status_code == 200:
#     data = response.json()
#     articles = data['articles']
#     if articles:
#         for idx, article in enumerate(articles):
#             print(f"News {idx + 1}: {article['title']}")
#             print(f"URL: {article['url']}")
#             print()  # Add a blank line for readability
#     else:
#         print("No articles found.")
# else:
#     print("Error fetching data:", response.status_code)
