In [None]:
# Install and import News API and other necessary Python libraries.
# The `!` prefix is notebook shell syntax: each line runs pip in the kernel's environment.
!pip install newsapi-python
# NOTE(review): `key` is imported below as the module that provides `my_api_key`.
# Confirm that a PyPI package named `key` is really what should be installed here,
# rather than relying on a local key.py that holds the API key.
!pip install key
!pip install requests
!pip install bs4
!pip install transformers
from newsapi import NewsApiClient
from key import my_api_key
import datetime as dt
import pandas as pd
import requests
from bs4 import BeautifulSoup
from transformers import pipeline

# Remove character limit for data frames so long text is shown in full
pd.set_option('display.max_colwidth', None)

# Link API to API key and define as a variable
newsapi = NewsApiClient(api_key=my_api_key)

# Generate recent news articles in English regarding "TPG Telecom"
data = newsapi.get_everything(q='TPG Telecom', language='en')

# Define a variable for the generated articles
articles = data['articles']

# Everything below indexes into the results (articles[0], df['url']), so stop
# here with a clear message instead of an opaque IndexError/KeyError later on.
if not articles:
    raise RuntimeError("News API returned no articles for 'TPG Telecom'")

# Print a numbered list of the collected article titles
for position, article in enumerate(articles):
    print(f'{position} {article["title"]}')

# Print every field of the first article, with field names padded to 15 characters
for field, content in articles[0].items():
    print(f"\n{field.ljust(15)}{content}")

# Create a data frame for the articles
df = pd.DataFrame(articles)

# Keep only the column holding the article URLs
website_links = df['url']

# Set up AI text summarisation model
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

# Define a function which applies the summarisation model to url inputs
def summarise_url(url):
    """Download the page at ``url`` and return a summary of its paragraph text.

    The text of every ``<p>`` element is collected, truncated to the first
    3000 characters and passed to the module-level ``summarizer`` pipeline.

    Args:
        url: Address of the article to fetch.

    Returns:
        The generated summary text. If the page has no paragraph text, or if
        anything fails while fetching, parsing or summarising, a string
        starting with ``"Error"`` is returned instead of raising, so the
        function can be applied across a whole Series of URLs.
    """
    try:
        response = requests.get(url, timeout=5)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        # Join paragraphs with a space so the last word of one paragraph does
        # not fuse with the first word of the next; skip empty paragraphs.
        paragraphs = (p.get_text().strip() for p in soup.find_all('p'))
        text = ' '.join(chunk for chunk in paragraphs if chunk)

        # With nothing to summarise the model output is meaningless, so report
        # it as an error rather than calling the summariser on an empty string.
        if not text:
            return "Error: no paragraph text found on page"

        # Cap the input length before handing it to the model.
        text = text[:3000]

        summary = summarizer(text, max_length=130, min_length=30, do_sample=False)
        return summary[0]['summary_text']

    except Exception as e:
        # Deliberate best-effort: one bad URL must not abort the whole batch.
        return f"Error summarisng: {e}"
    
# Apply the summarisation function to every article URL and store the result
# as a new data-frame column. `website_links` is a Series, so assigning
# website_links['summary'] would target a single element labelled 'summary'
# instead of creating a column.
df['summary'] = df['url'].apply(summarise_url)

# Define the summary of links
TPG_summary = df['summary']

# Summaries containing these keywords are treated as unusable: 'Error' marks a
# failed fetch/summarisation (see summarise_url), and 'CNN' was flagged by the
# original authors as appearing for sites that could not be accessed properly.
keywords = ['Error', 'CNN']

# Refine the series of summaries to remove any that contain one of the keywords
refined_summary = TPG_summary[~TPG_summary.astype(str).apply(
    lambda x: any(kw in x for kw in keywords)
)]

# Renumber the surviving summaries from 0 so there are no gaps in the index.
# Done generically rather than with fixed labels, because which rows survive
# the filter depends on the articles the API returns on the day.
refined_summary = refined_summary.reset_index(drop=True)

# Combine the filtered summaries into one string, separated by blank lines
refined_summary = "\n\n".join(refined_summary.astype(str))

# Drop any characters that cannot be represented in Latin-1
TPG_summary_text = refined_summary.encode('latin-1', 'ignore').decode('latin-1')

# Create a txt document in the 'Main PDF + Images' folder with the final recent news summaries
file_path = "/workspaces/FINM3422-Group-11/Main PDF + Images/TPG_summary_text.txt"
# Explicit encoding so the file content does not depend on the platform default.
with open(file_path, 'w', encoding='utf-8') as file:
    file.write(TPG_summary_text)




  from .autonotebook import tqdm as notebook_tqdm


0 DDoS Mitigation Leak
1 Tech, Media & Telecom Roundup: Market Talk
2 Inaki Berroeta Buys 335,102 Shares of TPG Telecom Limited (ASX:TPG) Stock
3 In Memoriam: The 31 Billionaires Who Died Over The Past Year
4 Singapore Telecoms Industry Report 2025, with Detailed Operator Profiles and Forecasts for Singtel, Starhub, M1 and Simba
5 2 ASX growth shares I'd buy to try to beat the market
6 Should I buy Brickworks or Soul Patts shares?
7 If I could only own 1 ASX stock, it would be this one
8 2 ASX shares I think are fantastic for beginners
9 Australia’s TPG yearns for the simple life
10 [NANOG] Weekly Global IPv4 Routing Table Report
11 [NANOG] Weekly Global IPv4 Routing Table Report
12 [NANOG] Weekly Global IPv4 Routing Table Report
13 China Mobile stakes a claim on HKBN
14 Satellite and 6G technology set to revolutionise emergency services

source         {'id': None, 'name': 'Kentik.com'}

author         None

title          DDoS Mitigation Leak

description    In this edition of Beyond

Device set to use cpu
Your max_length is set to 130, but your input_length is only 23. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=11)
Your max_length is set to 130, but your input_length is only 3. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=1)
Your max_length is set to 130, but your input_length is only 3. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=1)
Your max_length is set to 130, but your input_length is only 3. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', ma