In [1]:
%pip install requests
%pip install beautifulsoup4

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [3]:
import requests
from bs4 import BeautifulSoup
import logging
import csv
from io import StringIO

# Function to fetch all ASX symbols from the ASX CSV file
def get_all_asx_symbols():
    url = "https://www.asx.com.au/asx/research/ASXListedCompanies.csv"
    
    try:
        response = requests.get(url)
        response.raise_for_status()

        # Use StringIO to read the CSV file
        csv_data = StringIO(response.text)
        csv_reader = csv.reader(csv_data)

        symbols = []
        next(csv_reader)  # Skip the header row

        # Process each row in the CSV file
        for row in csv_reader:
            if len(row) > 1:  # Ensure there are at least 2 columns
                asx_code = row[1].strip()  # The ASX code is in the second column
                if asx_code:  # Ensure it's not an empty string
                    symbols.append(asx_code)
        
        return symbols

    except requests.RequestException as e:
        logging.error(f"Error fetching ASX symbols: {e}")
        return []

# Function to get the latest news for an ASX stock
def get_asx_latest_news(stock_code):
    url = f"https://www.asx.com.au/asx/share-price-research/company/{stock_code}"

    try:
        # Make an HTTP GET request to fetch the page content
        response = requests.get(url)
        response.raise_for_status()  # Check if request was successful
        
        # Parse the page with BeautifulSoup
        soup = BeautifulSoup(response.text, 'html.parser')

        # Find the section of the webpage that contains the news articles
        news_section = soup.find('div', class_='company-news')  # Update the correct class based on the webpage structure

        if not news_section:
            logging.error(f"No news section found for {stock_code}")
            return []

        # Extract each news article
        news_items = news_section.find_all('div', class_='news-item')  # Adjust based on the webpage structure
        news_data = []

        for item in news_items:
            # Extract the news title
            title_tag = item.find('a', class_='news-title')  # Update the class or tag based on the webpage
            title = title_tag.text.strip() if title_tag else "No title"

            # Extract the news link
            link = title_tag['href'] if title_tag else "#"

            # Extract the publication date
            date_tag = item.find('span', class_='news-date')  # Update the class or tag based on the webpage
            date = date_tag.text.strip() if date_tag else "No date"

            # Append the extracted data
            news_data.append({
                'title': title,
                'link': f"https://www.asx.com.au{link}",
                'date': date
            })

        return news_data

    except requests.RequestException as e:
        logging.error(f"Error fetching news for {stock_code}: {e}")
        return []

# Function to fetch and print news content from the article page
def fetch_news_content(news_url):
    try:
        # Fetch the news article content
        response = requests.get(news_url)
        response.raise_for_status()

        # Parse the content with BeautifulSoup
        soup = BeautifulSoup(response.text, 'html.parser')

        # Extract the main content from the article page
        # This will depend on the structure of the news article page, adjust accordingly
        content_section = soup.find('div', class_='article-content')  # Adjust class name based on real HTML structure
        if content_section:
            return content_section.get_text(strip=True)
        else:
            return "No content available."

    except requests.RequestException as e:
        logging.error(f"Error fetching content from {news_url}: {e}")
        return "Error fetching content."

# Function to save news content to a file named {symbol}_news.txt
def save_news_to_file(symbol, news_data):
    filename = f"{symbol}_news.txt"
    
    with open(filename, 'w', encoding='utf-8') as f:
        for news in news_data:
            f.write(f"Title: {news['title']}\n")
            f.write(f"Date: {news['date']}\n")
            f.write(f"Link: {news['link']}\n")

            # Fetch and write the news content
            content = fetch_news_content(news['link'])
            f.write(f"Content: {content}\n")
            f.write("-" * 80 + "\n")
    
    print(f"Saved news for {symbol} in {filename}")

# Main program
def main():
    # Fetch all ASX symbols
    symbols = get_all_asx_symbols()

    # Check if any symbols were fetched
    if not symbols:
        print("No symbols found.")
        return

    # Limit to the first 10 symbols for demo purposes
    print(f"Total number of ASX symbols fetched: {len(symbols)}")
    print(f"First 10 ASX symbols: {symbols[:10]}")
    
    for symbol in symbols[:10]:  # Limit it to 10 symbols for this demo
        print(f"\nFetching news for {symbol}")
        latest_news = get_asx_latest_news(symbol)

        # Save the latest news for each stock
        if latest_news:
            save_news_to_file(symbol, latest_news)
        else:
            print(f"No news found for {symbol}")

# Run the main function
if __name__ == "__main__":
    main()

ERROR:root:No news section found for ASX code


Total number of ASX symbols fetched: 2073
First 10 ASX symbols: ['ASX code', '14D', '29M', 'T3D', 'TGP', 'TCF', 'TOT', 'TDO', '3PL', '4DX']

Fetching news for ASX code
No news found for ASX code

Fetching news for 14D


ERROR:root:No news section found for 14D
ERROR:root:No news section found for 29M


No news found for 14D

Fetching news for 29M
No news found for 29M

Fetching news for T3D


ERROR:root:No news section found for T3D
ERROR:root:No news section found for TGP


No news found for T3D

Fetching news for TGP
No news found for TGP

Fetching news for TCF


ERROR:root:No news section found for TCF
ERROR:root:No news section found for TOT


No news found for TCF

Fetching news for TOT
No news found for TOT

Fetching news for TDO


ERROR:root:No news section found for TDO


No news found for TDO

Fetching news for 3PL


ERROR:root:No news section found for 3PL
ERROR:root:No news section found for 4DX


No news found for 3PL

Fetching news for 4DX
No news found for 4DX
