In [4]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime, timedelta

# Listing page that will be scraped
url = "https://news.metal.com/list/industry/aluminium"

# Scraping window: from 45 days before "now" up to "now" (naive local time)
end_date = datetime.now()
start_date = end_date + timedelta(days=-45)

def scrape_news(url, timeout=30):
    """Download a news listing page and extract the articles inside the date window.

    Each ``div`` with class ``newsItemContent___2oFIU`` is treated as one
    article. Its title, summary and date are read from nested ``div`` elements;
    a missing element is replaced by a ``'No ...'`` placeholder string.

    Args:
        url: Address of the listing page to fetch.
        timeout: Seconds to wait for the server before ``requests`` gives up.

    Returns:
        A list of dicts with the keys ``'title'``, ``'summary'`` and ``'date'``
        (the date formatted as ``'%Y-%m-%d %H:%M'``). Only articles whose parsed
        date lies between the module-level ``start_date`` and ``end_date``
        (both inclusive) are included.

    Raises:
        Exception: If the response status code is not 200.
    """
    # Without an explicit timeout, requests waits indefinitely for a server
    # that accepts the connection but never answers.
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f"Failed to load page {url}")

    soup = BeautifulSoup(response.content, 'html.parser')

    # Print the soup content to understand its structure
    print(soup.prettify()[:2000])  # Print the first 2000 characters of the HTML

    articles = []

    # Adjust this part based on the actual HTML structure
    for item in soup.find_all('div', class_='newsItemContent___2oFIU'):
        title_tag = item.find('div', class_='title___1baLV')
        summary_tag = item.find('div', class_='description___z7ktb descriptionspec___lj3uG')
        date_tag = item.find('div', class_='date___3dzkE')

        title = title_tag.get_text().strip() if title_tag else 'No title'
        summary = summary_tag.get_text().strip() if summary_tag else 'No summary'
        date_str = date_tag.get_text().strip() if date_tag else 'No date'

        # Keep the try body limited to the one call that can raise ValueError;
        # the 'No date' placeholder also ends up here and is reported.
        try:
            date = datetime.strptime(date_str, '%b %d, %Y %H:%M')
        except ValueError:
            print(f"Date format error with date: {date_str}")
            continue

        # NOTE(review): both sides of this comparison are naive datetimes;
        # confirm the site's timestamps use the same timezone as the local clock.
        if start_date <= date <= end_date:
            articles.append({
                'title': title,
                'summary': summary,
                'date': date.strftime('%Y-%m-%d %H:%M'),
            })

    return articles

def save_to_csv(articles, filename='scraped_data.csv'):
    """Write the scraped articles to a CSV file.

    Args:
        articles: Records to store; handed directly to ``pd.DataFrame``.
        filename: Path of the CSV file to write.

    Raises:
        ValueError: If the DataFrame built from ``articles`` is empty.
    """
    frame = pd.DataFrame(articles)

    if not frame.empty:
        # The row index is left out of the file.
        frame.to_csv(filename, index=False)
        print(f"Data saved to {filename}")
        return

    raise ValueError("No data scraped; DataFrame is empty.")

# Entry point: scrape the configured listing page and store the result as CSV.
if __name__ == '__main__':
    # Fetch the page at the module-level `url` and collect the in-window articles.
    articles = scrape_news(url)
    # Write them to the default file; raises ValueError when the DataFrame is empty.
    save_to_csv(articles)


<!DOCTYPE html>
<html>
 <head>
  <meta charset="utf-8"/>
  <meta content="width=device-width, viewport-fit=cover" name="viewport"/>
  <meta content="https://static.metal.com/common.metal.com/images/header-en/smm-logo.png" property="og:image"/>
  <link href="https://static.metal.com/www.metal.com/2.18.43/image/favicon.ico" rel="shortcut icon" type="image/x-icon"/>
  <link href="https://static.smm.cn/common.smm.cn/css/antd@4.16.13.min.css" rel="stylesheet"/>
  <script src="https://static.metal.com/common.metal.com/js/fastclick@1.0.6.js">
  </script>
  <script>
   if ('addEventListener' in document) {
                            document.addEventListener('DOMContentLoaded', function() {
                                FastClick.attach(document.body);
                            }, false);
                        }
                        if(!window.Promise) {
                            document.writeln('<script src="https://static.metal.com/common.metal.com/js/es6-promise@3.2.2.min.js"'+