#16

# In [1]:
# Importing necessary libraries
import requests
from bs4 import BeautifulSoup
import sqlite3
from datetime import datetime

# Step 1: Scrape targets -- the site root and the category listing page
# that the rest of the script downloads and parses.
base_url = "https://www.iranjib.ir"
gold_currency_url = base_url + "/category/20/اخبار-طلا-و-ارز/"

# Function to fetch the HTML content
def fetch_html(url, timeout=10):
    """Download *url* and return the response body as text.

    Args:
        url: Absolute URL to request with HTTP GET.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` can block indefinitely on a stalled
            connection.

    Returns:
        The response text when the server answers with status 200;
        ``None`` on any other status code or on a network-level failure
        (a diagnostic line is printed in both failure cases).
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Connection errors, DNS failures and timeouts follow the same
        # "print and return None" contract as a bad status code, so the
        # caller only has one failure path to handle.
        print(f"Failed to fetch {url}. Error: {exc}")
        return None

    if response.status_code != 200:
        print(f"Failed to fetch {url}. Status code: {response.status_code}")
        return None

    return response.text

# Step 2: Parse the HTML content to extract news data
def parse_news(html_content):
    """Extract news entries from the HTML of a category listing page.

    Every ``<div class="news-item">`` is treated as one article. Items that
    lack a headline (``<h3>``) or a link (``<a href=...>``) are skipped
    instead of aborting the whole parse.

    Args:
        html_content: HTML source of the listing page.

    Returns:
        A list of ``(title, link, published_date, summary)`` tuples.
        ``published_date`` is a ``datetime`` parsed with ``%Y/%m/%d`` or
        ``None`` when the date element is missing or does not match that
        format. ``summary`` falls back to ``"No summary available"``.
    """
    # Imported locally so the block does not depend on a file-level import.
    from urllib.parse import urljoin

    soup = BeautifulSoup(html_content, "html.parser")
    news_list = []

    # NOTE(review): "news-item", "h3" and "span.date" are placeholder
    # selectors; the run output recorded in this file stored 0 articles,
    # which suggests they do not match the live page -- confirm against the
    # site's actual markup.
    for article in soup.find_all("div", class_="news-item"):
        title_tag = article.find("h3")
        link_tag = article.find("a", href=True)
        if title_tag is None or link_tag is None:
            # Without a headline or a target URL there is nothing useful to store.
            continue

        title = title_tag.text.strip()
        # urljoin copes with absolute hrefs and with relative ones that lack
        # a leading slash; plain concatenation would corrupt both.
        link = urljoin(base_url, link_tag["href"].strip())

        # Convert date to standard format; leave it as None when the element
        # is absent or the text does not match.
        published_date = None
        date_tag = article.find("span", class_="date")
        if date_tag is not None:
            try:
                published_date = datetime.strptime(date_tag.text.strip(), "%Y/%m/%d")
            except ValueError:
                published_date = None

        summary_tag = article.find("p")
        if summary_tag is not None:
            summary = summary_tag.text.strip()
        else:
            summary = "No summary available"

        news_list.append((title, link, published_date, summary))

    return news_list

# Step 3: Store the data in SQLite
def store_data_in_db(news_list):
    """Append scraped news rows to the ``gold_currency_news`` table.

    Opens (or creates) ``news_data.db`` in the current working directory,
    creates the table on first use and inserts every entry of *news_list*
    in a single transaction.

    Args:
        news_list: Iterable of ``(title, link, published_date, summary)``
            tuples. ``published_date`` may be a ``datetime`` or ``None``.
    """
    # Bind datetimes as explicit text. The implicit sqlite3 datetime adapter
    # is deprecated since Python 3.12; isoformat(" ") yields the same
    # "YYYY-MM-DD HH:MM:SS" text that adapter produced.
    rows = [
        (
            title,
            link,
            published.isoformat(" ") if isinstance(published, datetime) else published,
            summary,
        )
        for title, link, published, summary in news_list
    ]

    conn = sqlite3.connect("news_data.db")
    try:
        # "with conn" commits on success and rolls back if a statement
        # raises; the finally clause guarantees the handle is released.
        with conn:
            # Create table if not exists
            conn.execute("""
            CREATE TABLE IF NOT EXISTS gold_currency_news (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                title TEXT,
                link TEXT,
                published_date DATE,
                summary TEXT
            )
            """)

            # Insert data
            conn.executemany("""
            INSERT INTO gold_currency_news (title, link, published_date, summary)
            VALUES (?, ?, ?, ?)
            """, rows)
    finally:
        conn.close()

# Main script
if __name__ == "__main__":
    # Download the listing page; fetch_html yields a falsy value on failure,
    # in which case nothing is parsed, stored or reported.
    html_content = fetch_html(gold_currency_url)
    news_data = parse_news(html_content) if html_content else None

    if news_data is not None:
        # Persist whatever was parsed (possibly an empty list) and report the count.
        store_data_in_db(news_data)
        print(f"Stored {len(news_data)} news articles in the database.")


# Output: Stored 0 news articles in the database.
