In [1]:
cryptos = ["Bitcoin", "Ethereum", "Solana", "XRP", "Cardano", "Dogecoin", "Polkadot", "Chainlink", "Litecoin", "Avalanche"]


### COINDDESK 


In [3]:
import requests
from bs4 import BeautifulSoup

url = "https://www.coindesk.com/coindesk-news"

try:
    response = requests.get(url)
    response.raise_for_status()  # Check for HTTP errors

    # Parse HTML content with BeautifulSoup
    soup = BeautifulSoup(response.content, 'html.parser')

    # Print the full formatted HTML
    print(soup.prettify())

except Exception as e:
    print(f" Error: {e}")


<!DOCTYPE html>
<html lang="en">
 <head>
  <meta charset="utf-8"/>
  <meta content="width=device-width, initial-scale=1" name="viewport"/>
  <meta content="width=device-width, initial-scale=1, maximum-scale=1" name="viewport"/>
  <link as="image" imagesrcset="/_next/image?url=https%3A%2F%2Fcoindesk-next-4e34ng3ui-coindesk.vercel.app%2F_next%2Fstatic%2Fmedia%2Fcoindesk-logo.68661da3.png&amp;w=256&amp;q=75 1x, /_next/image?url=https%3A%2F%2Fcoindesk-next-4e34ng3ui-coindesk.vercel.app%2F_next%2Fstatic%2Fmedia%2Fcoindesk-logo.68661da3.png&amp;w=384&amp;q=75 2x" rel="preload"/>
  <link data-precedence="next" href="https://coindesk-next-4e34ng3ui-coindesk.vercel.app/_next/static/css/66d766ff7d9abfd9.css?dpl=dpl_GYjaRo4B6zd7ZgWB2mv71qL8BWcu" rel="stylesheet"/>
  <link data-precedence="next" href="https://coindesk-next-4e34ng3ui-coindesk.vercel.app/_next/static/css/16dbddfd1c5feb4b.css?dpl=dpl_GYjaRo4B6zd7ZgWB2mv71qL8BWcu" rel="stylesheet"/>
  <link as="script" fetchpriority="low" href="https:

In [38]:
import requests
from bs4 import BeautifulSoup
import time
import pandas as pd
import os
from concurrent.futures import ThreadPoolExecutor, as_completed

def scrape_tag(tag, save_every=10, max_retries=3):
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }

    page = 2
    buffer_data = []
    total_data = []
    output_file = f"{tag}_news.csv"
    print(f"\n🔖 Starting scrape for tag: {tag}")

    retry_delay = 5  # Initial wait if rate-limited

    while True:
        url = f"https://www.coindesk.com/tag/{tag}/{page}/"
        print(f"🔍 Scraping {url}")

        retries = 0
        while retries < max_retries:
            response = requests.get(url, headers=headers)
            if response.status_code == 200:
                break
            elif response.status_code == 429:
                wait = retry_delay * (2 ** retries)
                print(f"⏳ Rate limited on page {page}. Waiting {wait} seconds...")
                time.sleep(wait)
                retries += 1
            else:
                print(f"⚠️ Unexpected status code {response.status_code} on page {page}. Skipping...")
                return {"tag": tag, "total_pages": page - 2}

        if response.status_code == 404:
            print(f"❌ Page {page} for tag '{tag}' not found (404). Stopping...")
            break

        soup = BeautifulSoup(response.content, "html.parser")
        articles = soup.find_all("div", class_="flex flex-col")

        for article in articles:
            headline_tag = article.find("h2", class_="font-headline-xs font-medium")
            summary_tag = article.find("p", class_="font-body text-charcoal-600 mb-4")
            author_tag = article.find("a", href=True, title=True)
            date_tag = article.find("span", class_="font-metadata text-color-charcoal-600 uppercase")

            if headline_tag and summary_tag and author_tag and date_tag:
                headline = headline_tag.get_text(strip=True)
                summary = summary_tag.get_text(strip=True)
                author = author_tag.get_text(strip=True)
                date_str = date_tag.get_text(strip=True)

                buffer_data.append({
                    "tag": tag,
                    "headline": headline,
                    "summary": summary,
                    "author": author,
                    "date": date_str,
                    "page": page
                })

        if (page - 1) % save_every == 0:
            df_buffer = pd.DataFrame(buffer_data)
            if not os.path.exists(output_file):
                df_buffer.to_csv(output_file, index=False)
            else:
                df_buffer.to_csv(output_file, mode='a', index=False, header=False)
            print(f"💾 Appended {len(buffer_data)} records to {output_file}")
            buffer_data.clear()

        page += 1
        time.sleep(2)  # Base delay between requests

    if buffer_data:
        df_buffer = pd.DataFrame(buffer_data)
        if not os.path.exists(output_file):
            df_buffer.to_csv(output_file, index=False)
        else:
            df_buffer.to_csv(output_file, mode='a', index=False, header=False)
        print(f"💾 Appended final {len(buffer_data)} records to {output_file}")

    return {"tag": tag, "total_pages": page - 2}


def scrape_tags_multithreaded(tags, max_threads=4):
    tag_page_counts = []

    with ThreadPoolExecutor(max_workers=max_threads) as executor:
        futures = {executor.submit(scrape_tag, tag): tag for tag in tags}
        for future in as_completed(futures):
            result = future.result()
            if result:
                tag_page_counts.append(result)

    count_df = pd.DataFrame(tag_page_counts)
    count_df.to_csv("tag_page_counts.csv", index=False)
    print("✅ Page count per tag saved to 'tag_page_counts.csv'")
    return tag_page_counts


# 🔄 Example usage
tags_to_scrape = ["bnb", "dogecoin", "usdt", "solana", "avalanche", "litecoin", "Chainlink", "Cardano", "Polkadot"]
scrape_tags_multithreaded(tags_to_scrape, max_threads=4)



🔖 Starting scrape for tag: bnb
🔍 Scraping https://www.coindesk.com/tag/bnb/2/

🔖 Starting scrape for tag: dogecoin
🔍 Scraping https://www.coindesk.com/tag/dogecoin/2/

🔖 Starting scrape for tag: usdt
🔍 Scraping https://www.coindesk.com/tag/usdt/2/

🔖 Starting scrape for tag: solana
🔍 Scraping https://www.coindesk.com/tag/solana/2/
🔍 Scraping https://www.coindesk.com/tag/usdt/3/
🔍 Scraping https://www.coindesk.com/tag/bnb/3/
🔍 Scraping https://www.coindesk.com/tag/dogecoin/3/
🔍 Scraping https://www.coindesk.com/tag/solana/3/
🔍 Scraping https://www.coindesk.com/tag/usdt/4/
🔍 Scraping https://www.coindesk.com/tag/dogecoin/4/
🔍 Scraping https://www.coindesk.com/tag/bnb/4/
🔍 Scraping https://www.coindesk.com/tag/solana/4/
🔍 Scraping https://www.coindesk.com/tag/dogecoin/5/
🔍 Scraping https://www.coindesk.com/tag/bnb/5/
🔍 Scraping https://www.coindesk.com/tag/solana/5/
🔍 Scraping https://www.coindesk.com/tag/usdt/5/
🔍 Scraping https://www.coindesk.com/tag/dogecoin/6/
🔍 Scraping https://www.

[{'tag': 'bnb', 'total_pages': 9},
 {'tag': 'dogecoin', 'total_pages': 23},
 {'tag': 'usdt', 'total_pages': 23},
 {'tag': 'litecoin', 'total_pages': 3},
 {'tag': 'avalanche', 'total_pages': 18},
 {'tag': 'Polkadot', 'total_pages': 21},
 {'tag': 'Chainlink', 'total_pages': 25},
 {'tag': 'Cardano', 'total_pages': 26},
 {'tag': 'solana', 'total_pages': 91}]

### cointelegraph

In [11]:
import requests

# Target URL
url = "https://cointelegraph.com/"

# Realistic User-Agent (Chrome on Windows)
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/122.0.0.0 Safari/537.36'
    )
}

try:
    response = requests.get(url, headers=headers)

    # Print status code
    print(f"\nStatus Code: {response.status_code}")

    # Print full HTML content
    print("\n----- Full Response Content -----\n")
    print(response.text)

except requests.exceptions.RequestException as e:
    print(f"Error occurred: {e}")



Status Code: 200

----- Full Response Content -----

<!doctype html>
<html data-n-head-ssr dir="ltr" lang="en" data-n-head="%7B%22dir%22:%7B%22ssr%22:%22ltr%22%7D,%22lang%22:%7B%22ssr%22:%22en%22%7D%7D">
  <head >
    <meta data-n-head="ssr" data-hid="sentry-trace" name="sentry-trace" content="8f736c0a0d2541299bf81a2706e76caf-9bf5e7a6f10db531-0"><meta data-n-head="ssr" data-hid="sentry-baggage" name="baggage" content="sentry-environment=production,sentry-release=release-455,sentry-public_key=26d85e1737194cc392dd8f51e66f7082,sentry-trace_id=8f736c0a0d2541299bf81a2706e76caf,sentry-sample_rate=0.1,sentry-transaction=GET%20%2F,sentry-sampled=false"><meta data-n-head="ssr" charset="utf-8"><meta data-n-head="ssr" name="viewport" content="width=device-width, initial-scale=1"><meta data-n-head="ssr" name="apple-mobile-web-app-title" content="Cointelegraph"><meta data-n-head="ssr" name="application-name" content="Cointelegraph"><meta data-n-head="ssr" name="msapplication-TileColor" content="#1

In [15]:
import requests
from bs4 import BeautifulSoup

# Target URL
url = "https://cointelegraph.com/"

# Headers
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/122.0.0.0 Safari/537.36'
    )
}

try:
    response = requests.get(url, headers=headers)
    print(f"Status Code: {response.status_code}")

    soup = BeautifulSoup(response.text, 'html.parser')

    # Find all post headers
    articles = soup.find_all('header', {'data-testid': 'post-card-header'})

    print("\n📰 Headlines, Links, and Timestamps:\n")

    for header in articles:
        # Headline
        title_tag = header.find('span', {'data-testid': 'post-card-title'})
        title = title_tag.text.strip() if title_tag else "No Title"

        # Link
        # link_tag = header.find('a')
        # link = 'https://cointelegraph.com' + link_tag['href'] if link_tag else "No Link"

        # Get datetime from sibling footer
        parent = header.parent
        time_tag = parent.find('time', {'data-testid': 'post-card-published-date'})
        # time_text = time_tag.text.strip() if time_tag else "No Time"
        datetime_attr = time_tag['datetime'] if time_tag and time_tag.has_attr('datetime') else "No Datetime"

        # Print all info
        print(f"🗞️ Title: {title}")
        # print(f"🔗 Link: {link}")
        # print(f"🕒 Published (Visible): {time_text}")
        print(f"📅 Datetime Attribute: {datetime_attr}\n")

except requests.exceptions.RequestException as e:
    print(f"❌ Error: {e}")


Status Code: 200

📰 Headlines, Links, and Timestamps:

🗞️ Title: Why is the crypto market down today?
📅 Datetime Attribute: 2025-04-16

🗞️ Title: Why is Bitcoin price down today?
📅 Datetime Attribute: 2025-04-16

🗞️ Title: Italy finance minister warns US stablecoins pose bigger threat than tariffs
📅 Datetime Attribute: 2025-04-16

🗞️ Title: OKX reenters US market following $505M DOJ settlement
📅 Datetime Attribute: 2025-04-16

🗞️ Title: Sony’s Soneium taps EigenLayer to cut finality to under 10 seconds
📅 Datetime Attribute: 2025-04-16

🗞️ Title: Bitcoin trader sees gold 'blow-off top' as XAU nears new $3.3K record
📅 Datetime Attribute: 2025-04-16

🗞️ Title: How trade wars impact stocks and crypto
📅 Datetime Attribute: 2025-04-16

🗞️ Title: Bitcoin’s wide price range to continue, no longer a ‘long only’ bet  — Analyst
📅 Datetime Attribute: 2025-04-16

🗞️ Title: Chipmaker stocks slide as Nvidia faces $5.5B charge with US restrictions
📅 Datetime Attribute: 2025-04-16

🗞️ Title: Brazil’s M

In [None]:
import requests
from bs4 import BeautifulSoup
import json
import time
import csv
from urllib.parse import urlparse

# List of tag URLs
tag_urls = [
    "https://cointelegraph.com/ada-price-index",
    "https://cointelegraph.com/binance-coin-price-index",
    "https://cointelegraph.com/ethereum-price",
    "https://cointelegraph.com/tether-price-index",
    "https://cointelegraph.com/solana-price-index",
    "https://cointelegraph.com/bitcoin-price",
    "https://cointelegraph.com/doge-price-index",
    "https://cointelegraph.com/avalanche-price-index",
    "https://cointelegraph.com/chainlink-price-index",
    "https://cointelegraph.com/polkadot-price-index"
]

MAX_PAGES = 500 

for url in tag_urls:
    buffer_data = []

    # Extract tag from URL
    path = urlparse(url).path.strip("/").replace("-price-index", "").replace("-price", "")
    tag = path

    print(f"\n🔍 Scraping tag: {tag}")

    base_url = "https://cointelegraph.com"
    static_url = url

    session = requests.Session()
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                      "AppleWebKit/537.36 (KHTML, like Gecko) "
                      "Chrome/122.0.0.0 Safari/537.36"
    }

    def save_to_csv(data, page_number):
        with open(f"{tag}_articles.csv", mode="a", newline="", encoding="utf-8") as file:
            writer = csv.DictWriter(file, fieldnames=["tag", "headline", "summary", "author", "date", "page", "total_pages_scraped"])
            if file.tell() == 0:
                writer.writeheader()
            for row in data:
                row["total_pages_scraped"] = page_number
                writer.writerow(row)

    # Static scrape
    response = session.get(static_url, headers=headers)
    page = 1

    if response.status_code == 200:
        soup = BeautifulSoup(response.content, "html.parser")
        articles = soup.find_all("article", {"data-testid": ["article-card article-card-inline", "article-card article-card-card"]})

        for article in articles:
            title_tag = article.find("a", {"data-testid": "article-card-title"})
            headline = title_tag.text.strip() if title_tag else "No title"
            summary_tag = article.find("div", {"data-testid": "article-card-lead"})
            summary = summary_tag.text.strip() if summary_tag else "No summary"
            author_tag = article.find("a", {"data-testid": "article-card-author"})
            author = author_tag.text.strip() if author_tag else "Unknown"
            time_tag = article.find("time", {"data-testid": "article-card-published-at"})
            date_str = time_tag.text.strip() if time_tag else "Unknown"

            buffer_data.append({
                "tag": tag,
                "headline": headline,
                "summary": summary,
                "author": author,
                "date": date_str,
                "page": page
            })

    save_to_csv(buffer_data, page)
    buffer_data.clear()

    # Dynamic scrape
    graphql_url = f"{base_url}/v1/"
    graphql_headers = headers.copy()
    graphql_headers.update({
        "Content-Type": "application/json",
        "Accept": "application/json",
        "Origin": base_url,
        "Referer": static_url
    })

    offset = 0
    length = 1000
    page = 2
    has_more = True

    while has_more and page <= MAX_PAGES:
        print(f"📡 Fetching {tag} posts at offset {offset} (Page {page})...")
        payload = {
            "operationName": "TagPostsQuery",
            "query": """query TagPostsQuery($short: String, $slug: String!, $offset: Int!, $length: Int!) {
              locale(short: $short) {
                tag(slug: $slug) {
                  posts(offset: $offset, length: $length) {
                    hasMorePosts
                    data {
                      postTranslate {
                        title
                        leadText
                        published
                        author {
                          authorTranslates {
                            name
                          }
                        }
                      }
                    }
                  }
                }
              }
            }""",
            "variables": {
                "short": "en",
                "slug": tag,
                "offset": offset,
                "length": length
            }
        }

        res = session.post(graphql_url, headers=graphql_headers, json=payload)
        if res.status_code == 200:
            try:
                data = res.json()
                posts = data["data"]["locale"]["tag"]["posts"]["data"]
                has_more = data["data"]["locale"]["tag"]["posts"]["hasMorePosts"]

                for post in posts:
                    translate = post.get("postTranslate", {})
                    author_list = translate.get("author", {}).get("authorTranslates", [])
                    author = author_list[0]["name"] if author_list else "Unknown"
                    buffer_data.append({
                        "tag": tag,
                        "headline": translate.get("title", "No title"),
                        "summary": translate.get("leadText", "No summary"),
                        "author": author,
                        "date": translate.get("published", "Unknown"),
                        "page": page
                    })

                offset += length
                page += 1
                time.sleep(1)

                if page % 10 == 0:
                    save_to_csv(buffer_data, page)
                    buffer_data.clear()

            except Exception as e:
                print("❌ Error parsing response:", e)
                break
        else:
            print(f"❌ Failed at offset {offset} | Status: {res.status_code}")
            break

    if buffer_data:
        save_to_csv(buffer_data, page)

    print(f"✅ Finished scraping {tag}\n")



### Price scrape

In [None]:
import yfinance as yf
import pandas as pd

# Crypto symbols with Yahoo Finance codes
symbols = {
    "BTC": "BTC-USD",
    "ETH": "ETH-USD",
    "BNB": "BNB-USD",
    "DOGE": "DOGE-USD",
    "USDT": "USDT-USD",
    "SOL": "SOL-USD",
    "AVAX": "AVAX-USD",
    "LTC": "LTC-USD",
    "LINK": "LINK-USD",
    "ADA": "ADA-USD",
    "DOT": "DOT-USD"
}

start_date = "2013-01-01"
end_date = "2025-01-01"

# Loop through and save each crypto's historical data
for name, ticker in symbols.items():
    print(f"Fetching {name} ({ticker})...")
    data = yf.download(ticker, start=start_date, end=end_date, interval="1d")
    if not data.empty:
        data.to_csv(f"{name}_from_2013.csv")
        print(f" Saved {name} data: {len(data)} rows")
    else:
        print(f"⚠️ No data found for {name}")


### Cryptpanic

In [8]:
import requests

# URL of the page you want to scrape
url = "https://finance.yahoo.com/quote/BTC-USD/news/"

# Make the request to get the page content
headers = {
    "User-Agent": "Mozilla/5.0"
}
response = requests.get(url, headers=headers)

# Get the full HTML content of the page
html_content = response.text

# Print the full HTML content of the page
print(html_content)


<!doctype html>
<html lang="en-US" theme="auto" data-color-theme-enabled="true" data-color-scheme="auto" class="desktop neo-green dock-upscale">
    <head>
        <meta charset="utf-8" />
        <meta name="oath:guce:consent-host" content="guce.yahoo.com" />
            function _nimbusSendEVLoadEvent() {
                if (_nimbusEvLoad._player){
                    window.finNeoEVReady = Date.now();
                    window.dispatchEvent(new CustomEvent('NIMBUS_EV_READY',{detail: {}}));
                }
            }
            function onNimbusEVPlayerReady(){_nimbusEvLoad._player = true;_nimbusSendEVLoadEvent();}</script><script type="module">if(!window.finWebCore){window.finWebCore=function r(e){const{isModern:t=!0,isDev:i=!1,lang:a=s,devAssets:o,prodAssets:r,crumb:n="",features:c=[],strings:d}=e;let f={};const m=a.substring(a.lastIndexOf("-")+1);return{crumb:n,lang:a,region:m,features:c,store:{},intl:m.toLowerCase(),strings:d,assets:i?o:r,addScriptTag(e,s,t){if(!e)return;c

In [9]:
import requests
from bs4 import BeautifulSoup

# URL of the Yahoo Finance page containing the data
url = "https://finance.yahoo.com/quote/BTC-USD/news/"

# Make the request to get the page content
headers = {
    "User-Agent": "Mozilla/5.0"
}
response = requests.get(url, headers=headers)

# Parse the HTML content using BeautifulSoup
soup = BeautifulSoup(response.text, 'html.parser')

# Find all the articles (stream-item) on the page
articles = soup.find_all('li', class_='stream-item story-item yf-1usaaz9')

# Loop through each article and extract the details
for article in articles:
    # Extracting the title of the article
    title_tag = article.find('h3', class_='clamp yf-1y7058a')
    title = title_tag.get_text() if title_tag else "No title found"
    
    # Extracting the article link
    link_tag = article.find('a', class_='subtle-link fin-size-small thumb yf-1xqzjha')
    link = link_tag['href'] if link_tag else "No link found"
    
    # Extracting the description of the article
    description_tag = article.find('p', class_='clamp yf-1y7058a')
    description = description_tag.get_text() if description_tag else "No description found"
    
    # Extracting the publisher and publication time
    publisher_tag = article.find('div', class_='publishing yf-1weyqlp')
    publisher = publisher_tag.get_text().strip() if publisher_tag else "No publisher found"

    # Extracting the article date
    date = publisher.split('•')[-1].strip() if publisher else "No date found"
    
    # Print the extracted data
    print(f"Title: {title}")
    print(f"Link: https://finance.yahoo.com{link}")
    print(f"Description: {description}")
    print(f"Publisher: {publisher}")
    print(f"Date: {date}")
    print("-" * 80)


Title: Stock market today: Dow rises 400 points, Nasdaq gains 2.5%, but mega-rally fades in late trading
Link: https://finance.yahoo.comhttps://finance.yahoo.com/news/live/stock-market-today-dow-rises-400-points-nasdaq-gains-25-but-mega-rally-fades-in-late-trading-133419757.html
Description: US stocks jumped after President Trump said he has "no intention" of firing Federal Reserve Chair Jerome Powell, easing fears on Wall Street that the central bank's independence was under threat.
Publisher: Yahoo Finance • 15 hours ago
Date: 15 hours ago
--------------------------------------------------------------------------------
Title: GE Vernova & AT&T outlook, bitcoin & crypto gains: Trending Tickers
Link: https://finance.yahoo.comhttps://finance.yahoo.com/video/ge-vernova-t-outlook-bitcoin-152907412.html
Description: Brad Smith and Madison Mills take a closer look at some of the trending tickers of today's trading session on Wealth. GE Vernova (GEV) maintains its full-year outlook despite a

In [11]:
import requests
from bs4 import BeautifulSoup
from datetime import datetime, timedelta
import time

# URL of the Yahoo Finance page containing the data
url = "https://finance.yahoo.com/quote/BTC-USD/news/"

# Make the request to get the page content
headers = {
    "User-Agent": "Mozilla/5.0"
}
response = requests.get(url, headers=headers)

# Parse the HTML content using BeautifulSoup
soup = BeautifulSoup(response.text, 'html.parser')

# Find all the articles (stream-item) on the page
articles = soup.find_all('li', class_='stream-item story-item yf-1usaaz9')

# Function to convert relative time to absolute date
def get_exact_date(relative_time):
    current_time = time.time()  # Get current timestamp

    if 'day' in relative_time:
        # Handling relative time like "2 days ago" or "1 day ago"
        if 'yesterday' in relative_time.lower():
            # "yesterday"
            exact_time = current_time - (1 * 86400)  # 1 day ago
        else:
            # "2 days ago", "3 days ago", etc.
            days_ago = int(relative_time.split()[0])
            exact_time = current_time - (days_ago * 86400)
    elif 'hour' in relative_time:
        # Handling relative time like "2 hours ago"
        hours_ago = int(relative_time.split()[0])
        exact_time = current_time - (hours_ago * 3600)
    elif 'minute' in relative_time:
        # Handling relative time like "30 minutes ago"
        minutes_ago = int(relative_time.split()[0])
        exact_time = current_time - (minutes_ago * 60)
    else:
        # If it's not a relative time, return the original value (like a full date)
        exact_time = current_time

    # Convert to exact datetime string
    return datetime.utcfromtimestamp(exact_time).strftime('%Y-%m-%d %H:%M:%S')

# Loop through each article and extract the details
for article in articles:
    # Extracting the title of the article
    title_tag = article.find('h3', class_='clamp yf-1y7058a')
    title = title_tag.get_text() if title_tag else "No title found"
    
    # Extracting the article link
    link_tag = article.find('a', class_='subtle-link fin-size-small thumb yf-1xqzjha')
    link = link_tag['href'] if link_tag else "No link found"
    
    # Extracting the description of the article
    description_tag = article.find('p', class_='clamp yf-1y7058a')
    description = description_tag.get_text() if description_tag else "No description found"
    
    # Extracting the publisher and publication time
    publisher_tag = article.find('div', class_='publishing yf-1weyqlp')
    publisher = publisher_tag.get_text().strip() if publisher_tag else "No publisher found"

    # Extracting the article date
    date_tag = article.find('div', class_='publishing yf-1weyqlp')
    if date_tag:
        date = date_tag.get_text().strip().split('•')[-1].strip()
        date = get_exact_date(date)  # Convert relative date to exact date
    else:
        date = "No date found"
    
    # Print the extracted data
    print(f"Title: {title}")
    print(f"Link: https://finance.yahoo.com{link}")
    print(f"Description: {description}")
    print(f"Publisher: {publisher}")
    print(f"Date: {date}")
    print("-" * 80)


Title: Stock market today: Dow rises 400 points, Nasdaq gains 2.5%, but mega-rally fades in late trading
Link: https://finance.yahoo.comhttps://finance.yahoo.com/news/live/stock-market-today-dow-rises-400-points-nasdaq-gains-25-but-mega-rally-fades-in-late-trading-133419757.html
Description: US stocks jumped after President Trump said he has "no intention" of firing Federal Reserve Chair Jerome Powell, easing fears on Wall Street that the central bank's independence was under threat.
Publisher: Yahoo Finance • 15 hours ago
Date: 2025-04-23 21:40:43
--------------------------------------------------------------------------------
Title: GE Vernova & AT&T outlook, bitcoin & crypto gains: Trending Tickers
Link: https://finance.yahoo.comhttps://finance.yahoo.com/video/ge-vernova-t-outlook-bitcoin-152907412.html
Description: Brad Smith and Madison Mills take a closer look at some of the trending tickers of today's trading session on Wealth. GE Vernova (GEV) maintains its full-year outlook de

  return datetime.utcfromtimestamp(exact_time).strftime('%Y-%m-%d %H:%M:%S')


In [None]:
import requests
import json

# Define the base URL
url = 'https://finance.yahoo.com/xhr/ncp?location=US&queryRef=newsAll&serviceKey=ncp_fin&listName=BTC-USD-news-related&lang=en-US&region=US'

# Set headers as required (you might need to check what headers the site expects)
headers = {
    'User-Agent': 'Mozilla/5.0',
    'Content-Type': 'application/json',
}

# Define the initial UUIDs for the first page (this can change per iteration)
initial_uuids = [
    {"id": "28a9093e-6174-390f-9ea2-f553b767db11", "type": "ymedia:type=story"},
    {"id": "15d5a9a8-21ae-3498-b269-baccbdf33b56", "type": "ymedia:type=cavideo"}
]

# Define the initial payload (you can dynamically update 'uuids' on each page)
payload = {
    "payload": {
        "gqlVariables": {
            "tickerStream": {
                "pagination": {
                    "uuids": initial_uuids
                }
            }
        }
    }
}

# Make a POST request to fetch data (you may need to update the pagination part for subsequent requests)
response = requests.post(url, headers=headers, data=json.dumps(payload))

# Parse the response
data = response.json()

# Example: Print the response to see the fetched data
print(json.dumps(data, indent=4))

# Loop to simulate pagination (you will need logic to get the next set of UUIDs)
for page in range(2, 6):  # Looping through 5 pages (you can extend this)
    new_uuids = [
        {"id": f"new-uuid-{page}-1", "type": "ymedia:type=story"},
        {"id": f"new-uuid-{page}-2", "type": "ymedia:type=cavideo"}
    ]
    
    # Update the payload with new UUIDs
    payload["payload"]["gqlVariables"]["tickerStream"]["pagination"]["uuids"] = new_uuids

    # Send the request for the next page
    response = requests.post(url, headers=headers, data=json.dumps(payload))
    
    # Parse and print the next page of data
    data = response.json()
    print(f"Page {page} Data: {json.dumps(data, indent=4)}")


In [None]:
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

# Target URL
url = "https://cryptopanic.com/news/bitcoin/"

# Custom headers to simulate a browser
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
}

# Make HTTP GET request
response = requests.get(url, headers=headers)

# Parse HTML content
soup = BeautifulSoup(response.text, "html.parser")
print(soup+)
# Select all news rows
news_rows = soup.select("div.news-row.news-row-link")

print(news_rows)

# Loop through each news item
# for row in news_rows:
#     # Extract the title
#     title_tag = row.select_one("a.nc-title span.title-text > span")
#     title = title_tag.get_text(strip=True) if title_tag else "No Title"

#     # Extract the full link
#     link_tag = row.select_one("a.nc-title")
#     link = urljoin(url, link_tag["href"]) if link_tag else "No Link"

#     # Extract the datetime
#     time_tag = row.select_one("a.nc-date time")
#     published = time_tag["datetime"] if time_tag and "datetime" in time_tag.attrs else "No datetime found"

    # Print results
    # print(f"📰 Title: {title}")
    # print(f"🔗 Link: {link}")
    # print(f"🕒 Published: {published}")
    # print("-" * 80)


<!DOCTYPE html>

<html class="vue-app" lang="en">
<head>
<meta content="IE=edge" http-equiv="X-UA-Compatible"/>
<meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
<meta content="width=device-width, initial-scale=1, maximum-scale=1, user-scalable=0" name="viewport"/>
<meta content="telephone=no" name="format-detection"/>
<title>Bitcoin $BTC Real-time News | CryptoPanic</title><meta content="CryptoPanic" name="author"/>
<link href="/news/rss/" rel="alternate" title="RSS" type="application/rss+xml"/>
<meta content="1848168952115035" property="fb:app_id"/>
<meta content="100005758416232" property="fb:admins"/>
<meta content="en_us" property="og:locale"/>
<meta content="CryptoPanic" property="og:site_name"/>
<meta content="always" name="referrer"/>
<meta content="app-id=1290506871" name="apple-itunes-app"/>
<meta content="Bitcoin $BTC aggregated real-time news feed on CryptoPanic" name="description"/>
<meta content="https://cryptopanic.com/news/bitcoin/" property="og:url"/