#### Fetch Crypto News from CryptoPanic

In [16]:
import requests
import os
import pandas as pd
from datetime import datetime
import json

def fetch_crypto_panic_news(currency, filter, n_pages=100, timeout=10):
    """
    Fetch news headlines for one currency from the CryptoPanic posts API.

    Nothing is written to disk here; the caller decides what to do with the
    returned records.

    Parameters
    ----------
    currency : str
        Currency code sent as the ``currencies`` query parameter (e.g. "BTC").
    filter : str
        Value sent as the ``filter`` query parameter (e.g. "hot"). The name
        shadows the built-in ``filter`` but is kept so existing callers that
        pass it by keyword keep working.
    n_pages : int, optional
        Maximum number of result pages to request. Defaults to 100.
    timeout : float, optional
        Seconds to wait for each HTTP response before ``requests`` gives up.

    Returns
    -------
    list of dict
        One ``{"date": ..., "headline": ...}`` record per post, where "date"
        is the first 10 characters of the post's ``published_at`` field and
        "headline" is its ``title``.

    Raises
    ------
    RuntimeError
        If the ``CRYPTO_PANIC_API_KEY`` environment variable is not set.
    """
    api_key = os.getenv("CRYPTO_PANIC_API_KEY")
    if not api_key:
        # Fail fast instead of sending "auth_token=None" for every page.
        raise RuntimeError("CRYPTO_PANIC_API_KEY environment variable is not set")

    api_url = "https://cryptopanic.com/api/v1/posts/"
    all_articles = []

    for page in range(1, n_pages + 1):
        # Let requests build the query string so every value is URL-encoded
        # and the key is never pasted into a hand-written URL.
        query = {
            "auth_token": api_key,
            "currencies": currency,
            "filter": filter,
            "page": page,
        }
        response = requests.get(api_url, params=query, timeout=timeout)
        if response.status_code != 200:
            print(f"Error on page {page}: HTTP {response.status_code}")
            continue

        data = response.json()
        results = data.get("results") or []
        if not results:
            # An empty page means there is nothing further to collect.
            break

        for item in results:
            all_articles.append({
                "date": item["published_at"][:10],  # just the date part
                "headline": item["title"],
            })

        # Stop when the response explicitly reports that there is no next
        # page; if the "next" field is absent, keep paging as before.
        if not data.get("next", True):
            break

    return all_articles



# Pull the "hot" BTC posts as a list of {"date", "headline"} records.
all_articles = fetch_crypto_panic_news("BTC", "hot")

# Create DataFrame
# Naming the columns explicitly keeps the frame well-formed when nothing was
# fetched; without it an empty list yields a frame with no "date" column and
# the conversion below raises KeyError.
news_df = pd.DataFrame(all_articles, columns=["date", "headline"])
news_df['date'] = pd.to_datetime(news_df['date'])
# The same (date, headline) pair can be collected more than once; keep one copy.
news_df = news_df.drop_duplicates()

# # Optional: filter to last 45 days only
# news_df = news_df[news_df['date'] >= (news_df['date'].max() - pd.Timedelta(days=45))]

# Save to CSV
news_df.to_csv("../data/bitcoin_news_headlines.csv", index=False)
news_df.head()


Unnamed: 0,date,headline
0,2025-04-29,Ethereum’s ‘capitulation’ suggests ETH price i...
1,2025-04-28,Arizona state approves first ever US Bitcoin r...
2,2025-04-27,Bitcoin Continues To Flow Out Of Major Exchang...
3,2025-04-26,Swiss National Bank Rejects Bitcoin Reserve Pr...
4,2025-04-26,XRP price prediction as world’s first spot ETF...


In [2]:
def redact_auth_token(request_url):
    """Return ``request_url`` with the value of its ``auth_token`` query parameter masked.

    URLs that carry no ``auth_token=`` parameter are returned unchanged.
    """
    prefix, marker, remainder = request_url.partition("auth_token=")
    if not marker:
        return request_url
    # Everything up to the next "&" (or the end of the string) is the secret.
    _token, separator, rest = remainder.partition("&")
    return f"{prefix}{marker}<redacted>{separator}{rest}"


# Debug aid: show the most recent request URL without exposing the API key in
# the saved notebook output. `url` only exists at module level once a cell
# that assigns it has run, so skip quietly on a fresh kernel.
if "url" in globals():
    print(redact_auth_token(url))

https://cryptopanic.com/api/v1/posts/?auth_token=<redacted>&currencies=BTC&page=5


In [3]:
import pandas as pd

# Load the headlines saved earlier (one row per headline with its date).
df = pd.read_csv("../data/bitcoin_news_headlines.csv")

# Number of headlines per day, ordered by date label; print the last few
# days as a quick look at recent coverage.
headline_counts = df.groupby("date").size().sort_index()
print(headline_counts.tail())

# Optional: keep only rows whose day has at least 3 headlines
df = df.loc[df["date"].isin(headline_counts.loc[headline_counts.ge(3)].index)]

df.head()

date
2025-04-09    68
2025-04-10    32
dtype: int64


Unnamed: 0,date,headline
0,2025-04-10,Bitcoin hits $82K as Trump tariff reversal tri...
1,2025-04-10,Bitwise doubles down on $200K Bitcoin price pr...
2,2025-04-10,Pakistan Turns Extra Electricity into Bitcoin ...
3,2025-04-10,"Crypto Market Today: Trump Tariff Pauses, Bitc..."
4,2025-04-10,Metaplanet Gets First Third-Party Research Cov...


In [8]:



N_PAGES = 10  # go deeper than before

# Read the key in this cell: elsewhere in the notebook the name is only a
# local inside fetch_crypto_panic_news, so relying on it here raises
# NameError after a kernel restart.
CRYPTO_PANIC_API_KEY = os.getenv("CRYPTO_PANIC_API_KEY")
if not CRYPTO_PANIC_API_KEY:
    raise RuntimeError("CRYPTO_PANIC_API_KEY environment variable is not set")

all_articles = []

# Base endpoint only; the token travels in `params`, so `url` is safe to print.
url = "https://cryptopanic.com/api/v1/posts/"

for page in range(1, N_PAGES + 1):
    response = requests.get(
        url,
        params={
            "auth_token": CRYPTO_PANIC_API_KEY,
            "currencies": "BTC",
            "filter": "hot",
            "page": page,
        },
        timeout=10,  # do not let one stalled request hang the whole cell
    )
    if response.status_code != 200:
        print(f"Error on page {page}")
        continue

    data = response.json()
    # Tolerate a payload without "results" instead of raising KeyError.
    for item in data.get("results") or []:
        # Parse only news up to April 7
        # (the date strings compare correctly as text because they are
        # taken from the leading YYYY-MM-DD part of "published_at").
        date_str = item["published_at"][:10]
        if date_str > "2025-04-07":
            continue
        all_articles.append({
            "date": date_str,
            "headline": item["title"]
        })

# Save filtered results
# Explicit columns keep the CSV header present even when nothing matched, so
# a later pd.read_csv of this file does not fail on an empty file.
news_df = pd.DataFrame(all_articles, columns=["date", "headline"])
news_df.to_csv("../data/bitcoin_news_headlines_extended.csv", index=False)


In [9]:
# Preview the first five rows of the filtered headlines frame.
news_df.head(n=5)

Unnamed: 0,date,headline
0,2025-04-07,Markets are crashing. Bitcoin at 75k
1,2025-04-05,Bitcoin shows signs of decoupling from US equi...
2,2025-04-04,The future of DeFi isn’t on Ethereum — it’s on...
3,2025-04-03,Analyst Warns of Massive Ethereum Drop Against...
4,2025-04-03,Ethereum faces a storm: Could the crypto giant...


In [10]:
import pandas as pd

# Load the extended headline file (one row per headline with its date).
df = pd.read_csv("../data/bitcoin_news_headlines_extended.csv")

# Count headlines for each day, ordered by date label, and print the
# last few days.
headline_counts = df.groupby("date").size().sort_index()
print(headline_counts.tail())

# Optional: keep only rows whose day has at least 3 headlines
df = df.loc[df["date"].isin(headline_counts.loc[headline_counts.ge(3)].index)]

df.head()

date
2025-04-02    1
2025-04-03    3
2025-04-04    1
2025-04-05    1
2025-04-07    1
dtype: int64


Unnamed: 0,date,headline
3,2025-04-03,Analyst Warns of Massive Ethereum Drop Against...
4,2025-04-03,Ethereum faces a storm: Could the crypto giant...
5,2025-04-03,Trader Says Ethereum Headed for a Massive Cras...
7,2025-04-01,GameStop Completes $1.5 Billion Offering to Fu...
8,2025-04-01,Ethereum prints 4 consecutive red monthly cand...
