# Finale Webscraping-Schleifen für Supabase

## Codeblock: Web Scraping

In [None]:
# NOTE: this entire cell is wrapped in a triple-quoted string, so none of the
# code inside it is executed. It holds a scraper variant (scrape_news, using
# the "bitcoin" search URL) that is presumably kept only for reference.
'''import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_news(url):
    # Seite laden
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')

    # Alle News-Container finden
    main_blocks = soup.find_all("div", class_="news__item")

    data = []

    for block in main_blocks:
        # Titel und Zeit in jedem Block suchen
        title_tag = block.find("div", class_="news__item-title")
        time_tag = block.find("div", class_="humble")

        if title_tag and time_tag:
            title = title_tag.get_text(strip=True)

            timing_raw = time_tag.get_text(strip=True)
            # Uhrzeit entfernen, falls mit " - " getrennt (z.B. "2025-06-12 - 14:35")
            date_only = timing_raw.split(' - ')[0]

            data.append({
                "date": date_only,
                "title": title
            })

    # DataFrame erstellen
    df = pd.DataFrame(data)
    return df

# URL mit Suchbegriff bitcoin
url = "https://u.today/search/node?keys=bitcoin"

# News-Daten scrapen
df = scrape_news(url)

# CSV speichern (optional)
df.to_csv("news_ticker.csv", index=False)

print("✔️ CSV erfolgreich erstellt mit", len(df), "Einträgen")
print(df.head())  # Zeige die ersten Zeilen des DataFrames'''

✔️ CSV erfolgreich erstellt mit 55 Einträgen
           date                                              title
0  Jun 12, 2025     Bitcoin (BTC) to Crash to $90,000? Price Falls
1  Jun 12, 2025  Bitcoin Bull Novogratz Says Strategy Copycats ...
2  Jun 12, 2025         BlackRock's Bitcoin ETF Breaks Into Top 20
3  Jun 12, 2025  Solana (SOL) Golden Cross Secured? Shiba Inu (...
4  Jun 11, 2025   $347,000,000 in Bitcoin (BTC) Moved in Two Hours


In [4]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime

# Load the news overview page. The timeout keeps the cell from hanging on a
# stalled connection, and raise_for_status() aborts on an HTTP error instead
# of silently overwriting the CSV with an empty result.
url = "https://u.today/news"
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")

# Collect all <div class="news__item"> containers.
main_blocks = soup.find_all("div", class_="news__item")

data = []

for block in main_blocks:
    title_tag = block.find("div", class_="news__item-title")
    time_tag = block.find("div", class_="humble")

    # Skip containers that lack either the title or the timestamp element.
    if not (title_tag and time_tag):
        continue

    title = title_tag.get_text(strip=True)
    timing_raw = time_tag.get_text(strip=True)

    # Keep only the date part when the text looks like "Jun 12, 2025 - 09:30".
    date_part = timing_raw.split(" - ")[0].strip()

    try:
        # Convert to ISO format, e.g. "2025-06-12".
        date_obj = datetime.strptime(date_part, "%b %d, %Y")
        formatted_date = date_obj.strftime("%Y-%m-%d")
    except ValueError:
        # Report dates that cannot be parsed and skip the entry.
        print(f"⚠️ Ungültiges Datumsformat: {date_part}")
        continue

    data.append({
        "date": formatted_date,
        "title": title
    })

# Build the DataFrame and export it to CSV. Naming the columns explicitly keeps
# the "date,title" header in the file even when no entry was scraped.
df = pd.DataFrame(data, columns=["date", "title"])
df.to_csv("news_ticker.csv", index=False)
print("✔️ CSV erfolgreich erstellt mit", len(df), "Einträgen")


✔️ CSV erfolgreich erstellt mit 45 Einträgen


# Senden an SUPABASE

In [5]:
import os
import psycopg
import pandas as pd
from datetime import datetime
from dotenv import load_dotenv

# Load variables via python-dotenv, then read the connection string that
# insert_df_to_db() passes to psycopg.connect().
load_dotenv()
dbconn = os.environ.get("DATABASE_URL")

def insert_df_to_db(df):
    """Insert the rows of *df* into the ``news_ticker`` table.

    Connects with the module-level ``dbconn`` string and issues one INSERT
    per DataFrame row. Rows that fail (date parsing or database error) are
    reported on stdout and skipped; the remaining rows are still written.

    Args:
        df: DataFrame with a ``date`` column of "YYYY-MM-DD" strings and a
            ``title`` column.
    """
    with psycopg.connect(dbconn) as conn, conn.cursor() as cur:
        for _, row in df.iterrows():
            try:
                # Parse the date, expected in YYYY-MM-DD format.
                date = datetime.strptime(row['date'], "%Y-%m-%d").date()
                title = row['title']

                # Give every row its own transaction block. Without it, the
                # first database error would abort the single shared
                # transaction, make all later INSERTs fail, and roll the
                # whole batch back on exit.
                with conn.transaction():
                    cur.execute(
                        '''
                        INSERT INTO news_ticker(date, title)
                        VALUES (%s, %s)
                        ON CONFLICT DO NOTHING;
                        ''',
                        (date, title)
                    )
            except Exception as e:
                # Deliberate best effort: report the failing row and go on.
                print(f"❌ Fehler bei Zeile: {row}\n{e}")

    print("✅ Datenbank aktualisiert.")

# df is the DataFrame produced by the scraper
insert_df_to_db(df)


✅ Datenbank aktualisiert.
