# Download Historical Stock Data

### Prepare Constants

In [None]:
# Download settings shared by the cells below.

# Symbols to fetch; the download cell writes one CSV file per symbol.
TICKER_LIST = ["AAPL", "TSLA", "GOOG", "NVDA", "O", "BA"]

# Date range handed to the history request, as "YYYY-MM-DD" strings.
start_date, end_date = "2023-12-31", "2025-01-01"

# Directory that receives the per-ticker CSV files.
CSV_DIR = "./stock_csv_data"

### Download stock data and save it to CSV
One `.csv` file is written per ticker.

In [None]:
import os
import pandas as pd
from yahooquery import Ticker

# Fetch the daily price history of every ticker in TICKER_LIST and write it
# to CSV_DIR as one CSV file per ticker. Tickers that yield no usable data
# are reported and skipped.
os.makedirs(CSV_DIR, exist_ok=True)

for TICKER in TICKER_LIST:
    print(f"📥 Downloading {TICKER}")

    # Fetch data
    ticker = Ticker(TICKER)
    data = ticker.history(start=start_date, end=end_date, interval="1d")

    # Skip if nothing usable came back. The isinstance check guards against
    # a non-DataFrame result (yahooquery may return a dict of error messages
    # for unknown symbols), which has no `.empty` attribute.
    if not isinstance(data, pd.DataFrame) or data.empty:
        print(f"⚠️ No data for {TICKER}, skipping.")
        continue
    # Only report success once we know there is something to save.
    print(f"✅ {TICKER} data fetched.")

    # Reset index to expose "date" as a column
    data = data.reset_index()

    # Check for "date" column existence
    if "date" not in data.columns:
        print(f"⚠️ 'date' column missing for {TICKER}, skipping.")
        continue

    # Parse the dates as timezone-aware UTC timestamps. utc=True treats naive
    # values as UTC and converts aware values to UTC, so a column mixing both
    # still ends up with a single datetime dtype; unparseable values become
    # NaT and are dropped.
    data["date"] = pd.to_datetime(data["date"], errors="coerce", utc=True)
    data = data.dropna(subset=["date"])

    # Save to CSV
    csv_path = os.path.join(CSV_DIR, f"{TICKER}.csv")
    data.to_csv(csv_path, index=False)
    print(f"📝 CSV gespeichert: {csv_path}")




### Load stock data from the CSV files into InfluxDB


In [None]:
import os
import pandas as pd
import datetime
from influxdb_client import InfluxDBClient, Point, WriteOptions

# InfluxDB connection settings. Each value can be overridden through an
# environment variable of the same name; the literals below are the defaults
# used when the variable is not set.
INFLUXDB_URL = os.environ.get("INFLUXDB_URL", "http://localhost:10896")
# NOTE(review): this default is an API token committed to the source file.
# Prefer supplying INFLUXDB_TOKEN via the environment, then rotate this token
# and remove the literal.
INFLUXDB_TOKEN = os.environ.get(
    "INFLUXDB_TOKEN",
    "14iJvsBJKp37nLXjIZvE4RbAoEO2dNs1k0GvCbKuJUnF_ub4pSWWw80O739jabLPMD-XBzA72WSX9f-4FuDBQ==",
)
INFLUXDB_ORG = os.environ.get("INFLUXDB_ORG", "bdinf-org")
INFLUXDB_BUCKET = os.environ.get("INFLUXDB_BUCKET", "bdinf-bucket")

# Directory containing the CSV files to upload (adjust if necessary).
CSV_DIR = "./stock_csv_data"  # <== set your CSV path here

# Upload every CSV file in CSV_DIR to InfluxDB as "hist_stock_data" points.
#
# The client and the batching write API are both used as context managers:
# leaving the `with` block closes the write API (flushing any batches that
# are still queued) before the client connection is closed, and this also
# happens if an exception interrupts the loop.
with InfluxDBClient(
    url=INFLUXDB_URL,
    token=INFLUXDB_TOKEN,
    org=INFLUXDB_ORG,
) as client, client.write_api(
    write_options=WriteOptions(
        batch_size=5000,
        flush_interval=5_000,
        jitter_interval=1_000,
        retry_interval=5_000,
        max_retries=5,
        max_retry_delay=30_000,
        exponential_base=2,
    )
) as write_api:
    # Process the CSV files in a stable (sorted) order.
    for ticker_file in sorted(os.listdir(CSV_DIR)):
        if not ticker_file.endswith(".csv"):
            continue

        # The file name without extension is used as the ticker label in logs.
        TICKER = os.path.splitext(ticker_file)[0]
        print(f"📤 Uploading {TICKER} to InfluxDB")

        data_path = os.path.join(CSV_DIR, ticker_file)
        data = pd.read_csv(data_path)

        if data.empty or "date" not in data.columns:
            print(f"⚠️ Skipping {TICKER}, invalid or empty data")
            continue

        # Parse straight to timezone-aware UTC: naive values are taken as UTC
        # and aware values are converted, so no per-row timezone handling is
        # needed afterwards. Unparseable dates become NaT and are dropped.
        data["date"] = pd.to_datetime(data["date"], errors="coerce", utc=True)
        data = data.dropna(subset=["date"])
        if data.empty:
            print(f"⚠️ Skipping {TICKER}, no valid dates after cleaning")
            continue

        points = []
        for _, row in data.iterrows():
            try:
                point = (
                    Point("hist_stock_data")
                    .tag("ticker", row["symbol"])  # taken directly from the CSV
                    .field("open", float(row["open"]))
                    .field("high", float(row["high"]))
                    .field("low", float(row["low"]))
                    .field("close", float(row["close"]))
                    .field("adjclose", float(row["adjclose"]))  # lowercase "c"
                    .field("volume", int(row["volume"]))
                    .time(row["date"])
                )
            except (KeyError, TypeError, ValueError, OverflowError) as e:
                # Missing column or a value that cannot be converted to a
                # number (e.g. NaN volume): report the row and move on.
                print(f"❌ Error in {TICKER} row: {e}")
                continue
            points.append(point)

        if points:
            # In batching mode this hands the points to the write API's
            # queue; they are sent in the background and on close.
            write_api.write(bucket=INFLUXDB_BUCKET, org=INFLUXDB_ORG, record=points)
            print(f"✅ {TICKER} data written to InfluxDB\n")
        else:
            print(f"⚠️ No valid data points for {TICKER}\n")

# Write API and client are closed at this point.
print("Import abgeschlossen. Verbindung geschlossen.")