In [22]:
%pip install yfinance




In [23]:
from dotenv import load_dotenv
import os

load_dotenv()
api_key = os.getenv("FINNHUB_API_KEY")

print("API Key:", os.getenv("FINNHUB_API_KEY"))


API Key: cvna_value


In [24]:
import os
import time
import requests
import pandas as pd
from dotenv import load_dotenv
from datetime import datetime
import yfinance as yf

def ingest_stock_data(symbol="CVNA", start_date="2023-01-01", resolution="D", output_dir="data/raw"):
    load_dotenv()
    api_key = os.getenv("FINNHUB_API_KEY")

    from_ts = int(time.mktime(time.strptime(start_date, "%Y-%m-%d")))
    to_ts = int(time.time())

    url = "https://finnhub.io/api/v1/stock/candle"
    params = {
        "symbol": symbol,
        "resolution": resolution,
        "from": from_ts,
        "to": to_ts,
        "token": api_key
    }

    print(f"🔄 Attempting Finnhub API for {symbol}...")
    try:
        response = requests.get(url, params=params)
        data = response.json()

        if data.get("s") == "ok":
            df = pd.DataFrame({
                "timestamp": pd.to_datetime(data["t"], unit="s"),
                "open": data["o"],
                "high": data["h"],
                "low": data["l"],
                "close": data["c"],
                "volume": data["v"]
            })
            source = "finnhub"
        else:
            print(f"⚠️ Finnhub failed: {data}")
            df = None
    except Exception as e:
        print(f"❌ Finnhub exception: {e}")
        df = None

    # Fallback to yfinance
    if df is None:
        print(f"🔁 Falling back to yfinance for {symbol}...")
        try:
            ticker = yf.Ticker(symbol)
            df = ticker.history(start=start_date).reset_index()
            df = df.rename(columns={
                "Date": "timestamp",
                "Open": "open",
                "High": "high",
                "Low": "low",
                "Close": "close",
                "Volume": "volume"
            })
            source = "yfinance"
        except Exception as e:
            print(f"❌ yfinance exception: {e}")
            return None

    # Validate
    if df.empty or df.isna().sum().sum() > 0:
        print("⚠️ DataFrame is empty or contains NaNs.")
        return None

    # Save
    os.makedirs(output_dir, exist_ok=True)
    timestamp = datetime.now().strftime("%Y%m%d-%H%M")
    filename = f"{output_dir}/api_{source}_{symbol.lower()}_{timestamp}.csv"
    df.to_csv(filename, index=False)
    print(f"✅ Data saved to {filename}")
    return df

In [25]:
print("API response:", data)

API response: {'error': 'Invalid API key.'}


In [26]:
df_cvna = ingest_stock_data(symbol="CVNA", start_date="2023-01-01")
print(df_cvna)

🔄 Attempting Finnhub API for CVNA...
⚠️ Finnhub failed: {'error': 'Invalid API key.'}
🔁 Falling back to yfinance for CVNA...
✅ Data saved to data/raw/api_yfinance_cvna_20250818-1016.csv
                    timestamp        open        high         low       close  \
0   2023-01-03 00:00:00-05:00    4.810000    4.880000    4.420000    4.630000   
1   2023-01-04 00:00:00-05:00    4.660000    5.098000    4.555000    5.040000   
2   2023-01-05 00:00:00-05:00    4.870000    4.920000    4.510000    4.550000   
3   2023-01-06 00:00:00-05:00    4.590000    4.590000    4.230000    4.410000   
4   2023-01-09 00:00:00-05:00    4.470000    4.790000    4.390000    4.440000   
..                        ...         ...         ...         ...         ...   
653 2025-08-12 00:00:00-04:00  352.000000  354.500000  335.160004  345.109985   
654 2025-08-13 00:00:00-04:00  347.785004  351.489990  343.399994  349.869995   
655 2025-08-14 00:00:00-04:00  346.450012  348.869995  340.649994  342.589996   
656 

In [28]:
import yfinance as yf
import pandas as pd

def fallback_yfinance(symbol="CVNA", start_date="2023-01-01"):
    try:
        ticker = yf.Ticker(symbol)
        df = ticker.history(start=start_date).reset_index()
        df = df.rename(columns={
            "Date": "timestamp",
            "Open": "open",
            "High": "high",
            "Low": "low",
            "Close": "close",
            "Volume": "volume"
        })
        print("✅ yfinance DataFrame created:", df.shape)
        return df
    except Exception as e:
        print("❌ yfinance failed:", e)
        return None

df_test = fallback_yfinance()

✅ yfinance DataFrame created: (658, 8)


In [32]:
from datetime import datetime

timestamp = datetime.now().strftime("%Y%m%d-%H%M")
filename = r"C:\Users\kavin\bootcamp_Kavin_Dhanasekaran\your-project\data\cvna.csv"
df = ingest_stock_data(symbol="CVNA", start_date="2023-01-01")
df.to_csv(filename, index=False)


if df is not None and not df.empty:
    df.to_csv(filename, index=False)
    print(f"✅ Saved to {filename}")
else:
    print("⚠️ DataFrame is missing or empty. Skipping save.")

🔄 Attempting Finnhub API for CVNA...
⚠️ Finnhub failed: {'error': 'Invalid API key.'}
🔁 Falling back to yfinance for CVNA...
✅ Data saved to data/raw/api_yfinance_cvna_20250818-1022.csv
✅ Saved to C:\Users\kavin\bootcamp_Kavin_Dhanasekaran\your-project\data\cvna.csv


In [35]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime

def scrape_table(url, table_index=0):
    print(f"🔗 Scraping from: {url}")
    response = requests.get(url)
    soup = BeautifulSoup(response.content, "html.parser")

    tables = soup.find_all("table")
    if not tables:
        print("❌ No tables found.")
        return None

    table = tables[table_index]
    rows = table.find_all("tr")

    data = []
    for row in rows:
        cols = row.find_all(["td", "th"])
        data.append([col.get_text(strip=True) for col in cols])

    df = pd.DataFrame(data[1:], columns=data[0])
    print("✅ Table scraped:", df.shape)
    return df

def validate_scraped_df(df):
    if df is None or df.empty:
        print("⚠️ DataFrame is empty or missing.")
        return False

    print("📊 Columns:", df.columns.tolist())
    print("🧮 NA counts:\n", df.isna().sum())
    print("🔢 Dtypes:\n", df.dtypes)
    return True


timestamp = datetime.now().strftime("%Y%m%d-%H%M")
filename = f"data/raw/scrape_msn_cvna-overview_{timestamp}.csv"
df.to_csv(filename, index=False)
print(f"✅ Saved to {filename}")

url = "https://finance.yahoo.com/quote/CVNA/key-statistics"
df_scraped = scrape_table(url)
if validate_scraped_df(df_scraped):
    timestamp = datetime.now().strftime("%Y%m%d-%H%M")
    filename = f"data/raw/scrape_msn_cvna-overview_{timestamp}.csv"
    df_scraped.to_csv(filename, index=False)


✅ Saved to data/raw/scrape_msn_cvna-overview_20250818-1028.csv
🔗 Scraping from: https://finance.yahoo.com/quote/CVNA/key-statistics
❌ No tables found.
⚠️ DataFrame is empty or missing.


In [38]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime

def scrape_finviz_summary(ticker="CVNA"):
    url = f"https://finviz.com/quote.ashx?t={ticker}"
    headers = {"User-Agent": "Mozilla/5.0"}
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.content, "html.parser")

    table = soup.find("table", class_="snapshot-table2")
    if not table:
        print("❌ Finviz table not found.")
        return None

    rows = table.find_all("tr")
    data = []
    for row in rows:
        cols = row.find_all("td")
        for i in range(0, len(cols), 2):
            label = cols[i].get_text(strip=True)
            value = cols[i+1].get_text(strip=True)
            data.append([label, value])

    df = pd.DataFrame(data, columns=["Metric", "Value"])
    print("✅ Scraped Finviz summary:", df.shape)
    return df

def validate_scraped_df(df):
    if df is None or df.empty:
        print("⚠️ DataFrame is empty or missing.")
        return False
    print("📊 Columns:", df.columns.tolist())
    print("🧮 NA counts:\n", df.isna().sum())
    print("🔢 Sample rows:\n", df.head())
    return True

def save_scraped_df(df, site="finviz", table="summary", ticker="cvna"):
    timestamp = datetime.now().strftime("%Y%m%d-%H%M")
    filename = r"C:\Users\kavin\bootcamp_Kavin_Dhanasekaran\your-project\data\cvna_scrapped.csv"
    df.to_csv(filename, index=False)
    print(f"✅ Saved to {filename}")

df_finviz = scrape_finviz_summary("CVNA")
if validate_scraped_df(df_finviz):
    save_scraped_df(df_finviz)


✅ Scraped Finviz summary: (84, 2)
📊 Columns: ['Metric', 'Value']
🧮 NA counts:
 Metric    0
Value     0
dtype: int64
🔢 Sample rows:
          Metric    Value
0         Index        -
1           P/E    89.73
2     EPS (ttm)     3.99
3   Insider Own    8.67%
4  Shs Outstand  136.50M
✅ Saved to C:\Users\kavin\bootcamp_Kavin_Dhanasekaran\your-project\data\cvna_scrapped.csv
