In [16]:
# Import necessary libraries
import yfinance as yf
import pandas as pd
import sqlite3
import os
import requests
from bs4 import BeautifulSoup

# Define constants
DB_PATH = 'database/stocks_data.db'  # Path to the SQLite database
RAW_STOCKS_TABLE = 'raw_stocks'  # Table name for storing raw stock data

# Ensure the database directory exists.
# sqlite3 creates a missing database *file* on connect, but not a missing
# parent directory, so the directory has to be created up front.
# `or '.'` covers a DB_PATH that has no directory component.
os.makedirs(os.path.dirname(DB_PATH) or '.', exist_ok=True)

if not os.path.exists(DB_PATH):
    print(f"Database will be created at {DB_PATH}")

In [17]:
# Define the URL for the Wikipedia page
url = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"

# Step 1: Scrape data from the Wikipedia page
# The timeout keeps a stalled connection from hanging the cell forever.
# The explicit User-Agent identifies this script to the server; consider
# adding contact details to it (NOTE(review): Wikimedia's User-Agent policy
# asks automated clients to do so -- confirm the wording you want here).
response = requests.get(
    url,
    headers={"User-Agent": "sp500-tickers-notebook/1.0 (python-requests)"},
    timeout=30,
)
if response.status_code != 200:
    # Raise instead of calling exit(): an exception reliably stops the cell
    # and leaves the reason visible in the notebook output.
    raise RuntimeError(f"Failed to fetch the webpage. Status code: {response.status_code}")

soup = BeautifulSoup(response.content, "html.parser")
table = soup.find("table", {"id": "constituents"})
if table is None:
    raise RuntimeError("Could not find the table with id 'constituents' on the page.")

# Extract data into lists
symbols = []
companies = []
table_body = table.find("tbody") or table  # fall back to the table if there is no <tbody>
for row in table_body.find_all("tr")[1:]:  # Skip the header row
    columns = row.find_all("td")
    if len(columns) > 1:  # Ensure the row contains enough data
        symbols.append(columns[0].text.strip())  # Symbol column
        companies.append(columns[1].text.strip())  # Company column

if not symbols:
    # Stop before touching the database so a failed parse cannot wipe the
    # previously stored ticker list.
    raise RuntimeError("No ticker rows were parsed from the 'constituents' table.")

# Step 2: Create a DataFrame
# Symbol is the table's PRIMARY KEY, so keep only the first row per symbol.
ticker_data = (
    pd.DataFrame({"Symbol": symbols, "Company": companies})
    .drop_duplicates(subset="Symbol")
    .reset_index(drop=True)
)

# Step 3: Save data to SQLite database
db_path = DB_PATH  # same database file as the rest of the notebook
table_name = 'tickers'

# sqlite3 will not create a missing parent directory.
os.makedirs(os.path.dirname(db_path) or '.', exist_ok=True)

conn = sqlite3.connect(db_path)
try:
    # Rebuild the table with the explicit schema, then append into it.
    # to_sql(if_exists='replace') would drop the table and recreate it from
    # the DataFrame dtypes, losing the PRIMARY KEY declared here.
    conn.execute(f"DROP TABLE IF EXISTS {table_name}")
    conn.execute(f"""
    CREATE TABLE IF NOT EXISTS {table_name} (
        Symbol TEXT PRIMARY KEY,
        Company TEXT
    )
    """)
    print(f"Table '{table_name}' ensured in database.")

    # Insert data into the table
    ticker_data.to_sql(table_name, conn, if_exists='append', index=False)
    conn.commit()
    print(f"Ticker data saved to the '{table_name}' table successfully.")
finally:
    # `with sqlite3.connect(...)` only manages the transaction; the
    # connection has to be closed explicitly.
    conn.close()


Table 'tickers' ensured in database.
Ticker data saved to the 'tickers' table successfully.


In [18]:
# Function to initialize the database
def initialize_database():
    """
    Creates the raw_stocks table if it doesn't exist in the database.

    The database location is read from DB_PATH and the table name from
    RAW_STOCKS_TABLE. The parent directory of DB_PATH is created when it is
    missing, because sqlite3 creates the database file but not its directory.
    Rows are keyed by (Date, Ticker). Safe to call repeatedly.

    Returns:
    None
    """
    # `or '.'` covers a DB_PATH that has no directory component.
    os.makedirs(os.path.dirname(DB_PATH) or '.', exist_ok=True)

    conn = sqlite3.connect(DB_PATH)
    try:
        # `with conn` commits on success and rolls back on error; it does not
        # close the connection, hence the surrounding try/finally.
        with conn:
            conn.execute(f"""
            CREATE TABLE IF NOT EXISTS {RAW_STOCKS_TABLE} (
                Date TEXT,
                Open REAL,
                High REAL,
                Low REAL,
                Close REAL,
                Adj_Close REAL,
                Volume INTEGER,
                Ticker TEXT,
                PRIMARY KEY (Date, Ticker)
            )
            """)
            print(f"Table {RAW_STOCKS_TABLE} ensured in database.")
    finally:
        conn.close()


In [19]:
# Function to fetch stock data and save it to the database
def fetch_and_store_stock_data(ticker):
    """
    Fetches stock data from Yahoo Finance and saves it to the database.

    Daily rows from 2000-01-01 up to the 2024-12-31 end date are downloaded
    with yfinance, reshaped to flat column names, tagged with the ticker and
    written to RAW_STOCKS_TABLE in the database at DB_PATH. Rows whose
    (Date, Ticker) key is already stored are replaced, so the function can be
    re-run for the same ticker. Errors are reported with print() rather than
    raised, so one failing ticker does not stop a bulk run.

    Parameters:
    ticker (str): The stock ticker symbol (e.g., 'AAPL').

    Returns:
    None
    """
    try:
        print(f"Fetching data for {ticker}...")

        # Fetch data from yfinance.
        # auto_adjust=False is passed explicitly because the code below
        # expects an 'Adj Close' column. NOTE(review): newer yfinance
        # releases appear to default to auto_adjust=True, which omits that
        # column -- confirm against the installed version.
        stock_data = yf.download(
            ticker,
            start='2000-01-01',
            end='2024-12-31',
            progress=True,
            auto_adjust=False,
        )

        # Nothing came back: do not report it as saved.
        if stock_data is None or stock_data.empty:
            print(f"No data returned for {ticker}; nothing saved.")
            return

        # Reset the index to make 'Date' a column
        stock_data = stock_data.reset_index()

        # Flatten column names if multi-indexed (keep the first level)
        stock_data.columns = [col[0] if isinstance(col, tuple) else col for col in stock_data.columns]

        # Rename columns to match the database schema
        stock_data = stock_data.rename(columns={'Adj Close': 'Adj_Close'})

        # Add the Ticker column
        stock_data['Ticker'] = ticker

        # Connect to the database and save the data. The connection is closed
        # explicitly: sqlite3's context manager would only end the transaction.
        conn = sqlite3.connect(DB_PATH)
        try:
            _upsert_stock_rows(conn, stock_data)
        finally:
            conn.close()

        print(f"Data for {ticker} saved to the database.")
    except Exception as e:
        print(f"An error occurred while fetching data for {ticker}: {e}")


def _upsert_stock_rows(conn, stock_data):
    """Write stock_data into RAW_STOCKS_TABLE, replacing rows with an existing key."""
    # fetchall() exhausts the cursor so no statement stays active on the connection.
    table_exists = bool(conn.execute(
        "SELECT 1 FROM sqlite_master WHERE type = 'table' AND name = ?",
        (RAW_STOCKS_TABLE,),
    ).fetchall())

    if not table_exists:
        # No table yet: let pandas create it from the frame.
        stock_data.to_sql(RAW_STOCKS_TABLE, conn, if_exists='append', index=False)
        return

    # Load through a staging table so pandas serialises the values exactly as
    # it would for a direct append, then merge with INSERT OR REPLACE so rows
    # that already exist are overwritten instead of raising IntegrityError.
    staging_table = f"{RAW_STOCKS_TABLE}_staging"
    stock_data.to_sql(staging_table, conn, if_exists='replace', index=False)
    try:
        column_list = ", ".join(f'"{name}"' for name in stock_data.columns)
        with conn:  # commit on success, roll back on error
            conn.execute(
                f'INSERT OR REPLACE INTO "{RAW_STOCKS_TABLE}" ({column_list}) '
                f'SELECT {column_list} FROM "{staging_table}"'
            )
    finally:
        conn.execute(f'DROP TABLE IF EXISTS "{staging_table}"')
        conn.commit()


In [20]:
# Function to fetch data for multiple tickers in bulk
def fetch_and_store_bulk_data(tickers):
    """
    Fetches stock data for multiple tickers and saves it to the database.

    Each distinct ticker is passed once, in first-seen order, to
    fetch_and_store_stock_data.

    Parameters:
    tickers (list of str): List of stock ticker symbols. A single symbol
        given as a plain string is treated as a one-element list.

    Returns:
    None
    """
    # A bare string would otherwise be iterated character by character
    # ("AAPL" -> "A", "A", "P", "L").
    if isinstance(tickers, str):
        tickers = [tickers]

    # dict.fromkeys drops repeated symbols while keeping their order, so the
    # same ticker is not downloaded twice in one run.
    for ticker in dict.fromkeys(tickers):
        fetch_and_store_stock_data(ticker)
