# In [1]:
# Import necessary libraries
import os
import sqlite3
from contextlib import closing

import pandas as pd

# Define constants
DB_PATH = 'database/stocks_data.db'  # Path to the SQLite database (relative to the working directory)
RAW_STOCKS_TABLE = 'raw_stocks'  # Table containing raw stock data
PROCESSED_STOCKS_TABLE = 'processed_stocks'  # Table for storing processed data

# Fail fast when the database file itself is missing: everything below reads
# from it, so there is nothing to process until it has been created.
if not os.path.exists(DB_PATH):
    raise FileNotFoundError(f"Database at {DB_PATH} does not exist. Run the data fetching script first.")


# In [2]:
def initialize_processed_table():
    """
    Creates the processed_stocks table if it does not already exist.

    The statement is CREATE TABLE IF NOT EXISTS, so calling this repeatedly is
    safe, but an existing table is left untouched: its schema is never altered.
    Rows are keyed on (Date, Ticker).
    """
    # sqlite3's own context manager only commits/rolls back; closing() is what
    # actually releases the connection when the block ends.
    with closing(sqlite3.connect(DB_PATH)) as conn:
        with conn:
            conn.execute(f"""
            CREATE TABLE IF NOT EXISTS {PROCESSED_STOCKS_TABLE} (
                Date TEXT,
                Ticker TEXT,
                Open REAL,
                High REAL,
                Low REAL,
                Close REAL,
                Adj_Close REAL,
                Volume INTEGER,
                MovingAverage7Day REAL,
                MovingAverage14Day REAL,
                Volatility REAL,
                Lag1 REAL,
                Lag2 REAL,
                PRIMARY KEY (Date, Ticker)
            )
            """)
    print(f"Table {PROCESSED_STOCKS_TABLE} ensured in database.")



# In [3]:
# Function to calculate new features for preprocessing
def calculate_features(data):
    """
    Calculates additional features for the stock data.

    Rows are ordered by (Ticker, Date) before any window is applied and every
    feature is computed per ticker, so the result does not depend on the order
    of the incoming rows and values never leak from one ticker into another.

    Parameters:
    data (pd.DataFrame): The stock data to process. Must contain the columns
        'Date', 'Ticker', 'Adj_Close' and 'Volume'. 'Date' values must sort
        chronologically (e.g. ISO-8601 strings or datetimes). The input frame
        is not modified.

    Returns:
    pd.DataFrame: A new frame holding the four input columns plus
        MovingAverage7Day, MovingAverage14Day, Volatility (7-row rolling
        standard deviation), Lag1 and Lag2, all derived from 'Adj_Close'.
        Rows containing any missing value are dropped, which removes the first
        13 rows of each ticker (the 14-row window is not yet full there).
    """
    data = data[['Date', 'Ticker', 'Adj_Close', 'Volume']].copy()  # Only keep necessary columns

    # Rolling windows and lags are positional, so the rows must be in
    # chronological order within each ticker before they are applied.
    data = data.sort_values(['Ticker', 'Date'])
    prices = data.groupby('Ticker', sort=False)['Adj_Close']

    data['MovingAverage7Day'] = prices.transform(lambda s: s.rolling(window=7).mean())
    data['MovingAverage14Day'] = prices.transform(lambda s: s.rolling(window=14).mean())
    data['Volatility'] = prices.transform(lambda s: s.rolling(window=7).std())
    data['Lag1'] = prices.shift(1)
    data['Lag2'] = prices.shift(2)
    return data.dropna()



# In [4]:
# Function to process and store stock data
def process_and_store_data(ticker):
    """
    Processes stock data for a specific ticker and saves it to the database.

    Raw rows are read in date order, passed through calculate_features, and
    written to the processed table. Rows previously stored for the ticker are
    replaced in the same transaction, so the function can be re-run without
    violating the table's (Date, Ticker) primary key.

    Any exception is caught and reported with print so that one failing ticker
    does not stop a batch run; nothing is raised to the caller.

    Parameters:
    ticker (str): The stock ticker symbol (e.g., 'AAPL').
    """
    try:
        # closing() releases the connection; sqlite3's own context manager
        # only commits or rolls back.
        with closing(sqlite3.connect(DB_PATH)) as conn:
            # Without ORDER BY the row order is unspecified, and the rolling
            # features computed downstream depend on chronological order.
            query = f"SELECT * FROM {RAW_STOCKS_TABLE} WHERE Ticker = ? ORDER BY Date"
            raw_data = pd.read_sql_query(query, conn, params=(ticker,))

            if raw_data.empty:
                print(f"No raw data found for ticker {ticker}. Skipping.")
                return

            # Process the data
            processed_data = calculate_features(raw_data)

            # Replace the ticker's rows atomically: if the insert fails, the
            # delete is rolled back and the previous rows are kept.
            with conn:
                table_exists = conn.execute(
                    "SELECT 1 FROM sqlite_master WHERE type = 'table' AND name = ?",
                    (PROCESSED_STOCKS_TABLE,),
                ).fetchone() is not None
                if table_exists:
                    conn.execute(
                        f"DELETE FROM {PROCESSED_STOCKS_TABLE} WHERE Ticker = ?",
                        (ticker,),
                    )
                processed_data.to_sql(PROCESSED_STOCKS_TABLE, conn, if_exists='append', index=False)

        print(f"Processed data for {ticker} saved to the database.")
    except Exception as e:
        # Deliberate best-effort boundary for batch processing.
        print(f"An error occurred while processing data for {ticker}: {e}")


# In [5]:
# Function to process all tickers in the raw stocks table
def process_all_tickers():
    """
    Processes stock data for all tickers in the raw_stocks table.

    The distinct ticker symbols are read first and the connection is closed
    before processing starts; each ticker is then handled by
    process_and_store_data, one at a time.
    """
    # closing() releases the connection; sqlite3's own context manager would
    # leave it open after the block.
    with closing(sqlite3.connect(DB_PATH)) as conn:
        query = f"SELECT DISTINCT Ticker FROM {RAW_STOCKS_TABLE}"
        tickers = pd.read_sql_query(query, conn)['Ticker'].tolist()

    for ticker in tickers:
        process_and_store_data(ticker)
