In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
import sqlite3

# Path to SQLite database
db_path = 'database/stocks_data.db'

# Step 1: Load raw stock data from SQLite.
# NOTE: using a sqlite3 connection as a context manager only scopes a
# transaction (commit on success, rollback on error); it does NOT close the
# connection. Close it explicitly so the notebook kernel does not keep the
# database file open after the read.
conn = sqlite3.connect(db_path)
try:
    query = "SELECT * FROM stocks"
    raw_data = pd.read_sql(query, conn)
finally:
    conn.close()

# Row count as read from the table, before any cleaning below.
print(f"Loaded data from SQLite: {raw_data.shape[0]} rows")

# Ensure 'Adj Close' is numeric: values that cannot be parsed become NaN
# (errors='coerce') and the affected rows are then removed, so every
# remaining row has a usable adjusted close price.
raw_data['Adj Close'] = pd.to_numeric(raw_data['Adj Close'], errors='coerce')
raw_data = raw_data.dropna(subset=['Adj Close'])


Loaded data from SQLite: 107400 rows


In [2]:
# Step 2: Engineer Features
# Rolling windows, pct_change and shift all work on *row order*, and the data
# was read with "SELECT * FROM stocks" (no ORDER BY), so the rows are put in
# chronological order within each ticker before any feature is computed.
# The features are calculated on that sorted view and assigned back to
# raw_data, which aligns on the index: raw_data keeps its original row order
# and still gains the same columns as before.
# Assumes 'Date' sorts chronologically as stored (ISO-style
# 'YYYY-MM-DD HH:MM:SS' text does) and that raw_data's index is unique (it is
# the default index from pd.read_sql) -- confirm if the load step changes.
ordered = raw_data.sort_values(['Ticker', 'Date'], kind='stable')
adj_close_by_ticker = ordered.groupby('Ticker')['Adj Close']

# 2.1 Moving Averages (per ticker, rounded to 2 decimals)
raw_data['7-day MA'] = adj_close_by_ticker.transform(lambda x: x.rolling(window=7).mean().round(2))
raw_data['14-day MA'] = adj_close_by_ticker.transform(lambda x: x.rolling(window=14).mean().round(2))

# 2.2 Volatility (7-day rolling standard deviation of percentage changes)
daily_return = adj_close_by_ticker.pct_change()  # Daily percentage changes
raw_data['Daily Return'] = daily_return
# Group the returns by the ticker of the same (sorted) rows so the rolling
# window never spans two different tickers.
raw_data['Volatility'] = daily_return.groupby(ordered['Ticker']).transform(lambda x: x.rolling(window=7).std())

# 2.3 Lagged Prices (previous one and two rows of the same ticker)
raw_data['Lag_1'] = adj_close_by_ticker.shift(1)
raw_data['Lag_2'] = adj_close_by_ticker.shift(2)

# Drop every row that has a NaN in ANY column. This removes the warm-up rows
# produced by the rolling windows and lags, and also any row with a missing
# value in one of the original columns.
processed_data = raw_data.dropna()

# Step 3: Save processed data back to SQLite
# `with conn:` commits on success / rolls back on error but does not close the
# connection, so it is closed explicitly afterwards.
conn = sqlite3.connect(db_path)
try:
    with conn:
        processed_data.to_sql('processed_stocks', conn, if_exists='replace', index=False)
finally:
    conn.close()

print("Processed data saved to SQLite database (table: 'processed_stocks').")

# Optional: Check the processed data
print(processed_data.head())

Processed data saved to SQLite database (table: 'processed_stocks').
                   Date  Adj Close     Close      High       Low      Open  \
13  2000-01-21 00:00:00  19.416439  42.50000  42.59375  41.65625  42.00000   
14  2000-01-24 00:00:00  19.245108  42.12500  43.15625  41.50000  42.71875   
15  2000-01-25 00:00:00  19.202286  42.03125  43.06250  41.90625  42.00000   
16  2000-01-26 00:00:00  19.073801  41.75000  42.28125  41.34375  42.03125   
17  2000-01-27 00:00:00  18.502724  40.50000  41.65625  39.87500  41.65625   

      Volume      Company Ticker  7-day MA  14-day MA  Daily Return  \
13  14343600  Exxon Mobil    XOM     19.31      19.02      0.015683   
14  12459400  Exxon Mobil    XOM     19.33      19.12     -0.008824   
15  11921600  Exxon Mobil    XOM     19.29      19.23     -0.002225   
16   9298000  Exxon Mobil    XOM     19.28      19.28     -0.006691   
17  10548400  Exxon Mobil    XOM     19.16      19.21     -0.029940   

    Volatility      Lag_1      Lag_