In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
import sqlite3

# Path to SQLite database
db_path = 'stocks_data.db'

# Step 1: Load raw stock data from SQLite
with sqlite3.connect(db_path) as conn:
    query = "SELECT * FROM stocks"
    raw_data = pd.read_sql(query, conn)

print(f"Loaded data from SQLite: {raw_data.shape[0]} rows")

# Ensure 'Adj Close' is numeric
raw_data['Adj Close'] = pd.to_numeric(raw_data['Adj Close'], errors='coerce')
raw_data = raw_data.dropna(subset=['Adj Close'])


Loaded data from SQLite: 12084 rows


In [2]:
# Step 2: Engineer Features
# 2.1 Moving Averages
raw_data['7-day MA'] = raw_data.groupby('Ticker')['Adj Close'].transform(lambda x: x.rolling(window=7).mean())
raw_data['14-day MA'] = raw_data.groupby('Ticker')['Adj Close'].transform(lambda x: x.rolling(window=14).mean())

# 2.2 Volatility (7-day rolling standard deviation of percentage changes)
raw_data['Daily Return'] = raw_data.groupby('Ticker')['Adj Close'].pct_change()  # Daily percentage changes
raw_data['Volatility'] = raw_data.groupby('Ticker')['Daily Return'].transform(lambda x: x.rolling(window=7).std())

# 2.3 Lagged Prices
raw_data['Lag_1'] = raw_data.groupby('Ticker')['Adj Close'].shift(1)
raw_data['Lag_2'] = raw_data.groupby('Ticker')['Adj Close'].shift(2)

# Drop rows with NaN values caused by rolling/lags
processed_data = raw_data.dropna()

# Step 3: Save processed data back to SQLite
with sqlite3.connect(db_path) as conn:
    processed_data.to_sql('processed_stocks', conn, if_exists='replace', index=False)

print("Processed data saved to SQLite database (table: 'processed_stocks').")

# Optional: Check the processed data
print(processed_data.head())

Processed data saved to SQLite database (table: 'processed_stocks').
                   Date  Adj Close      Close       High        Low  \
13  2015-01-22 00:00:00  60.068398  92.870003  92.970001  91.320000   
14  2015-01-23 00:00:00  58.787739  90.889999  92.480003  90.790001   
15  2015-01-26 00:00:00  59.350456  91.760002  91.940002  90.470001   
16  2015-01-27 00:00:00  58.826542  90.949997  91.599998  90.599998   
17  2015-01-28 00:00:00  56.886135  87.949997  90.930000  87.820000   

         Open    Volume      Company Ticker   7-day MA  14-day MA  \
13  92.309998  13559000  Exxon Mobil    XOM  58.734147  58.857040   
14  92.279999  14706300  Exxon Mobil    XOM  58.816381  58.767411   
15  90.610001  10672500  Exxon Mobil    XOM  59.003029  58.835327   
16  91.239998  12301500  Exxon Mobil    XOM  59.186905  58.887994   
17  90.879997  17829700  Exxon Mobil    XOM  58.893996  58.760020   

    Daily Return  Volatility      Lag_1      Lag_2  
13      0.010885    0.011294  59.421