In [1]:
# Import required libraries
import pandas as pd
import numpy as np
import sys
from pathlib import Path

# Add project root to Python path to import utils
project_root = Path.cwd().parent
sys.path.append(str(project_root))

# Import our custom indicators
from utils.indicators import calculate_ema, calculate_rsi, calculate_macd

# Load the merged dataset
df = pd.read_csv('../data/merged_stocks.csv')
df['Date'] = pd.to_datetime(df['Date'])
print(f"Loaded data with shape: {df.shape}")
print("\nSample of raw data:")
print(df.head())


Loaded data with shape: (3765, 9)

Sample of raw data:
                        Date        Open        High         Low       Close  \
0  2020-07-15 00:00:00-04:00   96.225127   96.475435   93.794954   94.995461   
1  2020-07-15 00:00:00-04:00  200.776424  202.472239  196.436297  199.320129   
2  2020-07-15 00:00:00-04:00  102.866669  103.333336   97.133331  103.067329   
3  2020-07-16 00:00:00-04:00   93.865422   94.684389   93.226285   93.826538   
4  2020-07-16 00:00:00-04:00  196.790796  197.078225  193.830315  195.372833   

      Volume  Dividends  Stock splits Ticker  
0  153198000        0.0           0.0   AAPL  
1   32179400        0.0           0.0   MSFT  
2  245517000        0.0           0.0   TSLA  
3  110577600        0.0           0.0   AAPL  
4   29940700        0.0           0.0   MSFT  


  df['Date'] = pd.to_datetime(df['Date'])


In [2]:
# Function to calculate all indicators for a single stock
def calculate_all_indicators(stock_df: pd.DataFrame) -> pd.DataFrame:
    """
    Calculate all technical indicators for a single stock's data.
    
    Args:
        stock_df (pd.DataFrame): DataFrame containing stock data
        
    Returns:
        pd.DataFrame: Original data with new indicator columns
    """
    # Make a copy to avoid modifying original
    df = stock_df.copy()
    
    # Calculate EMAs
    df['EMA10'] = calculate_ema(df['Close'], window=10)
    df['EMA50'] = calculate_ema(df['Close'], window=50)
    
    # Calculate RSI
    df['RSI'] = calculate_rsi(df['Close'], window=14)
    
    # Calculate MACD components
    macd_line, signal_line, histogram = calculate_macd(df['Close'])
    df['MACD'] = macd_line
    df['MACD_Signal'] = signal_line
    df['MACD_Hist'] = histogram
    
    return df

# Process each stock separately
enriched_dfs = []
for ticker in df['Ticker'].unique():
    print(f"\nProcessing {ticker}...")
    
    # Get data for this ticker
    stock_df = df[df['Ticker'] == ticker].sort_values('Date')
    
    # Calculate indicators
    enriched_df = calculate_all_indicators(stock_df)
    
    # Add to list
    enriched_dfs.append(enriched_df)
    
    print(f"Added indicators for {ticker}")
    
# Combine all processed data
enriched_df = pd.concat(enriched_dfs, axis=0)
enriched_df = enriched_df.sort_values(['Date', 'Ticker'])



Processing AAPL...
Added indicators for AAPL

Processing MSFT...
Added indicators for MSFT

Processing TSLA...
Added indicators for TSLA


In [3]:
# Display summary of the enriched dataset
print("\nEnriched dataset summary:")
print(f"Shape: {enriched_df.shape}")
print("\nColumns:", ', '.join(enriched_df.columns))

print("\nSample of enriched data:")
print(enriched_df.head())

print("\nMissing values summary:")
print(enriched_df.isnull().sum())

# Save enriched dataset
output_path = '../data/enriched_merged_stocks.csv'
enriched_df.to_csv(output_path, index=False)
print(f"\nSaved enriched dataset to: {output_path}")



Enriched dataset summary:
Shape: (3765, 15)

Columns: Date, Open, High, Low, Close, Volume, Dividends, Stock splits, Ticker, EMA10, EMA50, RSI, MACD, MACD_Signal, MACD_Hist

Sample of enriched data:
                        Date        Open        High         Low       Close  \
0  2020-07-15 00:00:00-04:00   96.225127   96.475435   93.794954   94.995461   
1  2020-07-15 00:00:00-04:00  200.776424  202.472239  196.436297  199.320129   
2  2020-07-15 00:00:00-04:00  102.866669  103.333336   97.133331  103.067329   
3  2020-07-16 00:00:00-04:00   93.865422   94.684389   93.226285   93.826538   
4  2020-07-16 00:00:00-04:00  196.790796  197.078225  193.830315  195.372833   

      Volume  Dividends  Stock splits Ticker       EMA10       EMA50  RSI  \
0  153198000        0.0           0.0   AAPL   93.000522  108.420025  NaN   
1   32179400        0.0           0.0   MSFT  197.044698  201.788203  NaN   
2  245517000        0.0           0.0   TSLA  101.982266  120.821227  NaN   
3  11057760