### Stock Prices EDA and Technical Indicators

This notebook performs exploratory data analysis (EDA) on stock price data and calculates technical indicators for each stock in the yfinance_data folder.

#### Setup and Data Loading

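Import the required libraries (TA-Lib, imported as `talib`, must be installed in the kernel's environment) and load every CSV file in `../data/yfinance_data/`, parsing the `Date` column as datetime and using it as the index.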

In [1]:
# Import libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import talib
import os

# Function to load stock data
def load_stock_data(file_path):
    """Read a stock price CSV and return it indexed by date.

    Parameters
    ----------
    file_path : str or path-like
        Location of a CSV file that contains a ``Date`` column.

    Returns
    -------
    pandas.DataFrame
        The CSV contents with ``Date`` parsed to datetime and moved from the
        columns into the index.

    Raises
    ------
    KeyError
        If the file has no ``Date`` column.
    """
    prices = pd.read_csv(file_path)
    parsed_dates = pd.to_datetime(prices['Date'])
    # Build a new frame rather than mutating in place; the result is the same.
    return prices.assign(Date=parsed_dates).set_index('Date')

# Load all stock data into a dict keyed by ticker symbol.
STOCK_DATA_DIR = '../data/yfinance_data/'

# os.listdir returns entries in arbitrary order; sorting makes the processing
# order (and the order of stock_dfs) reproducible across runs and machines.
stock_files = sorted(os.listdir(STOCK_DATA_DIR))
stock_dfs = {}
for file in stock_files:
    if file.endswith('.csv'):
        # The ticker is the part of the file name before the first underscore.
        # Strip the extension first so a name without an underscore
        # (e.g. "AAPL.csv") yields "AAPL" instead of "AAPL.csv".
        ticker = os.path.splitext(file)[0].split('_')[0]
        stock_dfs[ticker] = load_stock_data(os.path.join(STOCK_DATA_DIR, file))

ModuleNotFoundError: No module named 'talib'

### EDA and Technical Indicators

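For each stock, print summary information (dataset info, missing values, descriptive statistics), plot the closing price and the volume distribution, and compute the 20-day SMA, 14-day RSI, and MACD (12/26/9) with TA-Lib. The figures and the data with the indicator columns added are saved to `../data/processed/`.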

In [None]:
# Process each stock: print summary statistics, plot price and volume,
# compute TA-Lib indicators, and save the figures plus the enriched data.
PROCESSED_DIR = '../data/processed'
# savefig() and to_csv() do not create missing directories.
os.makedirs(PROCESSED_DIR, exist_ok=True)

for ticker, df in stock_dfs.items():
    print(f'\n=== EDA for {ticker} ===')
    print('Dataset Info:')
    # DataFrame.info() prints its report itself and returns None, so wrapping
    # it in print() would emit a stray "None" line.
    df.info()

    # Check for missing values
    print('\nMissing Values:')
    print(df.isnull().sum())

    # Descriptive statistics
    print('\nDescriptive Statistics:')
    print(df[['Open', 'High', 'Low', 'Close', 'Volume']].describe())

    # Plot closing price
    fig, ax = plt.subplots(figsize=(12, 6))
    df['Close'].plot(ax=ax, color='blue')
    ax.set_title(f'{ticker} Closing Price Over Time')
    ax.set_xlabel('Date')
    ax.set_ylabel('Price (USD)')
    ax.grid(True)
    fig.savefig(f'{PROCESSED_DIR}/{ticker}_closing_price.png')
    plt.show()
    # Release the figure explicitly; this loop creates three per ticker.
    plt.close(fig)

    # Plot volume distribution
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.histplot(df['Volume'], bins=30, kde=True, ax=ax)
    ax.set_title(f'{ticker} Volume Distribution')
    ax.set_xlabel('Volume')
    ax.set_ylabel('Frequency')
    ax.grid(True)
    fig.savefig(f'{PROCESSED_DIR}/{ticker}_volume_distribution.png')
    plt.show()
    plt.close(fig)

    # Calculate technical indicators. The new columns are written onto the
    # frame held in stock_dfs, so they are also part of the CSV saved below.
    close = df['Close']
    df['SMA_20'] = talib.SMA(close, timeperiod=20)
    df['RSI'] = talib.RSI(close, timeperiod=14)
    df['MACD'], df['MACD_Signal'], df['MACD_Hist'] = talib.MACD(
        close, fastperiod=12, slowperiod=26, signalperiod=9
    )

    # Plot technical indicators as three stacked panels
    fig, (ax_price, ax_rsi, ax_macd) = plt.subplots(3, 1, figsize=(12, 10))

    # Closing price and SMA
    df[['Close', 'SMA_20']].plot(ax=ax_price)
    ax_price.set_title(f'{ticker} Closing Price and 20-Day SMA')
    ax_price.set_ylabel('Price (USD)')
    ax_price.grid(True)

    # RSI with reference lines at 70 and 30
    df['RSI'].plot(ax=ax_rsi, color='purple')
    ax_rsi.set_title(f'{ticker} RSI')
    ax_rsi.set_ylabel('RSI')
    ax_rsi.axhline(70, color='red', linestyle='--', alpha=0.5)
    ax_rsi.axhline(30, color='green', linestyle='--', alpha=0.5)
    ax_rsi.grid(True)

    # MACD. All three series are drawn with matplotlib directly so they share
    # one date-based x-axis. pandas' kind='bar' places bars at integer
    # positions 0..n-1 with one tick label per row, which does not line up
    # with date-indexed line plots on the same axes.
    ax_macd.plot(df.index, df['MACD'], label='MACD')
    ax_macd.plot(df.index, df['MACD_Signal'], label='MACD_Signal')
    ax_macd.bar(df.index, df['MACD_Hist'], width=1.0, alpha=0.2, label='MACD_Hist')
    ax_macd.set_title(f'{ticker} MACD')
    ax_macd.set_xlabel('Date')
    ax_macd.set_ylabel('MACD')
    ax_macd.legend()
    ax_macd.grid(True)

    fig.tight_layout()
    fig.savefig(f'{PROCESSED_DIR}/{ticker}_technical_indicators.png')
    plt.show()
    plt.close(fig)

    # Save processed data
    df.to_csv(f'{PROCESSED_DIR}/{ticker}_processed.csv')
    print(f'Saved processed data for {ticker}')