In [21]:
import os
import pandas as pd

# Directory where stock data CSV files are stored
data_dir = "stock_data"
output_dir = "processed_stock_data"
os.makedirs(output_dir, exist_ok=True)

# Function to preprocess data for each stock
def preprocess_stock_data(file_path):
    # Load the stock data
    data = pd.read_csv(file_path)
    
    # Ensure the Date column is a datetime object
    data["Date"] = pd.to_datetime(data["Date"])
    data.sort_values(by="Date", inplace=True)
    
    # Create new columns
    data["Daily Return"] = data["Close"].pct_change()  # Daily percentage return
    data["5-Day Moving Avg"] = data["Close"].rolling(window=5).mean()  # 5-day moving average
    data["10-Day Volatility"] = data["Close"].rolling(window=10).std()  # 10-day rolling standard deviation (volatility)
    
    # Drop rows with NaN values caused by rolling calculations
    data.dropna(inplace=True)
    
    return data

# Process and save each stock's data
for file_name in os.listdir(data_dir):
    if file_name.endswith(".csv"):
        stock_file_path = os.path.join(data_dir, file_name)
        stock_name = os.path.splitext(file_name)[0]
        print(f"Processing data for {stock_name}...")
        
        # Preprocess the data
        processed_data = preprocess_stock_data(stock_file_path)
        
        # Save the processed data
        output_path = os.path.join(output_dir, f"{stock_name}_processed.csv")
        processed_data.to_csv(output_path, index=False)
        print(f"Saved processed data for {stock_name} to {output_path}")

print("Data preprocessing complete!")


Processing data for AMZN...
Saved processed data for AMZN to processed_stock_data/AMZN_processed.csv
Processing data for MSFT...
Saved processed data for MSFT to processed_stock_data/MSFT_processed.csv
Processing data for NVDA...
Saved processed data for NVDA to processed_stock_data/NVDA_processed.csv
Processing data for TSLA...
Saved processed data for TSLA to processed_stock_data/TSLA_processed.csv
Processing data for GOOGL...
Saved processed data for GOOGL to processed_stock_data/GOOGL_processed.csv
Processing data for META...
Saved processed data for META to processed_stock_data/META_processed.csv
Processing data for AAPL...
Saved processed data for AAPL to processed_stock_data/AAPL_processed.csv
Data preprocessing complete!
