In [2]:
import numpy as np
import pandas as pd
from datetime import datetime, timedelta

def generate_yahoo_style_data(n_days=1500, seed=42):
    """
    Generate synthetic stock data in Yahoo Finance format.
    Returns DataFrame with columns: Open, High, Low, Close, Adj Close, Volume
    
    Parameters:
    -----------
    n_days : int
        Number of trading days to simulate
    seed : int
        Random seed for reproducibility
    """
    np.random.seed(seed)
    
    # Generate business dates (trading days only)
    end_date = datetime.now()
    start_date = end_date - timedelta(days=int(n_days * 1.5))
    dates = pd.date_range(start=start_date, end=end_date, freq='B')[:n_days]
    
    # Parameters for each stock
    stock_params = {
        'STN': {'price': 100, 'vol': 0.015, 'volume_base': 500000},
        'BWXT': {'price': 75, 'vol': 0.018, 'volume_base': 750000},
        'CEG': {'price': 90, 'vol': 0.02, 'volume_base': 1000000}
    }
    
    all_data = {}
    
    for ticker, params in stock_params.items():
        # Generate daily returns with slight upward drift
        returns = np.random.normal(0.0002, params['vol'], n_days)
        
        # Generate close prices
        close_prices = params['price'] * np.exp(np.cumsum(returns))
        
        # Generate daily volatility for high-low range
        daily_vol = params['vol'] * np.random.chisquare(df=5, size=n_days) / 5
        
        # Generate OHLC data
        high = close_prices * (1 + daily_vol)
        low = close_prices * (1 - daily_vol)
        open_prices = close_prices * (1 + np.random.normal(0, daily_vol/2))
        
        # Ensure proper OHLC relationship
        for i in range(len(close_prices)):
            max_price = max(high[i], low[i], open_prices[i], close_prices[i])
            min_price = min(high[i], low[i], open_prices[i], close_prices[i])
            high[i] = max_price
            low[i] = min_price
        
        # Generate volume with some correlation to price changes
        volume = params['volume_base'] * (1 + np.abs(returns) * 10) * \
                np.random.lognormal(0, 0.5, n_days)
        
        # Generate adjusted close (assuming some dividend events)
        adj_factor = np.ones(n_days)
        # Add random dividend adjustments (about twice per year)
        div_dates = np.random.choice(range(n_days), size=int(n_days/126), replace=False)
        for d in div_dates:
            adj_factor[d:] *= 0.99  # 1% adjustment for dividend
        adj_close = close_prices * adj_factor
        
        # Create DataFrame in Yahoo Finance format
        df = pd.DataFrame({
            'Open': open_prices,
            'High': high,
            'Low': low,
            'Close': close_prices,
            'Adj Close': adj_close,
            'Volume': volume.astype(int)
        }, index=dates)
        
        all_data[ticker] = df
    
    return all_data

# Generate the data
stock_data = generate_yahoo_style_data()

# Display sample of data for each stock
for ticker, df in stock_data.items():
    print(f"\nSample data for {ticker}:")
    print(df.head())
    # Save to CSV
    df.to_csv(f'{ticker}_yahoo_style.csv')

# Print data info
for ticker, df in stock_data.items():
    print(f"\nData info for {ticker}:")
    print(df.info())


Sample data for STN:
                                  Open        High         Low       Close  \
2018-10-08 21:17:55.533943  102.317173  102.899973   98.636038  100.768005   
2018-10-09 21:17:55.533943  100.545062  101.455834   99.702859  100.579347   
2018-10-10 21:17:55.533943  100.500677  102.755273  100.407895  101.581584   
2018-10-11 21:17:55.533943  104.385313  104.926262  102.973259  103.949760   
2018-10-12 21:17:55.533943  103.184420  105.035420  102.176614  103.606017   

                             Adj Close  Volume  
2018-10-08 21:17:55.533943  100.768005  393176  
2018-10-09 21:17:55.533943  100.579347  535200  
2018-10-10 21:17:55.533943  101.581584  650881  
2018-10-11 21:17:55.533943  103.949760  374541  
2018-10-12 21:17:55.533943  102.569957  521861  

Sample data for BWXT:
                                 Open       High        Low      Close  \
2018-10-08 21:17:55.533943  76.611798  76.894729  76.154133  76.524431   
2018-10-09 21:17:55.533943  77.740765  78.77