In [17]:
import pandas as pd
import numpy as np
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.stattools import adfuller
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta

In [18]:
def read_stock_data(csv_path):
    """
    Read stock data from CSV file with specific format:
    Date, Close/Last, Volume, Open, High, Low
    """
    # Read the CSV file with specific handling for spaces after commas
    df = pd.read_csv(csv_path, thousands=',', skipinitialspace=True)
    
    print("Columns found in CSV:", df.columns.tolist())
    
    # Convert Date column to datetime
    df['Date'] = pd.to_datetime(df['Date'])
    
    # Clean price columns by removing '$' and converting to float
    price_columns = ['Close/Last', 'Open', 'High', 'Low']
    for col in price_columns:
        df[col] = df[col].str.replace('$', '').astype(float)
    
    # Set Date as index
    df.set_index('Date', inplace=True)
    
    # Rename Close/Last to Close for consistency
    df = df.rename(columns={'Close/Last': 'Close'})
    
    return df

In [19]:
def plot_comprehensive_analysis(df):
    """
    Create comprehensive visualization of the stock data
    """
    plt.style.use('ggplot')  # Use a valid matplotlib style
    fig = plt.figure(figsize=(15, 12))
    
    # 1. Price and Volume Plot
    ax1 = plt.subplot2grid((3, 2), (0, 0), colspan=2)
    ax1.plot(df.index, df['Close'], label='Close Price', color='blue', linewidth=1)
    ax1.set_title('Stock Price Over Time')
    ax1.set_ylabel('Price ($)')
    ax1.grid(True)
    
    # Add volume as bars
    ax1v = ax1.twinx()
    ax1v.bar(df.index, df['Volume'], alpha=0.3, color='gray', label='Volume')
    ax1v.set_ylabel('Volume')
    
    # Combine legends
    lines1, labels1 = ax1.get_legend_handles_labels()
    lines2, labels2 = ax1v.get_legend_handles_labels()
    ax1.legend(lines1 + lines2, labels1 + labels2, loc='upper left')
    
    # 2. Candlestick-like Daily Range
    ax2 = plt.subplot2grid((3, 2), (1, 0))
    df['Daily_Range'] = df['High'] - df['Low']
    ax2.fill_between(df.index, df['High'], df['Low'], alpha=0.3, color='green')
    ax2.plot(df.index, df['Close'], color='blue', linewidth=1)
    ax2.set_title('Daily Price Range')
    ax2.set_ylabel('Price ($)')
    ax2.grid(True)
    
    # 3. Volume Distribution
    ax3 = plt.subplot2grid((3, 2), (1, 1))
    ax3.hist(df['Volume'], bins=50, color='gray')
    ax3.set_title('Volume Distribution')
    ax3.set_xlabel('Volume')
    
    # 4. Daily Returns
    ax4 = plt.subplot2grid((3, 2), (2, 0))
    daily_returns = df['Close'].pct_change() * 100
    ax4.plot(df.index, daily_returns, color='purple', linewidth=1)
    ax4.set_title('Daily Returns (%)')
    ax4.set_ylabel('Returns (%)')
    ax4.grid(True)
    
    # 5. Returns Distribution
    ax5 = plt.subplot2grid((3, 2), (2, 1))
    ax5.hist(daily_returns.dropna(), bins=50, color='purple')
    ax5.set_title('Returns Distribution')
    ax5.set_xlabel('Returns (%)')
    
    plt.tight_layout()
    plt.show()

In [20]:
def check_stationarity(timeseries):
    """
    Perform Augmented Dickey-Fuller test and plot stationarity analysis
    """
    # ADF Test
    result = adfuller(timeseries)
    print('Augmented Dickey-Fuller Test:')
    print('ADF Statistic:', result[0])
    print('p-value:', result[1])
    print('Critical values:')
    for key, value in result[4].items():
        print(f'\t{key}: {value:.3f}')
    
    # Stationarity plots
    fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(12, 10))
    
    # Original Price
    ax1.plot(timeseries)
    ax1.set_title('Original Price Series')
    ax1.grid(True)
    
    # Rolling Statistics
    rolling_mean = timeseries.rolling(window=30).mean()
    rolling_std = timeseries.rolling(window=30).std()
    ax2.plot(timeseries, label='Original', alpha=0.5)
    ax2.plot(rolling_mean, label='30-day Rolling Mean', color='red')
    ax2.plot(rolling_std, label='30-day Rolling Std', color='green')
    ax2.set_title('Rolling Statistics')
    ax2.grid(True)
    ax2.legend()
    
    # First Difference
    ax3.plot(timeseries.diff().dropna())
    ax3.set_title('First Difference')
    ax3.grid(True)
    
    plt.tight_layout()
    plt.show()
    
    return result[1] < 0.05

In [21]:
def plot_forecast_results(original_data, predictions, forecast):
    """
    Plot the original data, fitted values, and forecast
    """
    plt.figure(figsize=(12, 6))
    
    # Plot original data
    plt.plot(original_data.index, original_data, label='Historical Data', color='blue', linewidth=1)
    
    # Plot fitted values
    plt.plot(predictions.index, predictions, label='Fitted Values', color='red', alpha=0.7, linewidth=1)
    
    # Plot forecast
    forecast_index = pd.date_range(start=original_data.index[-1], periods=len(forecast)+1, closed='right')
    plt.plot(forecast_index, forecast, label='Forecast', color='green', linewidth=2)
    
    plt.title('Stock Price Forecast using ARIMA')
    plt.xlabel('Date')
    plt.ylabel('Price ($)')
    plt.legend()
    plt.grid(True)
    plt.show()

In [22]:
def main():
    # Parameters
    csv_path = 'HistoricalQuotes.csv'
    forecast_days = 30

    try:
        # Read data
        print("Reading data from CSV...")
        df = read_stock_data(csv_path)
        print("\nSuccessfully loaded data!")
        print(f"Date range: {df.index.min()} to {df.index.max()}")
        
        # Plot comprehensive analysis
        print("\nGenerating analysis plots...")
        plot_comprehensive_analysis(df)
        
        # Check stationarity
        print("\nChecking stationarity...")
        is_stationary = check_stationarity(df['Close'])
        
        # Train ARIMA model
        print("\nTraining ARIMA model...")
        # Using ARIMA(1,1,1) since we confirmed non-stationarity
        model = ARIMA(df['Close'], order=(1,1,1))
        results = model.fit()
        
        # Make predictions and forecast
        predictions = results.get_prediction(start=0).predicted_mean
        forecast = results.forecast(steps=forecast_days)
        
        # Calculate error metrics
        mse = mean_squared_error(df['Close'], predictions)
        rmse = np.sqrt(mse)
        print(f"\nModel RMSE: ${rmse:.2f}")
        
        # Plot forecast results
        print("\nGenerating forecast plot...")
        plot_forecast_results(df['Close'], predictions, forecast)
        
        # Print next week's forecast
        print("\nForecast for the next 7 days:")
        next_week = forecast[:7]
        for date, price in zip(pd.date_range(start=df.index[-1], periods=8, closed='right')[:7], next_week):
            print(f"{date.strftime('%Y-%m-%d')}: ${price:.2f}")
        
    except Exception as e:
        print(f"\nError: {str(e)}")
        print("Please ensure your CSV file is in the correct format:")
        print("Date, Close/Last, Volume, Open, High, Low")
        print("Example: 02/28/2020, $273.36, 106721200, $257.26, $278.41, $256.37")

if __name__ == "__main__":
    main()

SyntaxError: expected 'except' or 'finally' block (737922961.py, line 48)