In [None]:
import yfinance as yf

# Smoke test: request 2023 price history for two tickers and display the result.
yf.download(
    tickers=['AAPL', 'META'],
    start='2023-01-01',
    end='2023-12-31',
)

In [2]:
import pandas as pd
import yfinance as yf

# Parse every HTML table on the two Wikipedia index pages.
# Each result is a list of DataFrames; the download cell below reads
# sp500[0]['Symbol'] to get the S&P 500 ticker list.
nasdaq = pd.read_html(io='https://en.wikipedia.org/wiki/Nasdaq-100')
sp500 = pd.read_html(io='https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')

In [None]:
# Load the pipe-delimited "nasdaqtraded" symbol directory and preview it.
nasdaq_tickers = pd.read_csv(
    'https://www.nasdaqtrader.com/dynamic/SymDir/nasdaqtraded.txt',
    delimiter='|',
)
nasdaq_tickers.head()
# Optional clean-up, currently disabled:
#   drop test issues  -> nasdaq_tickers[nasdaq_tickers['Test Issue'] == 'N']
#   drop ETFs         -> nasdaq_tickers[nasdaq_tickers['ETF'] == 'N']
#   symbols only      -> nasdaq_tickers['NASDAQ Symbol'].tolist()

In [None]:
import os
import pandas as pd
from tqdm import tqdm

# Build (or resume building) a long-format price history for every S&P 500
# constituent. The table is checkpointed to ticker_data.csv after each
# successful download so an interrupted run can pick up where it stopped.
if os.path.exists('ticker_data.csv'):
    final_df = pd.read_csv('ticker_data.csv')
    # Earlier versions of this cell wrote the date as an unnamed index column;
    # normalise it so the file always exposes the 'Date' column that the
    # plotting cells further down rely on.
    if 'Date' not in final_df.columns and 'Unnamed: 0' in final_df.columns:
        final_df = final_df.rename(columns={'Unnamed: 0': 'Date'})
else:
    final_df = pd.DataFrame(columns=['Date', 'Ticker', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume'])
not_found = []

# A set makes the "already downloaded?" check O(1) instead of rebuilding a
# list of every stored row for each ticker.
downloaded = set(final_df['Ticker'].dropna())

for symbol in tqdm(sp500[0]['Symbol'].tolist()):
    # Wikipedia writes share classes with a dot (BRK.B) whereas Yahoo Finance
    # uses a hyphen (BRK-B); without this the dotted symbols never resolve.
    ticker = symbol.replace('.', '-')
    if ticker in downloaded:
        continue
    try:
        df = yf.download(ticker, start='2019-01-01', end='2025-03-31', multi_level_index=False)
        # yfinance typically reports a failed lookup by returning an empty
        # frame rather than raising, so treat "no rows" as a failure too.
        if df.empty:
            raise ValueError('no rows returned')
        # Move the date index into a real column so that rows read back from
        # the CSV and freshly downloaded rows share one schema.
        df = df.rename_axis('Date').reset_index()
        df['Ticker'] = ticker
        final_df = df if final_df.empty else pd.concat([final_df, df], ignore_index=True)
        final_df.to_csv('ticker_data.csv', index=False)
        downloaded.add(ticker)
    except Exception as e:
        print(f"Error downloading {ticker}: {e}")
        not_found.append(ticker)


In [None]:
# Silence FutureWarning noise for the rest of the session.
import warnings
warnings.filterwarnings(action='ignore', category=FutureWarning)


# Keys of Ticker.info that are dropped before the record is stored
# (addresses, contact details, descriptive text, display-only metadata).
useless = [
    'address1', 'city', 'state', 'zip', 'country', 'phone', 'website',
    'industry', 'industryKey', 'industryDisp',
    'sector', 'sectorKey', 'sectorDisp',
    'longBusinessSummary', 'fullTimeEmployees', 'companyOfficers',
    'irWebsite', 'executiveTeam', 'maxAge', 'fax',
    'displayName', 'marketState', 'longName',
    'exchangeTimezoneName', 'exchangeTimezoneShortName',
]

# Running table of one row per ticker; rewritten to disk after every ticker
# so partial progress survives an interruption.
fundamentals = pd.DataFrame()

# The price-history file supplies the list of tickers to look up.
df = pd.read_csv('ticker_data.csv')

for ticker in tqdm(df['Ticker'].unique(), desc='Getting fundamental data'):
    try:
        tick = yf.Ticker(ticker)
        info = tick.info

        # Keep every field that is not on the exclusion list.
        kept = {name: value for name, value in info.items() if name not in useless}
        keys = list(kept.keys())
        values = list(kept.values())

        # Single-row frame for this ticker, tagged with its symbol.
        ticker_df = pd.DataFrame([values], columns=keys).assign(Ticker=ticker)

        # Grow the table and checkpoint it.
        fundamentals = pd.concat([fundamentals, ticker_df], ignore_index=True)
        fundamentals.to_csv('fundamentals.csv', index=False)
    except Exception as e:
        print(f"Error getting data for {ticker}: {e}")

    


In [50]:
import sqlite3

from contextlib import closing

# Export the `fundamentals` table from stocks.db to fundamentals.csv.
# closing() guarantees the connection is released even if the query raises;
# sqlite3's own `with conn:` only manages the transaction and does not close.
with closing(sqlite3.connect('stocks.db')) as conn:
    fundamentals = pd.read_sql_query("SELECT * FROM fundamentals", conn)
fundamentals.to_csv('fundamentals.csv', index=False)

In [None]:
import pandas as pd
from datetime import datetime

# Split the wide fundamentals table into thematic sub-tables. `cols` is the
# pool of columns not yet assigned to a group; each group (except risks, see
# below) is removed from the pool once extracted, and whatever remains
# becomes fund_df.

# Load first so the column list describes the data actually being split,
# not whatever `fundamentals` happened to hold from an earlier cell.
fundamentals = pd.read_csv('fundamentals.csv')
cols = list(fundamentals.columns)

# --- Risk scores -----------------------------------------------------------
risks = [i for i in cols if 'Risk' in i]
risks_df = fundamentals[['Ticker'] + risks]
# NOTE(review): the original never removed the risk columns from `cols`, so
# they also end up in fund_df. That is preserved here; confirm it is intended.

# --- Date / timestamp columns ----------------------------------------------
add = [i for i in cols if ('date' in i.lower()) or ('time' in i.lower())]
remove = ['isEarningsDateEstimate', 'firstTradeDateMilliseconds', 'ipoExpectedDate', 'nameChangeDate']
date_cols = [i for i in add if i not in remove]
# .copy() so the conversion below writes to an independent frame rather than
# to a slice of `fundamentals` (avoids SettingWithCopyWarning).
relevant_dates = fundamentals[['Ticker'] + date_cols].copy()
for col in date_cols:
    # Values are treated as epoch seconds; fromtimestamp() yields local time.
    relevant_dates[col] = relevant_dates[col].apply(
        lambda x: datetime.fromtimestamp(x) if pd.notnull(x) else x
    )
cols = [i for i in cols if i not in date_cols]

# --- Dividends -------------------------------------------------------------
dividends = [i for i in cols if 'Dividend' in i]
dividends_df = fundamentals[['Ticker'] + dividends]
cols = [i for i in cols if i not in dividends]

# --- "fifty..." statistics (fifty-two-week / fifty-day fields) -------------
L1Y = [i for i in cols if 'fifty' in i]
L1Y_stats = fundamentals[['Ticker'] + L1Y]
cols = [i for i in cols if i not in L1Y]

# --- Volume ----------------------------------------------------------------
volume = [i for i in cols if 'volume' in i.lower()]
volume_df = fundamentals[['Ticker'] + volume]
cols = [i for i in cols if i not in volume]

# --- Short interest --------------------------------------------------------
short = [i for i in cols if 'short' in i.lower()]
short_df = fundamentals[['Ticker'] + short]
# The original iterated over `cols` while removing from it, which dropped
# every other remaining column instead of the short-interest group.
cols = [i for i in cols if i not in short]

# --- Everything left over --------------------------------------------------
fund_df = fundamentals[cols]
fund_df

In [None]:
# List the columns of fund_df (built at the end of the previous cell).
fund_df.columns

In [None]:
import matplotlib.pyplot as plt
ticker_df = pd.read_csv('ticker_data.csv')
tickers = ['GIS','APA']
%matplotlib inline

for i, ticker in enumerate(tickers, 1):
    plt.figure(figsize=(12,8))
    temp = ticker_df[ticker_df['Ticker'] == ticker]
    temp = temp.set_index('Date')
    plt.plot(temp.index, temp['Open'], label=f'{ticker} Opening Price')
    plt.title(f'{ticker} Stock Price Over Time')
    plt.xlabel('Date')
    plt.ylabel('Price ($)')
    plt.xticks(rotation=90)  # Rotate x-axis labels 90 degrees
    plt.xticks(plt.xticks()[0][::20])  # Show every 20th tick mark
    plt.legend()  # Add legend since we set a label
    plt.tight_layout()
    plt.show()


In [None]:
import numpy as np
from scipy.stats import norm
from datetime import datetime, timedelta

# Project the distribution of future prices for one ticker under geometric
# Brownian motion (the price dynamics assumed by Black-Scholes), using the
# risk-free rate as drift and historical volatility.
stock = 'AAPL'
r = 0.05  # Risk-free rate (assumed 5%)
T = 1/12  # Time horizon in years (one month)
days = 30  # Number of forecast steps across the horizon
n_simulations = 1000
rng = np.random.default_rng(42)  # seeded so a re-run reproduces the same bands

# NOTE: the frame keeps its historical name `gis_data`; `stock` decides which
# ticker it actually holds.
gis_data = ticker_df[ticker_df['Ticker'] == stock].copy()
if gis_data.empty:
    raise ValueError(f"No rows for ticker {stock!r} in ticker_df")
gis_data['Returns'] = np.log(gis_data['Close']/gis_data['Close'].shift(1))

# Model parameters
S0 = gis_data['Close'].iloc[-1]  # last observed close = starting price
sigma = np.std(gis_data['Returns'].dropna()) * np.sqrt(252)  # Annual volatility (252 trading days per year)

# Time (in years) of each forecast step. The grid starts one step *after*
# today so that t[k] lines up with future_dates[k] below; starting at 0 would
# place today's price on tomorrow's date.
t = np.linspace(T / days, T, days)

# Draw the terminal-price distribution at every step:
#   S(t) = S0 * exp((r - sigma^2 / 2) * t + sigma * sqrt(t) * Z),  Z ~ N(0, 1)
# Each column is an independent sample of the distribution at that horizon
# (enough for per-step means and percentiles); rows are not continuous paths.
Z = rng.standard_normal((n_simulations, days))
S = S0 * np.exp((r - 0.5 * sigma**2) * t[None, :] + sigma * np.sqrt(t)[None, :] * Z)

# 90% band and mean at every step
lower_bound = np.percentile(S, 5, axis=0)
upper_bound = np.percentile(S, 95, axis=0)
mean_path = np.mean(S, axis=0)

# Plot the most recent `days` observations followed by the projection
plt.figure(figsize = (10,5))
gis_data_LD = gis_data.iloc[-days:]
plt.plot(pd.to_datetime(gis_data_LD['Date']), gis_data_LD['Close'], label='Historical Data')
# `days` calendar dates beginning the day after the last observation
future_dates = pd.date_range(start=pd.to_datetime(gis_data_LD.iloc[-1]['Date']), periods=days+1, freq='D')[1:]
plt.plot(future_dates, mean_path, label='Mean Prediction')

plt.fill_between(future_dates, lower_bound, upper_bound, color='gray', alpha=0.2, label='90% Confidence Interval')

plt.title(f'{stock} Stock Price Prediction using Black-Scholes Model')
plt.xlabel('Date')
plt.ylabel('Price ($)')
plt.legend()
plt.xticks(rotation=45)
plt.grid(True)
plt.tight_layout()
plt.show();

print(f"Current {stock} Price: ${S0:.2f}")
# The horizon is T = 1/12 year sampled over `days` steps, i.e. about one
# month; the old message said "2 months".
print(f"Predicted Price Range after {days} days:")
print(f"Lower bound (5th percentile): ${lower_bound[-1]:.2f}")
print(f"Mean prediction: ${mean_path[-1]:.2f}")
print(f"Upper bound (95th percentile): ${upper_bound[-1]:.2f}")


In [172]:
import matplotlib.pyplot as plt
import pandas as pd

In [173]:
# Reload the cached price history and keep only the AAPL rows.
df = pd.read_csv('ticker_data.csv').loc[lambda frame: frame['Ticker'] == 'AAPL']

In [None]:
def simulate_ar(intercept, coef1, coef2, noise=0.3, *, warmup=10, steps=200):
    """
    Generate a synthetic AR(2) series and return it without its burn-in.

    The recursion is ``y[t] = intercept + coef1*y[t-1] + coef2*y[t-2] + e[t]``
    where ``e[t]`` is drawn from ``Normal(0, noise)`` using NumPy's global
    random state. The first two values are seeded with ``intercept``.

    Parameters:
    -----------
    intercept : float
        Constant term of the recursion (also the value of the two seeds)
    coef1 : float
        Weight on the previous value (lag 1)
    coef2 : float
        Weight on the value two steps back (lag 2); 0 gives an AR(1) process
    noise : float, default=0.3
        Standard deviation of the Gaussian shock added at every step
    warmup : int, default=10
        Leading values generated but dropped from the result
    steps : int, default=200
        Number of values returned

    Returns:
    --------
    numpy.ndarray
        The last ``steps`` simulated values (everything after the warmup)
    """
    total = warmup + steps
    draws = np.zeros(total)

    # Seed the two lags the recursion needs before it can start.
    draws[:2] = intercept

    for step in range(2, total):
        lag1 = draws[step - 1]
        lag2 = draws[step - 2]
        shock = np.random.normal(0, noise)
        draws[step] = intercept + coef1 * lag1 + coef2 * lag2 + shock

    # Discard the burn-in segment.
    return draws[warmup:]


# Simulate an AR(1) series: intercept 10, lag-1 coefficient -0.9, and a zero
# lag-2 coefficient (which reduces the AR(2) simulator to AR(1)).
# The negative coefficient makes consecutive values alternate around the mean.
ar1_data = simulate_ar(intercept=10, coef1=-0.9, coef2=0)

# Show the simulated series.
fig, ax = plt.subplots(figsize=(10, 3))
ax.plot(ar1_data)
ax.set_title("Generated Autoregressive Timeseries", fontsize=15)
plt.show()

In [None]:
# `pm` is used throughout this cell but no earlier cell imports it, so the
# notebook failed on a fresh kernel.
import pymc as pm

## Prior specification, kept in a dictionary so the number of AR terms can be
## varied. "coefs" has one entry per coefficient *including* the constant
## term, so size 2 means an intercept plus one lag.
priors = {
    "coefs": {"mu": [10, 0.2], "sigma": [0.1, 0.1], "size": 2},
    "sigma": 8,
    "init": {"mu": 9, "sigma": 0.1, "size": 1},
}

## Initialise an empty model so the coordinate can be registered before any
## variables are added
with pm.Model() as AR:
    pass

## Time index of the observations
t_data = list(range(len(ar1_data)))
## Register the time index as the "obs_id" coordinate (kept as a named
## coordinate so it can be swapped out later for predictions)
AR.add_coord("obs_id", t_data)

with AR:
    ## Data containers to enable prediction
    t = pm.Data("t", t_data, dims="obs_id")
    y = pm.Data("y", ar1_data, dims="obs_id")

    # The first coefficient is the constant term (constant=True below); every
    # coefficient of the AR process gets its own Normal prior
    coefs = pm.Normal("coefs", priors["coefs"]["mu"], priors["coefs"]["sigma"])
    sigma = pm.HalfNormal("sigma", priors["sigma"])
    # One initial-value distribution per lag, hence the configurable size
    init = pm.Normal.dist(
        priors["init"]["mu"], priors["init"]["sigma"], size=priors["init"]["size"]
    )
    # Number of AR steps = series length minus the lags consumed by `init`
    ar1 = pm.AR(
        "ar",
        coefs,
        sigma=sigma,
        init_dist=init,
        constant=True,
        steps=t.shape[0] - (priors["coefs"]["size"] - 1),
        dims="obs_id",
    )

    # Likelihood: observations are Normal around the latent AR process
    outcome = pm.Normal("likelihood", mu=ar1, sigma=sigma, observed=y, dims="obs_id")
    ## Sampling: prior predictive, posterior, then posterior predictive, all
    ## collected into a single InferenceData object
    idata_ar = pm.sample_prior_predictive()
    idata_ar.extend(pm.sample(1000, random_seed=100, target_accept=0.95))
    idata_ar.extend(pm.sample_posterior_predictive(idata_ar))

In [None]:
# Plot the posterior distributions and model fit
import arviz as az
import matplotlib.pyplot as plt

# Posterior summaries and model fit.
# "coefs" is a vector, and plot_posterior draws one panel per element, so it
# needs one Axes per coefficient. Handing it a single Axes (as before) left
# room for only the first coefficient.
n_coefs = idata_ar.posterior["coefs"].shape[-1]
n_panels = n_coefs + 2  # coefficients + sigma + model fit
fig, axes = plt.subplots(1, n_panels, figsize=(5 * n_panels, 5))

# Posterior distributions of the model parameters
az.plot_posterior(idata_ar, var_names=["coefs"], ax=axes[:n_coefs])
az.plot_posterior(idata_ar, var_names=["sigma"], ax=axes[n_coefs])

# Model fit against the data
ax = axes[n_coefs + 1]
ax.plot(t_data, ar1_data, 'o', color='black', alpha=0.6, label='Observed data')

# Posterior predictive draws, shaped (chain, draw, time)
posterior_pred = idata_ar.posterior_predictive["likelihood"].values
n_chains, n_draws = posterior_pred.shape[0], posterior_pred.shape[1]
# Overlay a random subset for clarity; never ask for more draws than exist
n_samples = min(50, n_chains * n_draws)
sample_idx = np.random.choice(n_chains * n_draws, n_samples, replace=False)
for idx in sample_idx:
    # Unflatten the draw number back into (chain, draw)
    chain_idx, draw_idx = divmod(idx, n_draws)
    ax.plot(t_data, posterior_pred[chain_idx, draw_idx], color='blue', alpha=0.1)

# Mean of the posterior predictive across all chains and draws
posterior_pred_mean = posterior_pred.mean(axis=(0, 1))
ax.plot(t_data, posterior_pred_mean, color='red', linewidth=2, label='Posterior mean')

ax.set_title('Model Fit')
ax.set_xlabel('Time')
ax.set_ylabel('Value')
ax.legend()

plt.tight_layout()
plt.show()

# Trace plots to check convergence
az.plot_trace(idata_ar, var_names=["coefs", "sigma"])
plt.tight_layout()
plt.show()


In [None]:
# Show the row(s) whose High equals the column maximum.
# NOTE: meta_data is created by the hourly-volatility cell further down, so
# that cell has to run before this one.
meta_data.loc[meta_data['High'] == meta_data['High'].max()]

In [None]:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
import yfinance as yf
import numpy as np
import pandas as pd
from matplotlib.gridspec import GridSpec

def download_and_analyze_hourly_data(ticker='META', start_date='2023-04-15', end_date='2024-04-01'):
    """
    Fetch hourly bars for one ticker and express High/Low relative to Open.

    Parameters:
    -----------
    ticker : str
        Stock ticker symbol
    start_date : str
        Start date in YYYY-MM-DD format
    end_date : str
        End date in YYYY-MM-DD format

    Returns:
    --------
    pandas.DataFrame
        The downloaded frame with an added ``Date`` column (the calendar date
        of each bar) and with ``High`` / ``Low`` overwritten by their
        percentage difference from that bar's ``Open``
    """
    stock_data = yf.download(
        ticker,
        start=start_date,
        end=end_date,
        interval='1h',
        multi_level_index=False,
    )

    # Calendar date of each bar, used later to group bars by day
    stock_data['Date'] = stock_data.index.date

    # Replace the absolute High/Low with "% above/below the bar's open"
    open_price = stock_data['Open']
    for column in ('High', 'Low'):
        stock_data[column] = np.round(stock_data[column] * 100 / open_price, 2) - 100

    return stock_data

def calculate_hourly_range_stats(stock_data):
    """
    Calculate daily statistics of hourly price ranges.

    The range of a bar is ``High - Low``; bars are grouped by their ``Date``
    column and summarised with mean and sample standard deviation.

    Only the ``Date``, ``High`` and ``Low`` columns are read, so the frame's
    index may have any name (previously it had to be called ``Datetime``).

    Parameters:
    -----------
    stock_data : pandas.DataFrame
        Bars with ``Date``, ``High`` and ``Low`` columns

    Returns:
    --------
    pandas.DataFrame
        Indexed by ``Date`` with MultiIndex columns
        ``('Hourly Range', 'mean')`` and ``('Hourly Range', 'std')``.
        ``std`` is NaN for dates that have a single bar.
    """
    # Build the per-bar range keyed directly by date; going through the raw
    # arrays avoids any dependence on the original index or its name.
    hourly_range = pd.DataFrame(
        {'Hourly Range': (stock_data['High'] - stock_data['Low']).to_numpy()},
        index=pd.Index(stock_data['Date'].to_numpy(), name='Date'),
    )
    return hourly_range.groupby(level='Date').agg({'Hourly Range': ['mean', 'std']})

def plot_hourly_range_with_price_subplots(hourly_stats, daily_data, ticker='META'):
    """
    Draw intraday volatility above the daily price on a shared time axis.

    The top panel shows the per-day standard deviation of hourly ranges; the
    bottom panel shows the daily close with a vertical Low-High bar per day.
    The day of maximum volatility is highlighted in both panels.

    Note: this sets the seaborn style and two ``plt.rcParams`` font entries,
    which stay in effect for later figures.

    Parameters:
    -----------
    hourly_stats : pandas.DataFrame
        Daily statistics of hourly price ranges; ``['Hourly Range']['std']``
        is read
    daily_data : pandas.DataFrame
        Daily stock price data with ``Close``, ``High`` and ``Low`` columns
    ticker : str
        Stock ticker symbol for title

    Returns:
    --------
    matplotlib.figure.Figure
        The created figure
    """
    hourly_range_std = hourly_stats['Hourly Range']['std']

    # Global look-and-feel
    sns.set_style("whitegrid")
    plt.rcParams['font.family'] = 'sans-serif'
    plt.rcParams['font.sans-serif'] = ['Arial', 'Helvetica', 'DejaVu Sans']

    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 6), dpi=100, facecolor='white',
                                   sharex=True, gridspec_kw={'height_ratios': [1, 1.5]})

    # Top panel: volatility line with a light fill underneath
    ax1.plot(hourly_range_std.index, hourly_range_std.values,
             color='#3366CC', linewidth=2.5, alpha=0.9)
    ax1.fill_between(hourly_range_std.index, hourly_range_std.values,
                     alpha=0.2, color='#3366CC')

    # Bottom panel: closing price
    ax2.plot(daily_data.index, daily_data['Close'],
             color='#FF6600', linewidth=1, alpha=0.9)

    # Daily Low-High bars, drawn in one vectorised call instead of one
    # vlines() call per row
    ax2.vlines(daily_data.index, daily_data['Low'], daily_data['High'],
               color='#FF6600', alpha=0.5, linewidth=1.5)

    # Light fill from the close down to the lowest low (Series.min skips NaN)
    ax2.fill_between(daily_data.index, daily_data['Close'],
                     daily_data['Low'].min(), alpha=0.1, color='#FF6600')

    # Monthly date ticks on the shared x-axis
    ax2.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
    ax2.xaxis.set_major_locator(mdates.MonthLocator(interval=1))
    ax2.tick_params(axis='x', labelrotation=45)

    # Titles and labels
    fig.suptitle(f'{ticker} Price and Intraday Volatility', fontsize=16, fontweight='bold', y=0.98)
    ax1.set_title('Hourly Price Range Volatility', fontsize=12, pad=10)
    ax2.set_title('Daily Stock Price', fontsize=12, pad=10)

    ax2.set_xlabel('Date', fontsize=12, labelpad=10)
    ax1.set_ylabel('Std Dev of Hourly Range (%)', fontsize=12, labelpad=10, color='#3366CC')
    ax2.set_ylabel('Stock Price ($)', fontsize=12, labelpad=10, color='#FF6600')

    # Tick colours match the lines
    ax1.tick_params(axis='y', colors='#3366CC')
    ax2.tick_params(axis='y', colors='#FF6600')

    # Subtle grid
    ax1.grid(True, alpha=0.3, linestyle='--')
    ax2.grid(True, alpha=0.3, linestyle='--')

    # Highlight the day of maximum volatility (skipped if every value is NaN,
    # e.g. when no day has more than one bar)
    if hourly_range_std.notna().any():
        max_idx = hourly_range_std.idxmax()
        max_val = hourly_range_std.max()
        ax1.scatter(max_idx, max_val, color='red', s=80, zorder=5)
        ax1.annotate(f'Max Volatility: {max_val:.2f}%',
                     xy=(max_idx, max_val),
                     xytext=(10, -15),
                     textcoords='offset points',
                     fontsize=10,
                     bbox=dict(boxstyle='round,pad=0.5', fc='yellow', alpha=0.7))

        # Mark the closing price on that same day. The stats built by
        # calculate_hourly_range_stats are keyed by datetime.date objects,
        # while the daily frame is looked up through a DatetimeIndex; a plain
        # `max_idx in daily_data.index` does not match a date key, so the
        # original never drew this marker. Compare on normalised, tz-naive
        # timestamps instead.
        peak_day = pd.Timestamp(max_idx)
        if peak_day.tzinfo is not None:
            peak_day = peak_day.tz_localize(None)
        daily_days = pd.DatetimeIndex(daily_data.index)
        if daily_days.tz is not None:
            daily_days = daily_days.tz_localize(None)
        same_day = np.asarray(daily_days.normalize() == peak_day.normalize())
        if same_day.any():
            price_x = daily_data.index[same_day][0]
            max_price = daily_data['Close'].to_numpy()[same_day][0]
            ax2.scatter(price_x, max_price, color='red', s=80, zorder=5)
            ax2.annotate(f'Price: ${max_price:.2f}',
                         xy=(price_x, max_price),
                         xytext=(10, 15),
                         textcoords='offset points',
                         fontsize=10,
                         bbox=dict(boxstyle='round,pad=0.5', fc='yellow', alpha=0.7))

    # Subtle border
    for ax in [ax1, ax2]:
        for spine in ax.spines.values():
            spine.set_edgecolor('#CCCCCC')

    plt.tight_layout()
    return fig

# Example: META intraday volatility versus daily price, Feb-Apr 2025
if __name__ == "__main__":
    ticker = 'META'
    start_date, end_date = '2025-02-01', '2025-04-05'

    # Hourly bars with High/Low as % of open, then per-day range statistics
    meta_data = download_and_analyze_hourly_data(ticker=ticker, start_date=start_date, end_date=end_date)
    hourly_stats = calculate_hourly_range_stats(stock_data=meta_data)

    # Daily bars over the same window for the price panel
    daily_data = yf.download(
        ticker,
        start=start_date,
        end=end_date,
        multi_level_index=False,
        interval='1d',
    )

    # Build and show the two-panel figure
    fig = plot_hourly_range_with_price_subplots(hourly_stats=hourly_stats, daily_data=daily_data, ticker=ticker)
    plt.show()

In [None]:
def plot_smoothed_data(data, ma_windows=[3, 5], ema_spans=[5], figsize=(10, 6), remove_raw_data=False):
    """
    Plot simple and exponential moving averages of the 'Hourly Range' column.

    Parameters:
    -----------
    data : pandas.DataFrame
        Must contain 'Hourly Range'; a 'Date' column, if present, becomes the index
    ma_windows : list of int
        Window lengths for the rolling means
    ema_spans : list of int
        Spans for the exponentially weighted means (``adjust=False``)
    figsize : tuple
        Figure size passed to matplotlib
    remove_raw_data : bool
        When True the unsmoothed series is not drawn (it is still returned)

    Returns:
    --------
    dict
        'RawData' maps to the prepared DataFrame; every other key is the plot
        label of a smoothed series and maps to that pandas.Series
    """
    # Work on a copy so the caller's frame is left untouched
    frame = data.copy()
    if 'Date' in frame.columns:
        frame = frame.set_index('Date')

    # Coerce to numbers; unparseable entries become NaN
    frame['Hourly Range'] = pd.to_numeric(frame['Hourly Range'], errors='coerce')
    series = frame['Hourly Range']

    plt.figure(figsize=figsize)
    timeseries = {'RawData': frame}

    if not remove_raw_data:
        plt.plot(frame.index, series, color='red', linewidth=2, label='Raw Data')

    # Simple moving averages
    for window in ma_windows:
        label = f'{window}-Day MA'
        timeseries[label] = series.rolling(window=window).mean()
        plt.plot(frame.index, timeseries[label], linewidth=1.5, label=label)

    # Exponential moving averages
    for span in ema_spans:
        label = f'EMA (span={span})'
        timeseries[label] = series.ewm(span=span, adjust=False).mean()
        plt.plot(frame.index, timeseries[label], linewidth=1.5, label=label)

    plt.title('Standard Deviation of Hourly Range by Date')
    plt.xlabel('Date')
    plt.ylabel('Standard Deviation of Hourly Range')
    plt.grid(True, alpha=0.3)
    plt.legend()
    plt.tight_layout()
    plt.show()

    return timeseries

# Smooth the daily std-dev of hourly ranges from March 2025 onward.

# Flatten the stats into a two-column frame: Date, Hourly Range
hourly_range = (
    hourly_stats['Hourly Range']['std']
    .reset_index()
    .rename(columns={'std': 'Hourly Range'})
)

# Keep rows on or after the cutoff (Date holds datetime.date objects)
cutoff = pd.to_datetime('2025-03-01').date()
hourly_range = hourly_range[hourly_range['Date'] >= cutoff]

timeseries = plot_smoothed_data(
    hourly_range,
    ma_windows=[2,5,10],
    ema_spans=[2,5,10],
    remove_raw_data=True,
)


In [None]:
import pandas as pd
import numpy as np
from statsmodels.tsa.arima.model import ARIMA
import matplotlib.pyplot as plt
from datetime import timedelta
from sklearn.metrics import mean_squared_error
from math import sqrt
import warnings

def find_best_arima_model(time_series, p_values=range(0, 6), d_values=range(0, 2), q_values=range(0, 6)):
    """
    Grid-search ARIMA orders and return the winner refitted on all the data.

    Each (p, d, q) candidate is fitted on the first 80% of the series and
    used to forecast the remaining 20%. Selection rule:

    * candidates whose forecast is essentially flat (variance < 1e-4) are
      skipped;
    * the lowest AIC wins; a candidate within 2 AIC units of the current best
      also wins if its hold-out RMSE is lower;
    * candidates that fail to fit are silently skipped.

    If no candidate qualifies, ARIMA(1, 1, 1) is used.

    Warnings are suppressed only while the grid search runs; the caller's
    warning filters are restored afterwards.

    Parameters:
    -----------
    time_series : pandas.Series
        The time series data to model (NaNs are dropped first)
    p_values : iterable
        Range of p values to test (AR order)
    d_values : iterable
        Range of d values to test (differencing)
    q_values : iterable
        Range of q values to test (MA order)

    Returns:
    --------
    statsmodels.tsa.arima.model.ARIMAResults
        The selected (or fallback) order fitted on the full series
    """
    # Drop NaN values for model fitting
    time_series = time_series.dropna()

    best_score = float('inf')   # AIC of the current best candidate
    best_rmse = float('inf')    # hold-out RMSE of the current best candidate
    best_order = None

    # Chronological split: first 80% to fit, last 20% to score
    train_size = int(len(time_series) * 0.8)
    train, test = time_series.iloc[:train_size], time_series.iloc[train_size:]

    print("Grid searching for best ARIMA model parameters...")

    # catch_warnings() scopes the suppression to the search and then restores
    # whatever filters were active before. The previous filterwarnings("ignore")
    # / filterwarnings("default") pair overwrote them for the whole session.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")

        for p in p_values:
            for d in d_values:
                for q in q_values:
                    try:
                        model_fit = ARIMA(train, order=(p, d, q)).fit()
                        predictions = model_fit.forecast(steps=len(test))

                        rmse = sqrt(mean_squared_error(test, predictions))
                        aic = model_fit.aic

                        # A near-constant forecast is treated as uninformative
                        pred_variance = np.var(predictions)
                        if pred_variance < 0.0001:
                            continue

                        # AIC is the primary criterion (fit with a complexity
                        # penalty); RMSE breaks near-ties of under 2 AIC units
                        if aic < best_score or (abs(aic - best_score) < 2 and rmse < best_rmse):
                            best_score = aic
                            best_rmse = rmse
                            best_order = (p, d, q)
                            print(f"New best ARIMA{best_order} - AIC: {aic:.2f}, RMSE: {rmse:.4f}, Var: {pred_variance:.6f}")

                    except Exception:
                        # Best effort: an order that cannot be fitted or
                        # scored is simply not a candidate
                        continue

    if best_order is None:
        print("No suitable ARIMA model found. Using default (1,1,1).")
        best_order_to_fit = (1, 1, 1)
    else:
        print(f"Best ARIMA model: {best_order} with AIC: {best_score:.4f}, RMSE: {best_rmse:.4f}")
        best_order_to_fit = best_order

    # Refit the chosen order on the full dataset so the forecast starts from
    # the most recent observation
    best_model = ARIMA(time_series, order=best_order_to_fit).fit()

    return best_model

def forecast_time_series(timeseries, hourly_range, forecast_days=10, show_plot=True):
    """
    Forecast every smoothed series with its own grid-searched ARIMA model.

    Parameters:
    -----------
    timeseries : dict
        Mapping of label -> pandas.Series; any key containing 'RawData' is skipped
    hourly_range : pandas.DataFrame
        Frame with a 'Date' column; its latest date anchors the forecast dates
    forecast_days : int, optional
        Number of steps to forecast ahead, by default 10
    show_plot : bool, optional
        Whether to show the plot, by default True

    Returns:
    --------
    tuple
        ``(forecasts, date_range)`` where ``forecasts`` maps each label to its
        forecast and ``date_range`` is a pandas.DatetimeIndex holding one date
        per forecast step
    """
    # Every series except the raw data
    ts_keys = [name for name in timeseries if 'RawData' not in name]
    print(ts_keys)

    # One date per forecast step, starting after the last observation.
    # The series has one point per trading day, so steps are mapped onto
    # business days (freq='B') rather than calendar days; otherwise forecasts
    # would be drawn on weekends.
    last_date = hourly_range['Date'].max()
    date_range = pd.date_range(start=last_date + timedelta(days=1), periods=forecast_days, freq='B')

    if show_plot:
        plt.figure(figsize=(12, 7))

    forecasts = {}
    for key in ts_keys:
        # Moving averages start with NaNs; drop them before fitting
        ts_data = timeseries[key].dropna()

        model = find_best_arima_model(ts_data)

        forecast = model.forecast(steps=forecast_days)
        forecasts[key] = forecast

        if show_plot:
            # Only the last 10 observations are drawn, for context
            last_10_days = ts_data.iloc[-10:]
            plt.plot(last_10_days.index, last_10_days, label=f'{key} (Historical)', alpha=0.7)
            plt.plot(date_range, forecast, '--', label=f'{key} (Forecast)')

        print(f"Forecast for {key}:")
        print(forecast)
        print("-" * 50)

    if show_plot:
        # The title reflects the requested horizon (it used to say
        # "1 Month Ahead" regardless of forecast_days)
        plt.title(f'Time Series Forecasts ({forecast_days} Days Ahead)')
        plt.xlabel('Date')
        plt.ylabel('Hourly Range')
        plt.grid(True, alpha=0.3)
        plt.legend()
        plt.tight_layout()
        plt.show()

    return forecasts, date_range
# Fit one ARIMA model per smoothed series and project ten steps ahead.
forecasts, date_range = forecast_time_series(
    timeseries=timeseries,
    hourly_range=hourly_range,
    forecast_days=10,
)

In [None]:
# Two-panel figure of the ARIMA forecasts: EMA-based series on top, simple
# moving averages below. The per-series drawing used to be written out twice
# (once per panel); it now lives in one helper.


def _plot_forecast_series(ax, key, color, history, forecast, forecast_dates,
                          *, is_first, peak_label_offset, history_points=10):
    """Draw one series on `ax`: recent history, forecast, and peak/date annotations.

    `is_first` adds the once-per-panel decorations (forecast-start line and
    per-point date labels); `peak_label_offset` nudges the peak label upward
    so labels of different series do not sit on top of each other.
    """
    recent = history.iloc[-history_points:]
    forecast_values = np.asarray(forecast, dtype=float)

    # Recent history (solid)
    ax.plot(recent.index, recent,
            color=color, linewidth=2.5, label=f'{key} (Historical)', alpha=0.8)

    # Faint line through history + forecast so the two segments look joined
    ax.plot(list(recent.index) + list(forecast_dates),
            list(recent) + list(forecast_values),
            color=color, alpha=0.3, linewidth=1.5)

    # Forecast (dashed)
    ax.plot(forecast_dates, forecast_values, '--', color=color,
            linewidth=2.5, label=f'{key} (Forecast)')

    if is_first:
        # Marker for where the forecast begins
        ax.axvline(x=forecast_dates[0], color='#444444', linestyle='--', alpha=0.7,
                   linewidth=1.5, label='Forecast Start')
        y_text = ax.get_ylim()[1] * 0.95
        ax.annotate('Forecast Begins', xy=(forecast_dates[0], y_text),
                    xytext=(forecast_dates[0] - pd.Timedelta(days=1), y_text),
                    arrowprops=dict(arrowstyle='->', color='#444444', lw=1.5),
                    fontsize=10, color='#444444')

    # Highlight the forecast's peak (first occurrence of the maximum)
    peak_pos = int(np.argmax(forecast_values))
    max_date = forecast_dates[peak_pos]
    max_value = forecast_values[peak_pos]
    ax.axvline(x=max_date, color=color, linestyle=':', alpha=0.7, linewidth=1.5)
    ax.annotate(f'{key} Peak: {max_date.strftime("%Y-%m-%d")}',
                xy=(max_date, max_value),
                xytext=(max_date + pd.Timedelta(days=1), max_value + peak_label_offset),
                arrowprops=dict(arrowstyle='->', color=color, lw=1.5),
                fontsize=9, color=color, fontweight='bold',
                horizontalalignment='left')

    # Small marker on every forecast point
    ax.plot(forecast_dates, forecast_values, 'o', color=color, markersize=4)

    # Date labels only for the first series, to avoid clutter
    if is_first:
        for date, value in zip(forecast_dates, forecast_values):
            ax.annotate(date.strftime("%m-%d"), xy=(date, value),
                        xytext=(0, -15), textcoords='offset points',
                        ha='center', va='top', fontsize=8, rotation=45)


ts_keys = [i for i in list(timeseries.keys()) if 'RawData' not in i]

# Split the labels into the two families
ema_keys = [key for key in ts_keys if 'EMA' in key]
ma_keys = [key for key in ts_keys if 'MA' in key and 'EMA' not in key]

fig, axes = plt.subplots(2, 1, figsize=(10, 12), facecolor='#f9f9f9')
fig.subplots_adjust(hspace=0.3)

# One colour per series, from a different colormap per panel
ema_colors = plt.cm.viridis(np.linspace(0, 0.8, len(ema_keys)))
ma_colors = plt.cm.plasma(np.linspace(0, 0.8, len(ma_keys)))

# NOTE: `key` is deliberately left as a module-level loop variable; a later
# scratch cell reads forecasts[key].
for ax, group_keys, group_colors in ((axes[0], ema_keys, ema_colors),
                                     (axes[1], ma_keys, ma_colors)):
    for idx, key in enumerate(group_keys):
        _plot_forecast_series(
            ax, key, group_colors[idx],
            history=timeseries[key].dropna(),
            forecast=forecasts[key],
            forecast_dates=date_range,
            is_first=(idx == 0),
            peak_label_offset=idx * 0.01,
        )

# Shared styling; the titles report the actual forecast horizon
horizon = len(date_range)
panel_titles = (f'EMA Forecasts ({horizon} Days Ahead)', f'MA Forecasts ({horizon} Days Ahead)')
for ax, title in zip(axes, panel_titles):
    ax.set_title(title, fontsize=14, fontweight='bold', pad=15)
    ax.set_xlabel('Date', fontsize=12, fontweight='bold')
    ax.set_ylabel('Hourly Range', fontsize=12, fontweight='bold')
    ax.grid(True, alpha=0.3, linestyle='--')

    # Shade the forecast period
    ax.axvspan(date_range[0], date_range[-1], alpha=0.1, color='gray', label='Forecast Period')

    ax.legend(loc='upper left', frameon=True, framealpha=0.9, fontsize=10)

    # Minimal frame: hide top/right spines, thin the others
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.spines['left'].set_linewidth(0.5)
    ax.spines['bottom'].set_linewidth(0.5)

plt.tight_layout()
plt.show()

In [None]:
# Scratch check: position of the largest element of a plain list.
a = [1,2,3,5]
largest = max(a)
max_index = a.index(largest)
print(f"The index of the maximum value in a is: {max_index}")
print(f"The maximum value is: {a[max_index]}")


# Same lookup on a forecast with its first value skipped.
# NOTE: `key` is whatever the forecast-plotting cell's loop left behind, and
# max_index here is relative to the shortened list (offset by one from the
# forecast itself).
predictions = list(forecasts[key].values[1:])
largest = max(predictions)
max_index = predictions.index(largest)
forecasts[key]

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import norm

# Global look for every figure created below: start from matplotlib's bundled
# 'seaborn-v0_8' stylesheet (other bundled choices include 'ggplot',
# 'fivethirtyeight' and 'seaborn-v0_8-darkgrid') ...
plt.style.use("seaborn-v0_8")
# ... then apply seaborn's "husl" palette. Keep this after the stylesheet call
# so the palette is the setting that ends up in effect.
sns.set_palette(palette="husl")

def plot_straddle_payoff(strike_price=100, premium_call=5, premium_put=5):
    """Plot the profit/loss at expiration of a long straddle and of its two legs.

    A long straddle buys one call and one put at the same strike. Each leg's
    curve is its intrinsic value at expiration minus the premium paid for it,
    and the straddle curve is the sum of the two. The position therefore loses
    at most the combined premium (at the strike) and breaks even once the
    stock has moved away from the strike by the combined premium.

    Called without arguments the chart is the original illustrative example
    (strike 100, premiums 5 + 5, prices from 70 to 130).

    Parameters
    ----------
    strike_price : float, default 100
        Strike shared by the call and the put.
    premium_call : float, default 5
        Premium paid for the call.
    premium_put : float, default 5
        Premium paid for the put.

    Returns
    -------
    None
        The figure is rendered with ``plt.show()``.
    """
    total_premium = premium_call + premium_put

    # Price grid centred on the strike: +/-30% of the strike, widened when
    # necessary so both break-even points always fall inside the chart.
    half_width = max(0.3 * strike_price, 1.5 * total_premium)
    stock_prices = np.linspace(strike_price - half_width, strike_price + half_width, 1000)

    # Profit/loss of each leg = intrinsic value at expiration - premium paid
    call_payoff = np.maximum(stock_prices - strike_price, 0) - premium_call
    put_payoff = np.maximum(strike_price - stock_prices, 0) - premium_put
    total_payoff = call_payoff + put_payoff

    fig, ax = plt.subplots(figsize=(12, 7))
    ax.plot(stock_prices, call_payoff, '--', label='Call Option', alpha=0.7)
    ax.plot(stock_prices, put_payoff, '--', label='Put Option', alpha=0.7)
    ax.plot(stock_prices, total_payoff, 'b-', label='Straddle Payoff', linewidth=2)

    # Reference lines: zero profit (horizontal) and the strike (vertical)
    ax.axhline(y=0, color='k', linestyle='-', alpha=0.3)
    ax.axvline(x=strike_price, color='k', linestyle='--', alpha=0.3)

    ax.set_title('Straddle Strategy Payoff Profile', fontsize=14, pad=15)
    ax.set_xlabel('Stock Price at Expiration', fontsize=12)
    ax.set_ylabel('Profit/Loss', fontsize=12)
    ax.legend(fontsize=10)
    ax.grid(True, alpha=0.3)

    # Break-even points: the strike -/+ the combined premium
    break_even_lower = strike_price - total_premium
    break_even_upper = strike_price + total_premium
    ax.plot([break_even_lower, break_even_upper], [0, 0], 'ro')

    # Labels sit at the maximum-loss level, nudged outwards from each marker;
    # the nudge scales with the grid so it equals 5 price units by default.
    label_shift = half_width / 6
    for break_even, direction in ((break_even_lower, -1), (break_even_upper, 1)):
        ax.annotate(f'Break-even: {break_even:.1f}',
                    xy=(break_even, 0),
                    xytext=(break_even + direction * label_shift, -total_premium),
                    arrowprops=dict(facecolor='red', shrink=0.05))

    fig.tight_layout()
    plt.show()  # Explicitly show the plot

def _black_scholes_call(S, K, T, r, sigma):
    """Black-Scholes price of a European call option (no dividend term).

    S is the spot price, K the strike, T the time to expiry in years, r the
    risk-free rate and sigma the volatility. Every operation is element-wise,
    so ``sigma`` may be a NumPy array, in which case an array of prices is
    returned.
    """
    vol_sqrt_t = sigma * np.sqrt(T)
    d1 = (np.log(S / K) + (r + sigma**2 / 2) * T) / vol_sqrt_t
    d2 = d1 - vol_sqrt_t
    return S * norm.cdf(d1) - K * np.exp(-r * T) * norm.cdf(d2)


def plot_iv_impact(strike_price=100, time_to_expiry=1.0, risk_free_rate=0.05, spot_price=100):
    """Plot the Black-Scholes call price as a function of implied volatility.

    The volatility axis covers 10% to 100% in 100 steps. Called without
    arguments the chart is the original illustrative example (at-the-money
    call, spot = strike = 100, one year to expiry, 5% risk-free rate).

    Parameters
    ----------
    strike_price : float, default 100
        Option strike; must be positive.
    time_to_expiry : float, default 1.0
        Time to expiry in years; must be positive.
    risk_free_rate : float, default 0.05
        Risk-free rate used for discounting.
    spot_price : float, default 100
        Current price of the underlying; must be positive.

    Returns
    -------
    None
        The figure is rendered with ``plt.show()``.

    Raises
    ------
    ValueError
        If ``spot_price``, ``strike_price`` or ``time_to_expiry`` is not
        positive (the formula would divide by zero or take the log of a
        non-positive number).
    """
    if min(spot_price, strike_price, time_to_expiry) <= 0:
        raise ValueError("spot_price, strike_price and time_to_expiry must all be positive")

    # Price the option for the whole volatility grid in one vectorized call
    iv_range = np.linspace(0.1, 1.0, 100)
    option_prices = _black_scholes_call(spot_price, strike_price, time_to_expiry,
                                        risk_free_rate, iv_range)

    fig, ax = plt.subplots(figsize=(12, 7))
    ax.plot(iv_range * 100, option_prices, 'b-', linewidth=2)

    ax.set_title('Impact of Implied Volatility on Option Price', fontsize=14, pad=15)
    ax.set_xlabel('Implied Volatility (%)', fontsize=12)
    ax.set_ylabel('Option Price', fontsize=12)
    ax.grid(True, alpha=0.3)

    # Call-out anchored at the middle of the curve. The vertical text offset
    # scales with the spot price (5 price units for the default spot of 100)
    # so the label stays near the curve for other price levels.
    mid_point = len(iv_range) // 2
    iv_mid_pct = iv_range[mid_point] * 100
    price_mid = option_prices[mid_point]
    ax.annotate('Higher IV = Higher Premium',
                xy=(iv_mid_pct, price_mid),
                xytext=(iv_mid_pct + 10, price_mid + spot_price / 20),
                arrowprops=dict(facecolor='blue', shrink=0.05))

    fig.tight_layout()
    plt.show()  # Explicitly show the plot

# Render both illustrative charts: the straddle profile first, then the IV curve
for _draw_chart in (plot_straddle_payoff, plot_iv_impact):
    _draw_chart()

In [None]:
!pip install seaborn