# Code that downloads price history for each index ETF and computes return/risk metrics

In [31]:
import datetime as dt
import os

import numpy as np
import pandas as pd
import statsmodels.api as sm
import yfinance as yf

# Index ETFs to analyse, each mapped to its inception date.
# Keeping ticker and date side by side prevents the two lists drifting apart.
_INDEX_INCEPTIONS = {
    'SPY': '1993-1-24',
    'VTI': '2001-5-25',
    'IVV': '2000-5-16',
    'QQQ': '1999-3-10',
    'VXUS': '2011-1-27',
    'IJH': '2000-5-23',
    'IJR': '2000-5-23',
    'VEA': '2007-7-21',
    'VWO': '2005-3-5',
    'DIA': '1998-1-15',
}
indexes = list(_INDEX_INCEPTIONS.keys())
inception_dates = list(_INDEX_INCEPTIONS.values())
df = pd.DataFrame({'Index': indexes, 'Inception_Date': inception_dates})

# Function to get all the monthly returns
def get_monthly_returns(df):
    """Download price history per ticker and return month-over-month returns.

    For every row of ``df`` the ticker's history is fetched with
    ``yf.download`` from its inception date up to today, the last
    'Adj Close' of each month is taken, and the percentage change between
    consecutive months is computed.

    Args:
        df: DataFrame with an 'Index' column (ticker symbol) and an
            'Inception_Date' column (date string used as the download start).

    Returns:
        A long-format DataFrame with columns 'Date', 'Monthly_Return',
        'Index' and 'Inception_Date', one row per ticker and month. If no
        ticker yields any data, an empty DataFrame with those columns is
        returned instead of raising.
    """
    columns = ['Date', 'Monthly_Return', 'Index', 'Inception_Date']
    # The end date is identical for every ticker, so compute it once.
    end = dt.datetime.today().strftime('%Y-%m-%d')

    all_returns = []
    for _, row in df.iterrows():
        ticker = row['Index']
        inception_date = row['Inception_Date']
        start = pd.to_datetime(inception_date)
        data = yf.download(ticker, start=start, end=end)
        if data.empty:
            # Make the skip visible rather than silently dropping the ticker.
            print(f"No data returned for {ticker}; skipping")
            continue

        # NOTE(review): assumes yf.download returns a flat 'Adj Close' column;
        # confirm against the installed yfinance version.
        monthly_prices = data['Adj Close'].resample('M').last()
        # The first month has no predecessor, so its NaN return is dropped.
        monthly_returns = monthly_prices.pct_change().dropna().reset_index()
        monthly_returns = monthly_returns.rename(columns={'Adj Close': 'Monthly_Return'})
        monthly_returns['Index'] = ticker
        monthly_returns['Inception_Date'] = inception_date  # Include inception date
        all_returns.append(monthly_returns)

    if not all_returns:
        # pd.concat raises ValueError on an empty list; return an empty frame
        # with the expected schema so downstream code keeps working.
        return pd.DataFrame(columns=columns)
    return pd.concat(all_returns, ignore_index=True)

# Function to calculate financial metrics using ARIMA
def calculate_financial_metrics_arima(df):
    """Compute an AR(1)-based return forecast and volatility stats per ticker.

    Args:
        df: Long-format DataFrame with 'Index' (ticker), 'Monthly_Return'
            and 'Inception_Date' columns.

    Returns:
        DataFrame with one row per ticker, in order of first appearance, and
        columns 'Ticker', 'Inception_Date', 'Expected_Annual_Return',
        'Annualized_Std' and 'Variance'. 'Expected_Annual_Return' is the sum
        of a 12-step forecast and is NaN when the model cannot be fitted.
        Note that 'Variance' is the variance of the *monthly* returns (it is
        not annualized), whereas 'Annualized_Std' is scaled by sqrt(12).
        An empty input yields an empty DataFrame with the same columns.
    """
    columns = ['Ticker', 'Inception_Date', 'Expected_Annual_Return',
               'Annualized_Std', 'Variance']
    results_list = []

    # One pass over the groups instead of re-filtering the frame per ticker;
    # sort=False keeps the tickers in their original order.
    for ticker, group in df.groupby('Index', sort=False):
        # Rows keep their position in the concatenated frame, so every ticker
        # after the first has an integer index that does not start at 0.
        # Resetting it gives the model a clean 0..n-1 index to forecast from.
        ticker_data = group['Monthly_Return'].reset_index(drop=True)

        # Attempt to fit an ARIMA model
        try:
            model = sm.tsa.ARIMA(ticker_data, order=(1, 0, 0))  # AR(1) model
            fitted_model = model.fit()
            forecast = fitted_model.forecast(steps=12).sum()  # Sum of 12 months forecast
        except Exception as e:
            # Best effort: a failed fit must not abort the remaining tickers.
            print(f"Failed to fit ARIMA model for {ticker}: {str(e)}")
            forecast = np.nan

        # Calculate the annualized standard deviation and monthly variance
        monthly_std = ticker_data.std()
        annualized_std = monthly_std * np.sqrt(12)
        variance = monthly_std ** 2

        results_list.append({
            'Ticker': ticker,
            'Inception_Date': group['Inception_Date'].iloc[0],
            'Expected_Annual_Return': forecast,
            'Annualized_Std': annualized_std,
            'Variance': variance})

    # Passing the column list keeps the schema stable even with no rows.
    return pd.DataFrame(results_list, columns=columns)
    
# Step 1: build the long-format table of monthly returns for every ticker in df
resulting_df = get_monthly_returns(df=df)
# Step 2: reduce that table to one row of forecast/volatility metrics per ticker
final_metrics_df = calculate_financial_metrics_arima(df=resulting_df)


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_i

In [30]:
# Display the per-ticker metrics table computed above
final_metrics_df

Unnamed: 0,Ticker,Inception_Date,Expected_Annual_Return,Annualized_Std,Variance
0,SPY,1993-1-24,0.108207,0.149159,0.001854
1,VTI,2001-5-25,0.093942,0.15578,0.002022
2,IVV,2000-5-16,0.08244,0.152878,0.001948
3,QQQ,1999-3-10,0.117275,0.23731,0.004693
4,VXUS,2011-1-27,0.055745,0.154592,0.001992
5,IJH,2000-5-23,0.103983,0.180087,0.002703
6,IJR,2000-5-23,0.10871,0.195697,0.003191
7,VEA,2007-7-21,0.046354,0.18343,0.002804
8,VWO,2005-3-5,0.079914,0.209692,0.003664
9,DIA,1998-1-15,0.093527,0.152234,0.001931


In [35]:
# Hand-entered dividend yields, listed in the same order as the tickers.
# NOTE: the values are fractions (0.0132 means 1.32%) even though the column
# label says "(%)".
tickers = ['SPY', 'VTI', 'IVV', 'QQQ', 'VXUS', 'IJH', 'IJR', 'VEA', 'VWO', 'DIA']
dividend_yields = [.0132, .0140, .0130, .0060, .0330, .0135, .0134, .0335, .0344, .0180]

# NOTE: this rebinds `df`, replacing the ticker/inception-date frame defined earlier.
df = pd.DataFrame(
    list(zip(tickers, dividend_yields)),
    columns=['Ticker', 'Dividend Yield (%)'],
)

# Inner join: only tickers present in both frames are kept.
df_merged = final_metrics_df.merge(df, on='Ticker', how='inner')
df_merged


Unnamed: 0,Ticker,Inception_Date,Expected_Annual_Return,Annualized_Std,Variance,Dividend Yield (%)
0,SPY,1993-1-24,0.108207,0.149159,0.001854,0.0132
1,VTI,2001-5-25,0.093942,0.15578,0.002022,0.014
2,IVV,2000-5-16,0.08244,0.152878,0.001948,0.013
3,QQQ,1999-3-10,0.117275,0.23731,0.004693,0.006
4,VXUS,2011-1-27,0.055745,0.154592,0.001992,0.033
5,IJH,2000-5-23,0.103983,0.180087,0.002703,0.0135
6,IJR,2000-5-23,0.10871,0.195697,0.003191,0.0134
7,VEA,2007-7-21,0.046354,0.18343,0.002804,0.0335
8,VWO,2005-3-5,0.079914,0.209692,0.003664,0.0344
9,DIA,1998-1-15,0.093527,0.152234,0.001931,0.018


In [36]:
# Saves data frame to folder, creating the folder first if it does not exist
# (DataFrame.to_csv does not create missing parent directories).
os.makedirs('Index_Data', exist_ok=True)
df_merged.to_csv('Index_Data/index_data.csv', index=False)