<a href="https://colab.research.google.com/github/radosty/radosty.github.io/blob/main/S%26P500%2Cnasdaqstockdata.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
import yfinance as yf
import pandas as pd

# List of indices
tickers = ['^GSPC', '^IXIC']  # S&P 500 and NASDAQ

# Define the date range for 2021
start_date = '2021-01-01'
end_date = '2021-12-31'

# Create an empty DataFrame to store all index data
all_index_data = pd.DataFrame()

for ticker in tickers:
    try:
        # Create a Ticker object
        index = yf.Ticker(ticker)

        # Fetch historical data
        hist = index.history(start=start_date, end=end_date, interval="1mo")

        if hist.empty:
            print(f"No data for {ticker}, skipping.")
            continue

        # Reset index to make Date a column
        hist.reset_index(inplace=True)

        # Replace ticker symbol with descriptive names
        hist['Ticker'] = 'S&P 500' if ticker == '^GSPC' else 'NASDAQ'

        # Calculate monthly return
        hist['Monthly Return (%)'] = hist['Close'].pct_change() * 100

        # Select relevant columns
        hist = hist[['Date', 'Ticker', 'Close', 'Monthly Return (%)']]

        # Append to the main DataFrame
        all_index_data = pd.concat([all_index_data, hist], ignore_index=True)

    except Exception as e:
        print(f"Error processing {ticker}: {e}")

# Drop any missing values from the DataFrame
all_index_data.dropna(inplace=True)

# Display the first few rows of the combined data after dropping missing values
print(all_index_data.head())

# Save the raw data to CSV without missing values
all_index_data.to_csv("raw_index_data_2021.csv", index=False)
print("\nRaw index data saved.")

# Calculate average monthly performance across indices
average_monthly_performance = all_index_data.groupby(['Date', 'Ticker']).agg({
    'Close': 'mean',
    'Monthly Return (%)': 'mean'
}).reset_index()

# Drop any missing values from the average performance DataFrame
average_monthly_performance.dropna(inplace=True)

# Save the averaged performance data without missing values
average_monthly_performance.to_csv("average_index_performance_2021.csv", index=False)
print("Average index performance data saved.")


                       Date   Ticker        Close  Monthly Return (%)
1 2021-02-01 00:00:00-05:00  S&P 500  3811.149902            2.609145
2 2021-03-01 00:00:00-05:00  S&P 500  3972.889893            4.243863
3 2021-04-01 00:00:00-04:00  S&P 500  4181.169922            5.242532
4 2021-05-01 00:00:00-04:00  S&P 500  4204.109863            0.548649
5 2021-06-01 00:00:00-04:00  S&P 500  4297.500000            2.221401

Raw index data saved.
Average index performance data saved.
