# S&P 500 Index Daily Data.
- Fetches daily S&P 500 data from Yahoo Finance, from the end of 1927 (the earliest date for which data is available) to today.

In [1]:
# %pip install yfinance  # uncomment to install into the kernel's environment

In [2]:
import pandas as pd
import numpy as np
import yfinance as yf
from datetime import datetime
import os

In [3]:
# Download the complete daily history of the S&P 500 index (^GSPC).
# The period and adjustment mode are spelled out rather than left to library
# defaults: the notebook needs data back to 1927 and later cells expect an
# "Adj Close" column.
# NOTE(review): newer yfinance releases appear to default to a shorter period
# and to auto-adjusted prices - confirm against the installed version.
gspc_historical_df = yf.download('^GSPC', period="max", auto_adjust=False)

[*********************100%***********************]  1 of 1 completed


## Fixing Open Price = 0.0
- Note: for some dates the recorded open price is 0.0.
- Although not ideal, I will assume that the opening price of a trading day equals the closing price of the previous trading day.

In [4]:
# Scratch copy used to try out the Open-price fix before touching the real frame.
# Copying the frame already downloaded avoids a second network request and
# guarantees the experiment runs on exactly the same rows.
test = gspc_historical_df.copy()

[*********************100%***********************]  1 of 1 completed


In [5]:
# Move the date index into a regular column so rows are addressed by position.
test = test.reset_index(drop=False)

In [6]:
# Preview the first five rows of the scratch frame.
test.head(5)

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,1927-12-30,17.66,17.66,17.66,17.66,17.66,0
1,1928-01-03,17.76,17.76,17.76,17.76,17.76,0
2,1928-01-04,17.719999,17.719999,17.719999,17.719999,17.719999,0
3,1928-01-05,17.549999,17.549999,17.549999,17.549999,17.549999,0
4,1928-01-09,17.5,17.5,17.5,17.5,17.5,0


The open price of 0.0 appears to occur between 1 January 1962 and 10 April 1982. Check whether this is the case for all rows within this date range.

In [7]:
# Index labels of every row whose Open price was recorded as 0.
test_noopen_indices = test.loc[test["Open"].eq(0)].index

In [8]:
# Replace each missing Open (recorded as 0.0) with the previous trading day's Close.
# Done in one vectorised assignment instead of a per-row loop. A flagged row
# with no previous close (e.g. the very first row) is left untouched rather
# than wrapping around to the last row of the frame.
test_prev_close = test["Close"].shift(1)
test_fillable = test_prev_close.notna() & test.index.isin(test_noopen_indices)
test.loc[test_fillable, "Open"] = test_prev_close[test_fillable]

In [9]:
# Sanity check: number of rows still carrying an Open price of 0.
int(test['Open'].eq(0).sum())

0

#### Applying it to the actual dataset:

In [10]:
# Move the date index into a regular column so rows are addressed by position.
gspc_historical_df = gspc_historical_df.reset_index(drop=False)

In [11]:
# Index labels of every row whose Open price was recorded as 0.
gspc_noopen_indices = gspc_historical_df.loc[gspc_historical_df['Open'].eq(0)].index

In [12]:
# Replace each missing Open (recorded as 0.0) with the previous trading day's Close.
# Done in one vectorised assignment instead of a per-row loop. A flagged row
# with no previous close (e.g. the very first row) is left untouched rather
# than wrapping around to the last row of the frame.
gspc_prev_close = gspc_historical_df["Close"].shift(1)
gspc_fillable = gspc_prev_close.notna() & gspc_historical_df.index.isin(gspc_noopen_indices)
gspc_historical_df.loc[gspc_fillable, "Open"] = gspc_prev_close[gspc_fillable]

In [13]:
# Sanity check: number of rows still carrying an Open price of 0.
int(gspc_historical_df['Open'].eq(0).sum())

0

## Adding Calculated Columns
In particular:
- % Change in Closing Price: gain/loss (based on closing prices, as opening price information is not accurate for all dates)
- % Daily Price variation

In [14]:
# Day-over-day difference in Close, computed in one vectorised pass instead of
# building a row object per element. The first trading day (1927-12-30) has no
# predecessor, so its change is defined as 0.
change_closing_arr = np.concatenate(
    ([0.0], np.diff(gspc_historical_df['Close'].to_numpy()))
)

In [15]:
# Preview the day-over-day closing differences (NumPy abbreviates long arrays).
change_closing_arr

array([ 0.00000000e+00,  1.00000381e-01, -4.00009155e-02, ...,
        3.76503906e+01, -6.95019531e+00, -4.07597656e+01])

In [16]:
# Percentage gain/loss relative to the PREVIOUS day's close, which is the base
# of the move. Dividing by the current day's close would understate gains and
# overstate losses.
base_close = gspc_historical_df['Close'].shift(1)
# The first row has no previous close, so the division yields NaN there;
# report it as 0 to match the zero change recorded for that day.
change_closing_percent = (change_closing_arr * 100 / base_close).fillna(0.0)

In [17]:
# Attach the percentage change in close as a new column.
gspc_historical_df = gspc_historical_df.assign(
    **{"% Gain/Loss (Close)": change_closing_percent}
)

In [18]:
# Intraday range (High - Low) as a percentage of the day's close.
# Scaled by 100 so the values really are percentages, consistent with the
# column label and with the "% Gain/Loss (Close)" column.
gspc_historical_df["% Price Variation"] = (
    (gspc_historical_df['High'] - gspc_historical_df['Low'])
    / gspc_historical_df['Close']
    * 100
)

In [19]:
# Put the date back as the index now that positional row access is finished.
gspc_historical_df = gspc_historical_df.set_index("Date")

# Decimal places to keep per column, keyed by column NAME rather than position.
# A positional list breaks (or silently mis-assigns precision) whenever the
# set or order of columns changes. DataFrame.round leaves columns not listed
# here unchanged and ignores entries for columns that are absent.
decimals = pd.Series({
    "Open": 2,
    "High": 2,
    "Low": 2,
    "Close": 2,
    "Adj Close": 2,
    "Volume": 0,
    "% Gain/Loss (Close)": 4,
    "% Price Variation": 3,
})

In [20]:
# Apply the per-column precision defined in `decimals`.
gspc_historical_df = gspc_historical_df.round(decimals=decimals)

#### Exporting to CSV

In [21]:
# Write the finished table to a CSV file in the current working directory.
file_name = "SPX_500_Data.csv"
path = os.path.join(
    os.getcwd(),
    file_name,
)
gspc_historical_df.to_csv(path_or_buf=path)