In this notebook we first look at the historical gains of the Standard & Poor's 500 stock market index, especially after the Second World War. Later we compare the gains of the S&P 500 with those of other investments.

# S&P analysis

## Load libraries, data

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib

In [None]:
# Load the S&P 500 history; the first column (index 0) is parsed as dates.
spx_csv_path = '../input/sp-500-historical-data/SPX.csv'
spx = pd.read_csv(spx_csv_path, parse_dates=[0])

# Show the first rows as the cell output.
spx.head()

## Price evolution since 1928

In [None]:
# Draw the whole adjusted-close history in black on the current axes.
ax = plt.gca()
ax.plot(
    matplotlib.dates.date2num(spx['Date']),
    spx['Adj Close'],
    color='k',
)

# x axis: one major tick every 10 years, labelled with the four-digit year.
ax.xaxis.set_major_locator(matplotlib.dates.YearLocator(10))
ax.xaxis.set_major_formatter(matplotlib.dates.DateFormatter('%Y'))

# y axis: logarithmic, so a constant growth rate shows up as a straight line.
ax.set_ylabel('SPX 500')
ax.set_yscale('log')

## Situation after the Second World War

In [None]:
# Keep only the rows dated later than np.datetime64('1945'), i.e. after the
# very start of 1945. The original row labels are retained (no index reset).
is_after_war = spx['Date'] > np.datetime64('1945')
spx_after_war = spx[is_after_war]

In [None]:
# Fit a straight line to log10(adjusted close) as a function of time, which
# models the price as exponential growth. `t` is matplotlib's date number,
# counted in days.
t     = matplotlib.dates.date2num(spx_after_war['Date'])
log_p = np.log10(spx_after_war['Adj Close'])

# fit[0] is the slope (log10 units per day), fit[1] the intercept.
fit   = np.polyfit(t, log_p, deg = 1)

# The slope is a change of log10(price) per day, not a fractional gain.
# The fractional gain over n days is 10**(n*slope) - 1, so the daily figure
# must be converted in the same way as the annual one before it is shown as
# a percentage.
daily_log10_slope           = fit[0]
typical_daily_appreciation  = 10**daily_log10_slope - 1.
typical_annual_appreciation = 10**(365*daily_log10_slope) - 1.

print(f'Typical daily appreciation is {100*typical_daily_appreciation:.2f}%')
print(f'Typical annual appreciation is {100*typical_annual_appreciation:.2f}%')

In [None]:
# Overlay the post-war adjusted close and the fitted exponential trend.
ax = plt.gca()
post_war_dates = matplotlib.dates.date2num(spx_after_war['Date'])

ax.plot(
    post_war_dates,
    spx_after_war['Adj Close'],
    color='k',
    label='S&P 500',
)
# The fit was made on log10(price), so raise 10 to the fitted line to get
# back to price units.
ax.plot(
    post_war_dates,
    10**(fit[0]*t + fit[1]),
    color='r',
    ls='--',
    label='Fit',
)

ax.legend()

# x axis: one major tick every 10 years, labelled with the four-digit year.
ax.xaxis.set_major_locator(matplotlib.dates.YearLocator(10))
ax.xaxis.set_major_formatter(matplotlib.dates.DateFormatter('%Y'))

# y axis: logarithmic, so the exponential fit appears as a straight line.
ax.set_ylabel('SPX 500')
ax.set_yscale('log')

## Distribution of daily gains

In [None]:
# Fractional change between consecutive rows of the post-war adjusted close:
# gain[i] = price[i+1] / price[i] - 1
post_war_close = spx_after_war['Adj Close'].values
daily_fractional_gain = post_war_close[1:] / post_war_close[:-1] - 1

# Histogram with log-scaled counts, so sparsely populated bins stay visible.
plt.hist(daily_fractional_gain, bins=40, log=True)
plt.xlabel('Daily fractional change');

**The distribution has a pronounced tail on the side of negative gains.**

In [None]:
# Fraction of days whose absolute move exceeds each percentage threshold.
abs_daily_gain = np.abs(daily_fractional_gain)
for pct in (1, 2, 5, 10):
    days_over_threshold  = np.count_nonzero(abs_daily_gain > pct/100)
    prob_daily_over_Xpct = days_over_threshold/len(daily_fractional_gain)
    print(f'Probability of seeing {pct}+% daily change is {100*prob_daily_over_Xpct:.2f}%')

In [None]:
# Fraction of days with a strictly positive change.
days_up = np.count_nonzero(daily_fractional_gain > 0)
prob_daily_up = days_up/len(daily_fractional_gain)
print(f'Probability of market going up on a random day is {100*prob_daily_up:.1f}%')


# Annual gains

In [None]:
# Calendar years 1928..2020 inclusive.
spx_years = np.arange(1928, 2021)

# Adjusted close of the first row found for each year. This is the first
# trading day of the year only if the rows are in chronological order
# (assumed here, TODO confirm). A year with no rows raises IndexError.
year_of_row   = spx['Date'].dt.year
adj_close_all = spx['Adj Close']
spx_at_beginning_of_the_year = np.array(
    [adj_close_all[year_of_row == year].values[0] for year in spx_years]
)

In [None]:
# Fractional gain from the start of each year to the start of the next one:
# gain[i] = price[i+1] / price[i] - 1, so the result is one element shorter.
start_of_next_year = spx_at_beginning_of_the_year[1:]
start_of_this_year = spx_at_beginning_of_the_year[:-1]
spx_annual_gains = start_of_next_year/start_of_this_year - 1

In [None]:
# Histogram of the year-over-year gains, split into 20 bins.
ax = plt.gca()
ax.hist(spx_annual_gains, bins=20)
ax.set_title('Distribution of SPX 500 annual gains')
plt.show()

In [None]:
# One bar per year. The last year is dropped from the x values because there
# is one fewer gain than there are years.
ax = plt.gca()
ax.bar(spx_years[:-1], spx_annual_gains)
ax.set_ylabel('SPX annual gain')
ax.set_title('Annual gains of SPX 500')
plt.show()

## What is the largest drop anyone would ever have suffered while holding indefinitely?

In [None]:
# Worst-case buy-and-hold outcome: assume we buy at some day's high and hold.
# What is the lowest price we would see from that day onwards, and for which
# purchase day is the fractional drop the largest?
max_drop_ever = 0 #Will be negative
date_of_max_drop_ever = None

highs = spx['High'].to_numpy(dtype=float)
lows  = spx['Low'].to_numpy(dtype=float)

# lowest_from_here[i] == min(lows[i:]), obtained with a single running minimum
# over the reversed array instead of re-scanning the tail for every row.
# np.fmin skips NaN entries, matching the NaN-skipping minimum of a Series.
lowest_from_here = np.fmin.accumulate(lows[::-1])[::-1]
drops            = lowest_from_here / highs - 1

# nanargmin raises on empty or all-NaN input, so guard against both. It
# returns the first occurrence of the minimum, i.e. the earliest purchase day
# among ties.
if np.any(~np.isnan(drops)):
    worst_idx = int(np.nanargmin(drops))
    if drops[worst_idx] < max_drop_ever:
        max_drop_ever         = drops[worst_idx]
        date_of_max_drop_ever = spx['Date'].iloc[worst_idx].date()

print(f'If we bought on {date_of_max_drop_ever} and held, we would at one point lost {-100*max_drop_ever:.0f}% of the initial investment')

# Compare with other investments

In [None]:
# Load the monthly gold prices; the first column (index 0) is parsed as dates.
gold_csv_path = '../input/gold-prices/monthly_csv.csv'
gold = pd.read_csv(gold_csv_path, parse_dates=[0])

In [None]:
# Install xlrd so that pandas can read MS Excel (.xls) files
!pip install xlrd

In [None]:
from calendar import isleap #Check whether year is leap
import datetime

def decimal_year_to_datetime(year):
    """
    Convert a decimal year such as 1980.45 into a ``datetime.datetime``.

    The integer part selects the calendar year. The fractional part is taken
    as the elapsed fraction of that year (366 days in a leap year, 365
    otherwise) and is added to midnight on 1 January of that year.
    """
    calendar_year = int(year)
    year_length   = 366. if isleap(calendar_year) else 365.
    elapsed       = datetime.timedelta(days = (year - calendar_year)*year_length)
    new_year_day  = datetime.datetime(calendar_year, 1, 1)

    return new_year_day + elapsed

In [None]:
# Read the house-price workbook: skip the first 6 rows and keep only the
# columns at positions 0 and 8.
housing_xls_path = '../input/housing-market-data-used-inirrational-exuberance/House prices.xls'
housing = pd.read_excel(housing_xls_path, skiprows=6, usecols=[0, 8])
housing.columns = ['Date', 'Nominal Index']

# Turn each value of the date column into a datetime with
# decimal_year_to_datetime, which expects decimal years such as 1980.45.
housing['Date'] = list(map(decimal_year_to_datetime, housing['Date']))

# Show the first rows as the cell output.
housing.head()

In [None]:
# Load the Russell 2000 history; the first column (index 0) is parsed as dates.
russell2000_csv_path = '../input/russell-2000/russell_2000.csv'
russell2000 = pd.read_csv(russell2000_csv_path, parse_dates=[0])

# Show the first rows as the cell output.
russell2000.head()

In [None]:
# Plot all four investments against time on one set of axes.
ax = plt.gca()

# (data frame, value column, line colour, legend label) for each series.
series_to_plot = [
    (spx,         'Adj Close',     'k',      'S&P 500'),
    (gold,        'Price',         'orange', 'Gold'),
    (housing,     'Nominal Index', 'Green',  'Housing'),
    (russell2000, 'Close',         'Red',    'Russell 2000'),  # label spelling fixed (was 'Russel 2000')
]
for frame, value_column, colour, label in series_to_plot:
    ax.plot(
        matplotlib.dates.date2num(frame['Date']),
        frame[value_column],
        color=colour,
        label=label,
    )

ax.legend(loc=2)

# x axis: one major tick every 20 years, labelled with the four-digit year.
ax.xaxis.set_major_locator(matplotlib.dates.YearLocator(20))
ax.xaxis.set_major_formatter(matplotlib.dates.DateFormatter('%Y'))

# y axis: the plot shows four different assets, so the label must not name
# the S&P 500 alone. The log scale lets series of very different magnitude
# share one axis.
ax.set_ylabel('Price / index level')
ax.set_yscale('log')