## Import Statements

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os

from util import plot_data

In [None]:
def symbol_to_path(symbol, base_dir="data"):
    """Build the path of the CSV file that holds data for a ticker symbol.

    The file name is the string form of ``symbol`` followed by ``.csv``,
    joined onto ``base_dir``.
    """
    file_name = str(symbol) + ".csv"
    return os.path.join(base_dir, file_name)

In [None]:
def get_data(symbols, dates, addSPY=True, colname="Adj Close"):
    """Load one column per symbol from CSV files into a date-indexed frame.

    Args:
        symbols: Iterable of ticker symbols; each is resolved to a file
            through ``symbol_to_path``.
        dates: Index used for the resulting DataFrame.
        addSPY: When true and "SPY" is not among ``symbols``, "SPY" is
            placed at the front of the list.
        colname: Name of the CSV column to read; it is renamed to the
            symbol in the result.

    Returns:
        DataFrame indexed by ``dates`` with one column per symbol. Once the
        "SPY" column has been joined, rows where it is missing are dropped.
    """
    prices = pd.DataFrame(index=dates)

    # Put the reference symbol first. list() also copes with `symbols`
    # arriving as a NumPy array of objects.
    if addSPY and "SPY" not in symbols:
        symbols = ["SPY"] + list(symbols)

    for ticker in symbols:
        column = pd.read_csv(
            symbol_to_path(ticker),
            index_col="Date",
            parse_dates=True,
            usecols=["Date", colname],
            na_values=["nan"],
        ).rename(columns={colname: ticker})
        prices = prices.join(column)

        if ticker == "SPY":
            # Keep only the dates for which SPY has a value.
            prices = prices.dropna(subset=["SPY"])

    return prices

## Plot a histogram

In [None]:
def compute_daily_returns(df):
    """Compute and return the daily return values.

    Every row holds ``value[t] / value[t-1] - 1`` for each column, where
    ``t-1`` is the row immediately above. The first row has no predecessor
    and is set to 0.

    Args:
        df: DataFrame of numeric values, one column per symbol. Rows are
            treated as consecutive periods in the order given.

    Returns:
        A new DataFrame with the same index and columns as ``df``. The
        input is not modified. An empty input yields an empty result
        instead of raising.
    """
    # shift(1) moves each row's predecessor onto the same label, so the
    # division pairs every row with the one before it and leaves NaN in
    # row 0. Building a fresh frame (rather than writing floats into a copy
    # of df) keeps integer-typed inputs from needing a dtype upcast on
    # assignment.
    daily_returns = df / df.shift(1) - 1

    # Guard the first-row reset: there is no row 0 in an empty frame.
    if not daily_returns.empty:
        daily_returns.iloc[0] = 0
    return daily_returns

In [None]:
# Load SPY prices for 2009-2012 and plot them
symbols = ['SPY']
dates = pd.date_range(start='2009-01-01', end='2012-12-31')
df = get_data(symbols, dates)
plot_data(df)

# Turn the prices into day-over-day returns and plot the series
daily_returns = compute_daily_returns(df)
plot_data(daily_returns, title="Daily returns", ylabel="Daily returns")

# Draw the distribution of returns twice, each on its own figure:
# first with 10 bins (the pandas default), then with a finer 20 bins
daily_returns.hist(bins=10)
daily_returns.hist(bins=20)
plt.show()

## Computing Histogram Statistics

In [None]:
# Read data
dates = pd.date_range('2009-01-01', '2012-12-31')
symbols = ['SPY']
df = get_data(symbols, dates)
plot_data(df)

# Compute daily returns
daily_returns = compute_daily_returns(df)
plot_data(daily_returns, title="Daily returns", ylabel="Daily returns")

# Plot a histogram
daily_returns.hist(bins=20)  # changing no. of bins to 20

# Get mean and standard deviation
mean = daily_returns['SPY'].mean()
print("mean=", mean)
std = daily_returns['SPY'].std()
print("std=", std)

# Mark the mean (white) and one standard deviation on either side of it (red).
# The deviation lines are offset from the mean, not from zero, so they stay
# centred on the white line even when the mean is not exactly 0.
plt.axvline(mean, color='w', linestyle='dashed', linewidth=2)
plt.axvline(mean + std, color='r', linestyle='dashed', linewidth=2)
plt.axvline(mean - std, color='r', linestyle='dashed', linewidth=2)
plt.show()

# Compute kurtosis
print(daily_returns.kurtosis())

## Plot Two Histograms Together

In [None]:
# Load SPY and XOM prices for 2009-2012 and plot them
symbols = ['SPY', 'XOM']
dates = pd.date_range(start='2009-01-01', end='2012-12-31')
df = get_data(symbols, dates)
plot_data(df)

""" Two separate histograms ==========="""
# Day-over-day returns for both symbols, plotted as a time series
daily_returns = compute_daily_returns(df)
plot_data(daily_returns, title="Daily returns", ylabel="Daily returns")

# DataFrame.hist draws one histogram per column, side by side
daily_returns.hist(bins=20)
plt.show()

""" Histograms on the same graph ======"""
# Recompute the returns so this section does not depend on the one above
daily_returns = compute_daily_returns(df)

# Series.hist draws onto the current axes, so calling it once per symbol
# overlays the histograms on a single chart; the label feeds the legend
for ticker in symbols:
    daily_returns[ticker].hist(bins=20, label=ticker)
plt.legend(loc='upper right')
plt.show()

## Scatterplots

In [None]:
def compute_daily_returns(df):
    """Compute and return the daily return values.

    Every row holds ``value[t] / value[t-1] - 1`` for each column, where
    ``t-1`` is the row immediately above. The first row has no predecessor
    and is set to 0.

    Args:
        df: DataFrame of numeric values, one column per symbol. Rows are
            treated as consecutive periods in the order given.

    Returns:
        A new DataFrame with the same index and columns as ``df``. The
        input is not modified. An empty input yields an empty result
        instead of raising.
    """
    # shift(1) moves each row's predecessor onto the same label, so the
    # division pairs every row with the one before it and leaves NaN in
    # row 0. Building a fresh frame (rather than writing floats into a copy
    # of df) keeps integer-typed inputs from needing a dtype upcast on
    # assignment.
    daily_returns = df / df.shift(1) - 1

    # Guard the first-row reset: there is no row 0 in an empty frame.
    if not daily_returns.empty:
        daily_returns.iloc[0] = 0
    return daily_returns

In [None]:
# Load prices for SPY and the two symbols compared against it
symbols = ['SPY', 'XOM', 'GLD']
dates = pd.date_range(start='2009-01-01', end='2012-12-31')
df = get_data(symbols, dates)

# Day-over-day returns for every symbol
daily_returns = compute_daily_returns(df)

# XOM against SPY: fit a degree-1 polynomial to the returns; the slope is
# reported as beta and the intercept as alpha
beta_XOM, alpha_XOM = np.polyfit(
    daily_returns['SPY'], daily_returns['XOM'], deg=1
)
daily_returns.plot(kind='scatter', x='SPY', y='XOM')
print("beta_XOM= ", beta_XOM)
print("alpha_XOM=", alpha_XOM)
# Overlay the fitted line on the scatterplot in red
plt.plot(
    daily_returns['SPY'],
    beta_XOM * daily_returns['SPY'] + alpha_XOM,
    '-',
    color='r',
)
plt.show()

# GLD against SPY: same fit and overlay
beta_GLD, alpha_GLD = np.polyfit(
    daily_returns['SPY'], daily_returns['GLD'], deg=1
)
daily_returns.plot(kind='scatter', x='SPY', y='GLD')
print("beta_GLD= ", beta_GLD)
print("alpha_GLD=", alpha_GLD)
plt.plot(
    daily_returns['SPY'],
    beta_GLD * daily_returns['SPY'] + alpha_GLD,
    '-',
    color='r',
)
plt.show()