In [24]:
import numpy as np

In [25]:
import pandas as pd

In [26]:
import datetime
from pandas_datareader import data, wb # pip install pandas-datareader

ImportError: No module named pandas_datareader

In [70]:
# Five-year window of daily quotes to request (first day, last day)
start, end = datetime.date(2012, 3, 25), datetime.date(2017, 3, 25)

In [71]:
# Download the AAPL quotes for [start, end] from Yahoo Finance via DataReader
# NOTE(review): this relies on the remote 'yahoo' data source being reachable -- confirm it still serves data
df = data.DataReader(name='AAPL', data_source='yahoo', start=start, end=end)

# Instead of DataReader, the same data can be loaded from a CSV file with
# pd.read_csv("http://chart.finance.yahoo.com/table.csv?s=AAPL&a=2&b=27&c=2012&d=2&e=27&f=2017&g=d&ignore=.csv")

NameError: name 'data' is not defined

In [None]:
# Preview the first five rows of the downloaded quotes
df.head(n=5)

### Plotting the data

In [None]:
import matplotlib
import matplotlib.pyplot as plt

In [None]:
# A column is selected with df["Column name"] (or df.column_name);
# calling .plot() on it draws a line chart of that column.
df["Adj Close"].plot(kind="line", figsize=(12, 8))
plt.show()  # actually render the figure

In [None]:
# Move the Dates out of the index into a regular column (the frame is modified in place)
df.reset_index(drop=False, inplace=True)

In [None]:
# Check the result: the dates should now appear as an ordinary column
df.head(n=5)

In [None]:
import matplotlib.dates as mdates

# Add a column holding every Date as a matplotlib date number
# (each value is first converted to a Python datetime, then passed to date2num)
df['date_num'] = df["Date"].map(lambda stamp: mdates.date2num(stamp.to_pydatetime()))

In [None]:
# One (date_num, Open, Close, High, Low) tuple per DataFrame row -- the values needed for the plot
subset_as_tuples = list(map(tuple, df[['date_num', 'Open', 'Close', 'High', 'Low']].values))

In [None]:
from matplotlib.dates import DateFormatter, MONDAY, WeekdayLocator

# The major ticks of the plot should fall on Mondays ...
mondays = WeekdayLocator(byweekday=MONDAY)

# ... and be labelled with abbreviated month and day, e.g., Jan 12
week_formatter = DateFormatter(fmt='%b %d')

In [None]:
# NOTE(review): matplotlib.finance is not shipped with every matplotlib release --
# confirm the installed version still provides it.
from matplotlib.finance import candlestick_ochl

# plt.subplots() creates the figure itself; a separate plt.figure() call
# beforehand would only leave an extra, empty figure behind.
fig, ax = plt.subplots(figsize=(12,8))

# Use Mondays as the major ticks, labelled by week_formatter
ax.xaxis.set_major_locator(mondays)
ax.xaxis.set_major_formatter(week_formatter)

# Keep only the last 20 rows so the individual candles stay readable
subset_as_tuples = subset_as_tuples[-20:]

# Each tuple is (date_num, Open, Close, High, Low), the order candlestick_ochl expects
candlestick_ochl(ax, subset_as_tuples, width=0.6, colorup='g', colordown='r')
plt.show()

In [None]:
# The helper column was only needed for plotting; continue with a frame that lacks it
df = df.drop(labels="date_num", axis="columns")

## Fundamental financial calculations

In [None]:
# Percentage change from our first closing price to our second:
# (second / first) - 1
percentage_change_0, percentage_change_1 = df["Close"].iloc[0], df["Close"].iloc[1]
percentage_change = percentage_change_1 / percentage_change_0 - 1

print(percentage_change)

In [None]:
# Apply the same formula to the entire DataFrame

Calculations are done on Adj Close instead of Close; this makes more sense when analyzing the distribution of returns and visualizing it. In reality it's probably better to use Close.

In [None]:
import numpy as np

# Daily percentage change: today's price divided by yesterday's, minus one.
# .values strips the index from the denominator so the division is positional;
# row 0 has no previous price and therefore ends up as NaN.
df["DPC"] = df["Adj Close"].iloc[1:] / df["Adj Close"].iloc[:-1].values - 1

# Daily log return: ln(P_t / P_{t-1})
df['Log_Ret'] = np.log(df["Adj Close"] / df["Adj Close"].shift(1))

# Momentum: price difference to the value four rows earlier
df['Momentum'] = df["Adj Close"] - df["Adj Close"].shift(4)

# Disparity: price expressed as a percentage of its 5- / 10-row moving average
df['Disparity in 5 days'] = df["Adj Close"] / df["Adj Close"].rolling(5).mean() * 100
df['Disparity in 10 days'] = df["Adj Close"] / df["Adj Close"].rolling(10).mean() * 100

# Price oscillator: (MA5 - MA10) / MA5.
# Dividing MA5 by MA10 here instead of subtracting would cancel MA5 and leave only 1 / MA10.
df['OSCP'] = (df["Adj Close"].rolling(5).mean() - df["Adj Close"].rolling(10).mean()) / df["Adj Close"].rolling(5).mean()



In [None]:
# Look at the first five rows together with the newly added columns
df.head(n=5)

In [None]:
# The percent change between consecutive rows is also available as a pandas built-in
df["DPC"] = df["Adj Close"].pct_change(periods=1)

In [None]:
# Largest value of each return column first ...
for column in ("Log_Ret", "DPC"):
    print(df[column].max())

# ... then the smallest value of each
for column in ("Log_Ret", "DPC"):
    print(df[column].min())

In [None]:
# First five rows after recomputing DPC
df.head(n=5)

In [None]:
#df.drop("DPC_2", axis=1, inplace=True) 

In [None]:
# The DataFrame now contains NaN values. Two options exist:
#   * dropna() removes every row that holds NaN / None data
#   * fillna() replaces the missing entries with a value of our choice
# Here the gaps are filled with 0, modifying the frame in place.
df.fillna(value=0, inplace=True)

In [None]:
# First five rows after filling the missing values
df.head(n=5)

### Calculating simple daily cumulative returns

This is calculated from the daily percentage change values by multiplying (1 + the current day's percentage change) with the cumulative product of all of the previous values. 

In [None]:
# Daily cumulative return: running product of (1 + daily percentage change).
# The simple percentage changes (DPC) are the ones that compound multiplicatively;
# log returns are additive, so using Log_Ret in this product would not
# reproduce the price ratio P_t / P_0.
df["DCR"] = (1 + df["DPC"]).cumprod()

In [None]:
# First five rows including the cumulative return column
df.head(n=5)

### Analyzing the distribution of returns

Histograms

In [None]:
# Range of the log returns, followed by their histogram in 25 bins
log_returns = df["Log_Ret"]
print("Max value:", log_returns.max())
print("Min value:", log_returns.min())

log_returns.hist(figsize=(12,8), bins=25)
plt.show()

In [None]:
# describe() returns various summary data for a pandas data structure
df.loc[:, "Adj Close"].describe()

We can create a Series with the normalized values (mean subtracted, divided by the standard deviation) of a column within the DataFrame

In [None]:
# Standardise the log returns: subtract the mean, then divide by the standard deviation
norm = df["Log_Ret"].sub(df["Log_Ret"].mean()).div(df["Log_Ret"].std())

and plot that

In [None]:
# Report the largest and the smallest normalized value
for label, value in (("Max value:", norm.max()), ("Min value:", norm.min())):
    print(label, value)

In [None]:
    # Histogram of the normalized values in 25 bins
    norm.hist(figsize=(12,8), bins=25)
    plt.show()

### Moving windows

A number of functions are provided to compute moving (also known as rolling) statistics, where the function computes the statistic on a window of data represented by a particular period of time and then slides the window across the data by a specified interval, continually calculating the statistic as long as the window falls within the dates of the time series.

In [None]:
# Mean over a 3-row window and maximum over a 5-row window of the adjusted close
df["3_day_mean"] = df["Adj Close"].rolling(window=3).mean()
df["5_day_max"] = df["Adj Close"].rolling(window=5).max()

In [None]:
# Ten rows are shown so that the first filled windows become visible
df.head(n=10)

### Volatility calculation

The following command calculates the volatility of the stock with a window of 75 days:

In [47]:
min_periods = 75  # used below as the length of the rolling window

# Standard deviation of the log returns inside each rolling window,
# multiplied by the square root of the window length (numpy's np.sqrt())
volatility = df["Log_Ret"].rolling(window=min_periods).std().mul(np.sqrt(min_periods))

volatility.plot()
plt.show()

KeyError: 'Log_Ret'