# Financial Time Series Data in Python

We examine how to load and manipulate financial time series data using pandas.

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import datetime as dt

In [19]:
# Alias pd.api.types.is_list_like under pd.core.common BEFORE importing
# pandas_datareader.
# NOTE(review): presumably a workaround for pandas_datareader releases that
# still look up is_list_like in pd.core.common -- confirm it is still needed
# for the installed versions.
pd.core.common.is_list_like = pd.api.types.is_list_like

from pandas_datareader import data as pdr

# Requires the fix_yahoo_finance package (pip install fix_yahoo_finance).
# NOTE(review): the package appears to have been renamed `yfinance` upstream --
# confirm before upgrading the environment.
import fix_yahoo_finance as yf
# pdr_override() is expected to route pdr.get_data_yahoo() through
# fix_yahoo_finance; it must run after both imports above.
yf.pdr_override() # <== that's all it takes :-)

In [12]:
# Instruments to download: eight individual stocks plus the ^GSPC index.
tickers = [
    'AAPL', 'AMZN', 'BA', 'BLCM', 'CHK',
    'FB', 'MSFT', 'XRX', '^GSPC',
]

# Requested date range for the download.
start = dt.datetime(2013, 1, 1)
end = dt.datetime(2016, 1, 27)

# Fetch the price history for every ticker in one call (needs network access).
df = pdr.get_data_yahoo(tickers, start, end)

[*********************100%***********************]  9 of 9 downloaded


In [17]:
type(df)

pandas.core.frame.DataFrame

## Loading the Data

In [13]:
# Load price data that was downloaded earlier and saved to disk as a pickle file.
# Pickle is Python's object-serialization format (it is not inherently compressed).
# NOTE(review): unpickling can execute arbitrary code -- only load files that
# come from a trusted source.
# NOTE(review): the recorded output shows this file holds a pandas Panel, which
# newer pandas releases no longer provide -- confirm the pinned pandas version.
fileName = "./data/timeSeriesData.pkl"
panel_data = pd.read_pickle(fileName)

In [6]:
panel_data

<class 'pandas.core.panel.Panel'>
Dimensions: 6 (items) x 1009 (major_axis) x 9 (minor_axis)
Items axis: Adj Close to Volume
Major_axis axis: 2012-12-31 00:00:00 to 2016-12-30 00:00:00
Minor_axis axis: AAPL to ^GSPC

In [18]:
type(panel_data)

pandas.core.panel.Panel

In [7]:
panel_data.items

Index(['Adj Close', 'Close', 'High', 'Low', 'Open', 'Volume'], dtype='object')

In [11]:
panel_data.minor_axis

Index(['AAPL', 'AMZN', 'BA', 'BLCM', 'CHK', 'FB', 'MSFT', 'XRX', '^GSPC'], dtype='object')

## Close Price

In [None]:
# Date window for the analysis, given as ISO-formatted date strings.
start_date, end_date = '2013-01-01', '2016-12-31'

# Every business day (Monday-Friday, freq='B') from start_date through end_date.
# Market holidays are NOT excluded by this calendar.
all_weekdays = pd.date_range(start_date, end_date, freq='B')

In [None]:
all_weekdays

In [None]:
# Take the unadjusted closing prices (one column per ticker) and align them to
# the business-day calendar in one step. Dates in all_weekdays that have no
# quote in the source data come back as NaN rows; nothing is filled here.
close = panel_data['Close'].reindex(all_weekdays)

close.head(10)

In [None]:
# AAPL's unadjusted closing prices, selected from the 'Close' item of the panel.
ts = panel_data['Close']['AAPL']

In [None]:
from matplotlib import pyplot 

In [None]:
# Plot AAPL's closing price on an explicit Axes so the chart carries its own
# title and axis labels and can be read without the surrounding code.
fig, ax = plt.subplots(figsize=(10, 8))
ts.plot(ax=ax)
ax.set_title('AAPL closing price')
ax.set_xlabel('Date')
ax.set_ylabel('Closing price ($)');  # trailing ';' keeps the text repr out of the cell output

## The Adjusted Close Price
The adjusted close is the closing price adjusted to account for stock splits and dividends.

In [None]:
# Adjusted closing prices as a DataFrame: one column per ticker, indexed by
# the dates on panel_data's major axis.
adj_close = panel_data['Adj Close']

# All business days (Monday-Friday) from start_date through end_date.
all_weekdays = pd.date_range(start=start_date, end=end_date, freq='B')

# Align the prices to that calendar. Business days with no quote in the source
# data (e.g. market holidays) show up as NaN rows after the reindex.
adj_close = adj_close.reindex(all_weekdays)

# Carry the latest available price forward into those gaps. DataFrame.ffill()
# is used instead of fillna(method='ffill'), which newer pandas releases
# deprecate. Rows that precede an instrument's first available price stay NaN
# because there is nothing earlier to carry forward.
adj_close = adj_close.ffill()

In [None]:
all_weekdays

In [None]:
adj_close.head(7)

In [None]:
# Overlay AAPL's adjusted and unadjusted closing prices on one figure so the
# effect of the adjustment is visible.
plt.figure(figsize=(10, 8))
for label, prices in (("Adjusted Close", adj_close), ("Close", close)):
    prices['AAPL'].plot(label=label)
plt.legend()

## Moving Averages

In [None]:
# We can examine the data
adj_close.describe()

In [None]:
# Get the MSFT time series. This now returns a Pandas Series object indexed by date.
msft = adj_close['MSFT']

In [None]:
# 20- and 100-row rolling means of the MSFT adjusted-close series (one row per
# business day). A value is only produced once a full window of observations
# is available, so each result starts with NaN entries.
short_rolling_msft, long_rolling_msft = (
    msft.rolling(window=span).mean() for span in (20, 100)
)

In [None]:
# Draw the MSFT price together with its two rolling means on a single Axes.
fig, ax = plt.subplots(figsize=(10, 8))
for label, series in (
    ('MSFT', msft),
    ('20 days rolling', short_rolling_msft),
    ('100 days rolling', long_rolling_msft),
):
    ax.plot(series.index, series, label=label)
ax.set(xlabel='Date', ylabel='Adjusted closing price ($)')
ax.legend()

## Correlations

In [None]:
# Row-over-row percentage change of every adjusted-close column; the first row
# has no predecessor and is therefore NaN.
# NOTE(review): adj_close was forward-filled onto a business-day calendar, so
# filled-in days contribute 0% returns -- confirm that is acceptable for the
# correlation analysis.
adj_close_rets = adj_close.pct_change()

In [None]:
adj_close_rets

In [None]:
# Pairwise scatter plots of the daily returns, with a kernel density estimate
# of each ticker's returns on the diagonal.
# scatter_matrix builds its own figure (sized by `figsize`), so no separate
# plt.figure() call is made: that would only emit an extra, empty figure.
axes = pd.plotting.scatter_matrix(adj_close_rets, diagonal='kde', figsize=(10, 10))
# Keep `fig` bound to the figure that actually holds the plots.
fig = axes[0, 0].figure

In [None]:
# Pairwise correlation between the tickers' return columns, using
# DataFrame.corr's default settings; the result is displayed as the cell output.
corr = adj_close_rets.corr()
corr

In [None]:
# Render the correlation matrix as a heat map with one tick per ticker on
# both axes and a colour bar for the scale.
fig = plt.figure(figsize=(10, 8))
plt.imshow(corr, interpolation='none', cmap='hot')
plt.colorbar()
tick_positions = range(len(corr))
plt.xticks(tick_positions, corr.columns)
plt.yticks(tick_positions, corr.columns);