<img src="http://hilpisch.com/tpq_logo.png" alt="The Python Quants" width="35%" align="right" border="0"><br>

# Python for Financial Data Science

Dr Yves J Hilpisch | The Python Quants GmbH

http://tpq.io | <a href="mailto:training@tpq.io">training@tpq.io</a>


<img src="http://hilpisch.com/images/py4fi_2nd.png" width="35%" align="left">

# Financial Time Series

In [None]:
# Clone the course repository via an IPython shell escape and add the
# resulting directory to the module search path.
# NOTE(review): no cell visible here imports from the cloned repository —
# confirm whether this setup step is still required.
!git clone https://github.com/tpq-classes/financial_data_science_.git
import sys
sys.path.append('financial_data_science_')


In [None]:
import numpy as np
import pandas as pd
from pylab import mpl, plt
plt.style.use('seaborn-v0_8')
mpl.rcParams['font.family'] = 'serif'
%matplotlib inline

## Financial Data

### Data Import

In [None]:
# Location of the end-of-day data set (CSV file served over HTTP).
filename = 'http://hilpisch.com/tr_eikon_eod_data.csv'

In [None]:
# Read the CSV, using its first column as the index and parsing it as dates.
data = pd.read_csv(filename,
                   index_col=0,
                   parse_dates=True)

In [None]:
# Summary of the DataFrame's index, columns and dtypes.
data.info()

In [None]:
# First rows of the data set.
data.head()

In [None]:
# Last rows of the data set.
data.tail()

In [None]:
# One subplot per column; the trailing semicolon suppresses the text output
# of the plot call in the notebook.
data.plot(figsize=(10, 12), subplots=True);

In [None]:
# Human-readable instrument names. They are matched to the data columns
# purely by position, so the order of this list matters.
instruments = [
    'Apple Stock',
    'Microsoft Stock',
    'Intel Stock',
    'Amazon Stock',
    'Goldman Sachs Stock',
    'SPDR S&P 500 ETF Trust',
    'S&P 500 Index',
    'VIX Volatility Index',
    'EUR/USD Exchange Rate',
    'Gold Price',
    'VanEck Vectors Gold Miners ETF',
    'SPDR Gold Trust',
]

In [None]:
# Print each column symbol (left-aligned, padded to 8 characters) next to
# the instrument name at the same position.
for symbol, description in zip(data.columns, instruments):
    print(f'{symbol:8s} | {description}')

### Summary Statistics

In [None]:
# Summary of the DataFrame's index, columns and dtypes.
data.info()

In [None]:
# Standard descriptive statistics per column, rounded to two decimals.
data.describe().round(2)

In [None]:
# Mean of every column.
data.mean()

In [None]:
# Compute several statistics per column in a single call.
# String aliases are used instead of builtin/NumPy callables: pandas
# dispatches those callables to its own methods anyway, and recent releases
# emit a FutureWarning for them because that dispatch is slated to change.
# The aliases keep the current results and row labels.
data.aggregate(['min',
                'mean',
                'std',
                'median',
                'max']
).round(2)

### Changes Over Time

In [None]:
# Absolute change from one row to the next.
data.diff().head()

In [None]:
# Average absolute change per column.
data.diff().mean()

In [None]:
# Relative change from one row to the next, rounded to three decimals.
data.pct_change().round(3).head()

In [None]:
# Mean relative change per column, shown as a bar chart.
data.pct_change().mean().plot(kind='bar', figsize=(10, 6));

In [None]:
# Log returns: logarithm of each value divided by the previous row's value.
rets = np.log(data / data.shift(1))

In [None]:
rets.head().round(3)

In [None]:
# Cumulative sum of log returns, exponentiated to give the growth factor
# relative to the starting value.
rets.cumsum().apply(np.exp).plot(figsize=(10, 6));

### Resampling

In [None]:
# Weekly bins labelled by their right edge; keep the last observation per bin.
# NOTE(review): the lowercase frequency aliases used in this section
# ('1w', '1m') may be deprecated in newer pandas releases — confirm against
# the installed version.
data.resample('1w', label='right').last().head()

In [None]:
# Same resampling with the '1m' rule.
data.resample('1m', label='right').last().head()

In [None]:
# Cumulative log returns sampled with the '1m' rule (last value per bin).
rets.cumsum().resample('1m', label='right').last(
                          ).plot(figsize=(10, 6));

## Excursion: Simple vs. Log Returns

In [None]:
# Prices at the start and at the end of the first period.
S0 = 100
S1 = 110

In [None]:
# Gross return over the first period.
S1 / S0

In [None]:
# Simple return over the first period.
sr1 = S1 / S0 - 1
sr1

In [None]:
# Price at the end of the second period.
S2 = 125

In [None]:
# Simple return over the second period.
sr2 = S2 / S1 - 1
sr2

In [None]:
S0 * (1 + (sr1 + sr2))  # wrong: adding simple returns does not recover S2

In [None]:
# Correct: simple returns compound multiplicatively.
S0 * (1 + sr1) * (1 + sr2)

In [None]:
import math

In [None]:
# Log return over the first period.
lr1 = math.log(S1 / S0)
lr1

In [None]:
# Log return over the second period.
lr2 = math.log(S2 / S1)
lr2

In [None]:
# Exponentiating each log return and multiplying compounds the two periods.
S0 * math.exp(lr1) * math.exp(lr2)

In [None]:
# Equivalent form: log returns can be added before exponentiating.
S0 * math.exp(lr1 + lr2)

## Rolling Statistics

In [None]:
# Column (symbol) analysed in the rolling-statistics section.
sym = 'AAPL.O'

In [None]:
# Rebind ``data`` to a one-column DataFrame for the symbol, dropping rows
# with missing values. The multi-column frame is no longer reachable via
# ``data`` after this cell.
data = pd.DataFrame(data[sym]).dropna()

In [None]:
data.tail()

### An Overview

In [None]:
# Number of rows in each rolling window.
window = 20

In [None]:
# Rolling minimum over the window.
data['min'] = data[sym].rolling(window=window).min()

In [None]:
# Rolling mean over the window.
data['mean'] = data[sym].rolling(window=window).mean()

In [None]:
# Rolling standard deviation over the window.
data['std'] = data[sym].rolling(window=window).std()

In [None]:
# Rolling median over the window.
data['median'] = data[sym].rolling(window=window).median()

In [None]:
# Rolling maximum over the window.
data['max'] = data[sym].rolling(window=window).max()

In [None]:
# Exponentially weighted mean with a half-life of 0.5; ``min_periods=window``
# is passed so a value is only produced once ``window`` observations exist.
data['ewma'] = data[sym].ewm(halflife=0.5, min_periods=window).mean()

In [None]:
# Show the first rows for which every statistic is available.
data.dropna().head()

In [None]:
# Rolling min/mean/max for the last 200 rows as thin dashed lines, with the
# price series drawn on the same axes as a thicker line.
ax = data[['min', 'mean', 'max']].iloc[-200:].plot(
    figsize=(10, 6), style=['g--', 'r--', 'g--'], lw=0.8)
data[sym].iloc[-200:].plot(ax=ax, lw=2.0);

In [None]:
# Same chart with the rolling standard deviation added on a secondary y-axis.
ax = data[['min', 'mean', 'max', 'std']].iloc[-200:].plot(
    figsize=(10, 6), style=['g--', 'r--', 'g--', 'm-.'], lw=0.8, secondary_y='std')
data[sym].iloc[-200:].plot(ax=ax, lw=2.0);

### A Technical Analysis Example

In [None]:
# Shorter simple moving average (42-row window).
data['SMA1'] = data[sym].rolling(window=42).mean()

In [None]:
# Longer simple moving average (252-row window).
data['SMA2'] = data[sym].rolling(window=252).mean()

In [None]:
data[[sym, 'SMA1', 'SMA2']].tail()

In [None]:
# Price together with both moving averages.
data[[sym, 'SMA1', 'SMA2']].plot(figsize=(10, 6));

In [None]:
# Drop, in place, every row with a missing value in any column; this removes
# the leading rows where the rolling statistics are not yet defined.
data.dropna(inplace=True)

In [None]:
# Position signal: +1 where the shorter average is above the longer one,
# -1 everywhere else.
sma1_above_sma2 = data['SMA1'] > data['SMA2']
data['positions'] = np.where(sma1_above_sma2, 1, -1)

In [None]:
# Price and moving averages on the primary axis, the position signal on a
# secondary y-axis.
ax = data[[sym, 'SMA1', 'SMA2', 'positions']].plot(figsize=(10, 6),
                                              secondary_y='positions')
# Reposition the legend using axes-relative coordinates.
ax.get_legend().set_bbox_to_anchor((0.25, 0.85));

## Regression Analysis

### The Data

In [None]:
# EOD data from Thomson Reuters Eikon Data API
# EOD data from Thomson Reuters Eikon Data API
# Re-read the full data set; first column is used as a parsed date index.
raw = pd.read_csv('http://hilpisch.com/tr_eikon_eod_data.csv',
                 index_col=0, parse_dates=True)

In [None]:
# Keep only the '.SPX' and '.VIX' columns for the regression analysis.
data = raw[['.SPX', '.VIX']]

In [None]:
data.tail()

In [None]:
# Each series in its own subplot.
data.plot(subplots=True, figsize=(10, 6));

In [None]:
# Both series up to the end of 2012 in one chart, with '.VIX' on a
# secondary y-axis.
data.loc[:'2012-12-31'].plot(secondary_y='.VIX', figsize=(10, 6));

### Log Returns

In [None]:
# Log returns: logarithm of each value divided by the previous row's value.
rets = np.log(data / data.shift(1)) 

In [None]:
rets.head(5)

In [None]:
# Remove, in place, rows with a missing value in either column.
rets.dropna(inplace=True)

In [None]:
# Each return series in its own subplot.
rets.plot(subplots=True, figsize=(10, 6));

In [None]:
# Pairwise scatter plots of the return series with 35-bin histograms on
# the diagonal.
pd.plotting.scatter_matrix(rets,
                           alpha=0.2,
                           diagonal='hist',
                           hist_kwds={'bins': 35},
                           figsize=(10, 6));

### OLS Regression

In [None]:
# Least-squares fit of a degree-1 polynomial with '.SPX' returns as the
# independent and '.VIX' returns as the dependent variable.
reg = np.polyfit(rets['.SPX'], rets['.VIX'], deg=1)

In [None]:
# Fitted coefficients, highest degree first (slope, then intercept).
reg

In [None]:
# Scatter plot of the return pairs with the fitted line drawn in red.
ax = rets.plot(kind='scatter', x='.SPX', y='.VIX', figsize=(10, 6))
ax.plot(rets['.SPX'], np.polyval(reg, rets['.SPX']), 'r', lw=2);

### Correlation

In [None]:
# Correlation matrix of the two return series over the whole sample.
rets.corr()

In [None]:
# Rolling correlation over a 252-row window, with the whole-sample
# correlation drawn as a red horizontal reference line.
ax = rets['.SPX'].rolling(window=252).corr(
                  rets['.VIX']).plot(figsize=(10, 6))
ax.axhline(rets.corr().iloc[0, 1], c='r');

## High Frequency Data

In [None]:
%%time
# data from FXCM Forex Capital Markets Ltd.
# Read the tick data; first column is used as a parsed date index.
tick = pd.read_csv('http://hilpisch.com/fxcm_eur_usd_tick_data.csv',
                     index_col=0, parse_dates=True)

In [None]:
tick.info()

In [None]:
tick.head()

In [None]:
# Row-wise mean across all columns of ``tick``.
# NOTE(review): presumably the columns are bid/ask quotes, making this the
# mid price — confirm against the CSV header.
tick['Mid'] = tick.mean(axis=1)

In [None]:
tick['Mid'].plot(figsize=(10, 6));

In [None]:
# Resample to 15-second bins labelled by their right edge, keeping the last
# tick in each bin.
tick_resam = tick.resample(rule='15s', label='right').last()

In [None]:
tick_resam.info()

In [None]:
tick_resam.head()

In [None]:
tick_resam['Mid'].plot(figsize=(10, 6));

<img src="http://hilpisch.com/tpq_logo.png" alt="The Python Quants" width="35%" align="right" border="0"><br>

<a href="http://tpq.io" target="_blank">http://tpq.io</a> | <a href="http://twitter.com/dyjh" target="_blank">@dyjh</a> | <a href="mailto:training@tpq.io">training@tpq.io</a>