<img src="http://hilpisch.com/tpq_logo.png" alt="The Python Quants" width="35%" align="right" border="0"><br>

# Mathematics Basics

**With `pandas`**

&copy; Dr. Yves J. Hilpisch | The Python Quants GmbH

http://tpq.io | [training@tpq.io](mailto:training@tpq.io) | [@dyjh](http://twitter.com/dyjh)

In [None]:
!git clone https://github.com/tpq-classes/mathematics_basics.git
import sys
sys.path.append('mathematics_basics')


In [None]:
import numpy as np
import pandas as pd

## `DatetimeIndex` Objects

In [None]:
dir(pd.tseries.offsets)[:5]

In [None]:
start = '2021-01-01'
end = '2021-12-31'
freq = 'D'
periods = 365

In [None]:
index = pd.date_range(start, end, freq=freq)

In [None]:
index

In [None]:
index = pd.date_range(start, periods=periods, freq=freq)

In [None]:
index

In [None]:
index = pd.date_range(start, periods=52, freq='W')

In [None]:
index

In [None]:
# Twelve monthly timestamps starting from `start`, using the 'M' frequency alias
# (compare with the 'MS' alias used in the next example).
# NOTE(review): newer pandas releases (2.2+) reportedly deprecate 'M' in favour
# of 'ME' -- confirm against the pandas version this notebook targets.
index = pd.date_range(start, periods=12, freq='M')

In [None]:
index

In [None]:
index = pd.date_range(start, periods=12, freq='MS')

In [None]:
index

In [None]:
index = pd.date_range(start, periods=12, freq='5min')

In [None]:
index

In [None]:
index = pd.date_range(start, periods=12, freq='4h')

In [None]:
index

## Time Series Data

In [None]:
np.set_printoptions(suppress=True)
from numpy.random import default_rng

In [None]:
rng = default_rng(1000)

In [None]:
rn = rng.standard_normal(364)

In [None]:
rn = np.insert(rn, 0, 0.)

In [None]:
rn[:10]

In [None]:
index = pd.date_range(start, end, freq=freq)

In [None]:
index[:10]

In [None]:
s = pd.Series(rn.cumsum(), index=index)

In [None]:
s

In [None]:
from pylab import plt
plt.style.use('seaborn-v0_8')
%config InlineBackend.figure_format = 'svg'

In [None]:
s.plot();

## Indexing & Selection

In [None]:
s

In [None]:
# Positional access must go through .iloc: `s` has a DatetimeIndex, so a bare
# integer key in s[...] relies on a deprecated positional fallback.
s.iloc[0]

In [None]:
s.loc['2021-01-01']

In [None]:
# Positional access must go through .iloc: `s` has a DatetimeIndex, so a bare
# integer key in s[...] relies on a deprecated positional fallback.
s.iloc[1]

In [None]:
s.loc['2021-01-02']

In [None]:
s[0:4]

In [None]:
s.loc['2021-01-01':'2021-01-04']

In [None]:
s[:4]

In [None]:
s.loc[:'2021-01-04']

In [None]:
s[3:6]

In [None]:
s.loc['2021-01-04':'2021-01-06']

In [None]:
s[5:]

In [None]:
# Label-based counterpart of s[5:]: position 5 of the daily index that starts
# on 2021-01-01 is 2021-01-06, so the slice has to begin there.
s.loc['2021-01-06':]

## Vectorized Arithmetic

In [None]:
a = 1 / 2

In [None]:
a * s

In [None]:
s + s

In [None]:
s * s

## Statistics

In [None]:
s.sum()

In [None]:
s.mean()

In [None]:
s.var()

In [None]:
s.std()

In [None]:
s.min()

In [None]:
s.max()

In [None]:
s.median()

## Rolling Statistics

In [None]:
window = 20

In [None]:
r = s.rolling(window).mean()

In [None]:
r

In [None]:
# r.head(20)

In [None]:
s.plot()
r.plot(label='SMA', legend=True);

In [None]:
r = s.rolling(window).median()

In [None]:
r

In [None]:
s.plot()
r.plot(label='MEDIAN', legend=True);

In [None]:
r = s.rolling(window).min()

In [None]:
r

In [None]:
s.plot()
r.plot(label='MIN', legend=True);

In [None]:
r = s.rolling(window).max()

In [None]:
r

In [None]:
s.plot()
r.plot(label='MAX', legend=True);

In [None]:
r = s.rolling(window).std()

In [None]:
r

In [None]:
s.plot()
# Plot the rolling standard deviation on a secondary (right-hand) y-axis.
# `secondary_y` is documented as bool-or-sequence; pass True explicitly instead
# of a string that only works because it happens to equal the label.
r.plot(label='STD', legend=True, secondary_y=True);

In [None]:
r = s.ewm(halflife=5, min_periods=window).mean()

In [None]:
r

In [None]:
s.plot()
r.plot(label='EWMA', legend=True);

## Custom Functions

In [None]:
%time s = pd.Series(rng.standard_normal(int(1e6)).cumsum())

In [None]:
s.head()

In [None]:
s.plot();

### Element-Wise Functions

In [None]:
import math

In [None]:
s.apply(math.cos)

In [None]:
%timeit s.apply(math.cos)

In [None]:
s.apply(np.cos)

In [None]:
%timeit s.apply(np.cos)

In [None]:
s / 2

In [None]:
%timeit s / 2

In [None]:
def f(x):
    """Return ``x`` divided by two."""
    halved = x / 2
    return halved

In [None]:
s.apply(f)

In [None]:
%timeit s.apply(f)

In [None]:
s.apply(lambda x: x / 2)

In [None]:
%timeit s.apply(lambda x: x / 2)

### `numexpr` Module

In [None]:
np.sqrt(np.abs(s)) + np.sin(s)

In [None]:
%timeit np.sqrt(np.abs(s)) + np.sin(s)

In [None]:
pd.eval('sqrt(abs(s)) + sin(s)')

In [None]:
%timeit pd.eval('sqrt(abs(s)) + sin(s)')

### Recursive Functions

In [None]:
def ewma_py(x, alpha=0.25):
    """Return the exponentially weighted moving average of ``x``.

    The recursion is ``y[0] = x[0]`` and
    ``y[i] = alpha * x[i] + (1 - alpha) * y[i - 1]`` for ``i >= 1``.

    Parameters
    ----------
    x : array-like
        Input values (list, ndarray or Series); read positionally.
    alpha : float, default 0.25
        Weight given to the newest observation.

    Returns
    -------
    numpy.ndarray
        ``float64`` array with the same shape as ``x``; empty if ``x`` is empty.
    """
    # Work on a plain ndarray so indexing is positional even for a Series
    # whose index labels are not 0..n-1.
    values = np.asarray(x)
    # Always accumulate in float64: np.zeros_like(x) would inherit an integer
    # dtype from integer input and silently truncate every average.
    y = np.zeros(values.shape, dtype=np.float64)
    # Nothing to seed the recursion with; return the empty result as-is.
    if len(values) == 0:
        return y
    y[0] = values[0]
    for i in range(1, len(values)):
        y[i] = alpha * values[i] + (1 - alpha) * y[i - 1]
    return y

In [None]:
a = ewma_py(s)

In [None]:
a

In [None]:
r_py = pd.Series(ewma_py(s))

In [None]:
r_py

In [None]:
s.iloc[-100:].plot()
r_py.iloc[-100:].plot();

In [None]:
%timeit r_py = pd.Series(ewma_py(s))

In [None]:
import numba

In [None]:
# Build a Numba-compiled variant by calling numba.jit directly on ewma_py;
# ewma_py itself stays bound to the plain Python function for comparison.
ewma_nb = numba.jit(ewma_py)

In [None]:
s.values  # transforming Series object to ndarray object

In [None]:
r_nb = pd.Series(ewma_nb(s.values))

In [None]:
r_nb

In [None]:
(r_py == r_nb).all()  # real equality

In [None]:
np.allclose(r_py, r_nb)  # equality to a certain precision level

In [None]:
%timeit r_nb = pd.Series(ewma_nb(s.values))

## Time Series Data from CSV

In [None]:
url = 'https://certificate.tpq.io/GLD.csv'

In [None]:
gld = pd.read_csv(url, index_col=0,
                  parse_dates=True) 

In [None]:
gld = pd.Series(gld['GLD'])

In [None]:
gld.index

In [None]:
gld.plot()
gld.rolling(42).mean().plot(ls='--')
gld.rolling(252).mean().plot(style='-.');

In [None]:
gld.plot(lw=1)
gld.rolling(50).min().plot(ls='--', lw=1)
gld.rolling(50).max().plot(style='-.', lw=1);

## Exporting Data

In [None]:
gld.to_list()[:10]

In [None]:
d = gld.to_dict()

In [None]:
#d

In [None]:
d[pd.Timestamp('2010-01-07')]

In [None]:
# Print the values stored under the first five keys of the exported dict.
for value in list(d.values())[:5]:
    print(value)

In [None]:
gld.to_csv()[:300]

In [None]:
gld.to_markdown()[:300]

In [None]:
gld.to_latex()[:300]

In [None]:
gld.to_excel('GLD.xlsx')

In [None]:
gld.to_json('GLD.json')

<img src="http://hilpisch.com/tpq_logo.png" alt="The Python Quants" width="35%" align="right" border="0"><br>

<a href="http://tpq.io" target="_blank">http://tpq.io</a> | <a href="http://twitter.com/dyjh" target="_blank">@dyjh</a> | <a href="mailto:training@tpq.io">training@tpq.io</a>