<img src="http://hilpisch.com/tpq_logo.png" alt="The Python Quants" width="35%" align="right" border="0"><br>

# Mathematics Basics

**With `pandas`**

See also `44_math_basics.ipynb`.

&copy; Dr. Yves J. Hilpisch | The Python Quants GmbH

http://tpq.io | [training@tpq.io](mailto:training@tpq.io) | [@dyjh](http://twitter.com/dyjh)

In [None]:
# Fetch the course repository and make its directory importable.
!git clone https://github.com/tpq-classes/mathematics_basics.git
import sys
# Append the cloned directory to the module search path.
sys.path.append('mathematics_basics')


In [None]:
import numpy as np
import pandas as pd

In [None]:
# Render every float in pandas output with exactly five decimal places.
pd.set_option('display.float_format', lambda value: f'{value:.5f}')

In [None]:
from string import ascii_lowercase, ascii_uppercase

## Indexing & Selection

In [None]:
n = 30

In [None]:
m = np.arange(0, n, 1).reshape((10, 3))

In [None]:
m

In [None]:
list(ascii_lowercase[:10])

In [None]:
list(ascii_uppercase[:3])

In [None]:
# Wrap m in a DataFrame: rows labelled 'a'..'j', columns labelled 'A'..'C'.
df = pd.DataFrame(
    data=m,
    index=list(ascii_lowercase[:10]),
    columns=list(ascii_uppercase[:3]),
)

In [None]:
df

In [None]:
df.iloc[0]

In [None]:
type(df.iloc[0])

In [None]:
df.iloc[1]

In [None]:
# df[1]  # does not work: a scalar inside [] is looked up as a column
#        # label, and the columns here are 'A', 'B' and 'C'

In [None]:
# A slice inside [] selects rows by position; same result as df.iloc[0:4].
df[0:4]

In [None]:
df.iloc[0:4]

In [None]:
df[:4]

In [None]:
df.iloc[:4]

In [None]:
df[3:6]

In [None]:
df.iloc[3:6]

In [None]:
df[5:]

In [None]:
df.iloc[5:]

In [None]:
m[-1]  # ndarray object

In [None]:
# df[-1]  # does not work

In [None]:
# df[-2]

In [None]:
df.iloc[-1]

In [None]:
df.iloc[-2]

In [None]:
df.iloc[-4:]

In [None]:
df.iloc[-5:-2]

In [None]:
df.iloc[5:-1]

In [None]:
df.iloc[6:2:-1]

In [None]:
df.iloc[6::-1]

In [None]:
df.iloc[:-5:-1]

In [None]:
df.iloc[2, 1]  # index, column

In [None]:
df.iloc[1:5, 0:2]  # index, column

In [None]:
df.loc['a']

In [None]:
df.iloc[:4]  # index position 4 excluded

In [None]:
df.iloc[4:]

In [None]:
df.loc[:'d']  # index key 'd' included

In [None]:
df.loc['d':]

In [None]:
df.loc['c':'f']

In [None]:
df['A']

In [None]:
df.loc['a':'c', 'A']  # index, column

In [None]:
df.loc['a':'d', 'A':'B']

## Vectorized Arithmetics

In [None]:
np.set_printoptions(suppress=True)

In [None]:
a = 1 / 2

In [None]:
a * df

In [None]:
df + df

In [None]:
# m is the (10, 3) ndarray df was built from, so this adds element-wise.
df + m

In [None]:
df / a

In [None]:
df * df

In [None]:
# 10 / 0  # ZeroDivisionError

In [None]:
# Element-wise division: the 0 / 0 in row 'a', column 'A' becomes NaN
# instead of raising ZeroDivisionError (ZDE).
r = df / df  # does not raise ZDE

In [None]:
r

In [None]:
r.sum()  # works despite NaN: pandas reductions skip NaN by default

In [None]:
r.mean()  # works despite NaN

In [None]:
df + 2

In [None]:
df ** 2

In [None]:
df ** a

In [None]:
a ** df

In [None]:
df ** (df / 100)

In [None]:
large = 29 ** 29

In [None]:
large

In [None]:
large.bit_length()

In [None]:
# sum(df)  # does not work (2d object)

In [None]:
m.sum()  # full aggregation by default

In [None]:
m.sum(axis=0)

In [None]:
df.sum()  # column-wise by default

In [None]:
df.sum(axis=0)  # column-wise, with the default axis=0 spelled out

In [None]:
df.sum(axis=1)  # row-wise by parametrization

In [None]:
# NOTE(review): NumPy forwards axis=None to DataFrame.sum; pandas >= 2.1 is
# documented to deprecate that for sum, so this may emit a FutureWarning and
# later return a scalar -- confirm against the installed pandas version.
np.sum(df)  # column-wise by default

In [None]:
df.mean()

In [None]:
# NOTE(review): with pandas >= 2.0, mean(axis=None) is documented to reduce
# over both axes, so this presumably returns one scalar rather than
# per-column means like df.mean() -- confirm against the installed version.
np.mean(df)

In [None]:
# df.sqrt()  # does not work ...

In [None]:
np.sqrt(df)  # ... but this does

In [None]:
df.apply(np.sqrt)  # ... and this does

In [None]:
np.exp(df / 10)

In [None]:
(df / 10).apply(np.exp)

In [None]:
np.log(df)

In [None]:
np.log(df).sum()

In [None]:
import math

In [None]:
df['A'].apply(math.exp)  # slow in general: one Python-level call per element

In [None]:
# df.apply(math.exp)  # does not work: apply hands each column to the
#                     # function as a Series, but math.exp expects a scalar

In [None]:
df['A'] + df['C']

In [None]:
2 * df['A'] ** 2 - 5 * df['C'].apply(np.sqrt) + df['B']

## Speed & Memory

In [None]:
import sys

In [None]:
N = 10 ** 8

In [None]:
N

In [None]:
%time m = np.arange(N).reshape(int(N / 10), 10)

In [None]:
m.nbytes

In [None]:
%time df = pd.DataFrame(m)

In [None]:
sys.getsizeof(df)

In [None]:
%time res = np.sqrt(m)

In [None]:
%timeit res = np.sqrt(m)

In [None]:
%time res = np.sqrt(df)

In [None]:
%timeit res = np.sqrt(df)

In [None]:
type(df.values)  # from DataFrame to ndarray

In [None]:
%time res = np.sqrt(df.values)

In [None]:
%timeit df.apply(np.sin)

In [None]:
%timeit np.sin(m)

## Preview: Plotting

In [None]:
from numpy.random import default_rng

In [None]:
rng = default_rng(100)

In [None]:
# 2,000 rows x 5 columns of standard-normal draws from the seeded generator.
df = pd.DataFrame(rng.standard_normal(size=(2000, 5)))

In [None]:
# Plotting setup: pyplot (obtained via pylab), the 'seaborn-v0_8' style
# sheet, and SVG as the inline figure format.
# NOTE(review): the usual spelling is `import matplotlib.pyplot as plt`.
from pylab import plt
plt.style.use('seaborn-v0_8')
%config InlineBackend.figure_format = 'svg'

In [None]:
df.plot();

In [None]:
df.cumsum().plot();

In [None]:
# .loc slices by label with the end included, so this picks columns 0, 1, 2.
df.loc[:, 0:2].cumsum().plot();

In [None]:
df.hist(bins=50);

<img src="http://hilpisch.com/tpq_logo.png" alt="The Python Quants" width="35%" align="right" border="0"><br>

<a href="http://tpq.io" target="_blank">http://tpq.io</a> | <a href="http://twitter.com/dyjh" target="_blank">@dyjh</a> | <a href="mailto:training@tpq.io">training@tpq.io</a>