# Python 37

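This notebook smoke-tests the core scientific Python stack in this environment: each section below imports one library and runs a short check.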

## Pandas basics

pandas should import and be able to plot a simple time series. It should also be able to write and read back the important file formats (Excel, Feather, HDF5) without changing the data.

In [None]:
import pandas as pd
import numpy as np

# Build a 1000-day random walk starting on 2000-01-01.
# The noise comes from a locally seeded generator so that a fresh
# "Restart & Run All" always reproduces the same series (and plot) without
# touching the state of the global NumPy RNG.
ts = pd.Series(np.random.RandomState(0).randn(1000),
               index=pd.date_range('1/1/2000', periods=1000))

# Cumulative sum turns the independent draws into a random walk.
ts = ts.cumsum()
# Plot the series to check that pandas plotting works in this environment;
# as the last expression of the cell, the call's return value is displayed.
ts.plot()

In [None]:
# Small mixed-dtype fixture (datetime, string, int and float columns) that is
# written to disk and read back in every supported format below.
df0 = pd.DataFrame(
{'datetime': {0: pd.to_datetime('2020-04-03 09:03:15.620000'),
  1: pd.to_datetime('2020-04-02 09:03:16'),
  2: pd.to_datetime('2020-04-01 09:03:16'),
  3: pd.to_datetime('2020-04-08 09:05:16'),
  4: pd.to_datetime('2020-04-03 08:00:16')},
 'sex': {0: 'F', 1: 'M', 2: 'M', 3: 'F', 4: 'M'},
 'n_seen_int': {0: 3, 1: 1, 2: 0, 3: 15, 4: 7},
 'pref_float': {0: 2.6, 1: 1.1, 2: 9.0, 3: 3.14159, 4: 15.0}})

# Base path (without extension) shared by every round-trip file.
df0_path = '/tmp/df0'

# Each entry: (extension, writer, writer kwargs, reader, reader kwargs).
# Excel needs index_col=0 on read because to_excel also writes the index.
pandas_file_formats = [
    ('.xlsx', pd.DataFrame.to_excel, dict(), pd.read_excel, dict(index_col=0)),
    ('.feather', pd.DataFrame.to_feather, dict(), pd.read_feather, dict()),
    ('.h5', pd.DataFrame.to_hdf, dict(key='data', mode='w'), pd.read_hdf, dict())
]
for ext, writer, writer_args, reader, reader_args in pandas_file_formats:
    writer(df0, df0_path + ext, **writer_args)
    df1 = reader(df0_path + ext, **reader_args)
    assert (df0 == df1).all().all(), f'Writing then reading dataframe{ext} did not preserve data integrity'
    assert (df0.dtypes == df1.dtypes).all(), f'Writing then reading dataframe{ext} did not preserve col dtypes'

# Sadly, the xls format seems to lose datetime resolution sometimes, so here
# the datetime column is only required to round-trip to within 1s.
# NOTE(review): writing .xls depends on the legacy xlwt engine, which newer
# pandas releases no longer support - confirm against the pinned pandas version.
fmt = '.xls'  # used by the assertion messages below
df0.to_excel(df0_path + fmt)
df1 = pd.read_excel(df0_path + fmt, index_col=0)
# Compare the absolute error: a signed difference would let any large
# negative drift pass the "<= 1.0" check.
datetime_error_s = (df1['datetime'] - df0['datetime']).dt.total_seconds().abs()
check_datetimes = (datetime_error_s <= 1.0).all()
assert check_datetimes, f'Writing then reading {fmt} did not preserve datetimes'
# Every column after 'datetime' must still match exactly, values and dtypes.
assert (df0.iloc[:, 1:] == df1.iloc[:, 1:]).all().all(), f'Writing then reading {fmt} did not preserve data integrity'
assert (df0.iloc[:, 1:].dtypes == df1.iloc[:, 1:].dtypes).all(), f'Writing then reading {fmt} did not preserve col dtypes'

## Numpy basics

In [None]:
import numpy as np

# Fix the legacy global RNG so the two draws below are reproducible.
np.random.seed(42)
A = np.random.randint(5, size=(4, 3))   # 4x3 matrix of integers in [0, 5)
b = np.random.randint(5, size=(4, 1))   # 4x1 column, drawn after A

# Broadcasting the column b across A scales each row of A by one entry of b.
expected_product = np.array([
    [12, 16, 8],
    [4, 4, 1],
    [6, 6, 6],
    [4, 3, 2],
])
assert np.array_equal(A * b, expected_product)

## Matplotlib

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

# Draw y = x**2 at the even integers 0..8 on a single set of axes.
x_values = list(range(0, 10, 2))
y_values = [x ** 2 for x in x_values]

fig, ax = plt.subplots()
ax.plot(x_values, y_values)

## Seaborn basics

In [None]:
import seaborn as sns

# Two numeric columns; dict(enumerate(...)) keys each value by its row
# position 0..4, giving the frame an explicit integer index.
n_seen_by_row = dict(enumerate([3, 1, 0, 15, 7]))
pref_by_row = dict(enumerate([2.6, 1.1, 9.0, 3.14159, 15.0]))
df0 = pd.DataFrame({'n_seen_int': n_seen_by_row, 'pref_float': pref_by_row})

# Relational plot of the two columns; the trailing semicolon keeps the
# returned object's repr out of the cell output.
sns.relplot(data=df0, x="n_seen_int", y="pref_float");

## Bokeh basics

## Scikit-learn basics

Uses the [gradient boosting regression example](https://scikit-learn.org/dev/auto_examples/ensemble/plot_gradient_boosting_regression.html#sphx-glr-auto-examples-ensemble-plot-gradient-boosting-regression-py) from the scikit-learn documentation to check that the basics work.

## cython basics

Can we compile a short bit of cython?

## Sympy