# Pandas template

* Cells before the `# [[nbplot]] template` are ignored.
* Cells starting with `# [[nbplot]] ignore` are also ignored.

* Some variables are substituted in every cell:
    * `${root_path}`: the working directory when `nbplot` was called. Input files will be relative to this.

* Some variables are substituted in the `[[nbplot]] for i,input in enumerate(inputs)` blocks:
    * `${i}`: index of the input in the list
    * `${input.pretty_name}`: truncated path of the file, or 'stdin'
    * `${input.rel_path}`: path of the file relative to the `root_path`, or `stdin`
    * `${input.abs_path_or_io}`: full filepath or StringIO when the data comes from stdin
    * `${input.guessed_sep}`: separator guessed by nbplot for this file. Usually space or comma.

In [None]:
# [[nbplot]] template
# Note: don't change that first line, it tells nbplot that the notebook below is a template
# This cell will be executed and the metadata dictionary loaded, but not included in the output.

# Metadata dictionary that nbplot loads after executing this cell.
template_metadata = {
    'name': 'pandas',
    'format_version': '0.1',
}

In [None]:
import io, math, os, sys
from base64 import b64decode
from pathlib import Path

import matplotlib.pyplot as plt
import pandas
import numpy as np
import mplcursors

# Transform x,y into a smooth x,y with splines (similar to gnuplot csplines)
# Sample usage: ax.plot(*csplines(x,y)) # Don't forget the * to expand the output (x,y) tuple!
def csplines(x,y):
    """Return a smoothed ``(x, y)`` pair built from a cubic interpolating spline.

    Parameters:
        x: sequence of abscissas (list, NumPy array or pandas Series).
        y: sequence of ordinates, same length as ``x``.

    Returns:
        A tuple ``(x_smooth, y_smooth)`` where ``x_smooth`` is evenly spaced
        between the first and last value of ``x`` and ``y_smooth`` is the
        spline evaluated at those positions.
    """
    from scipy.interpolate import make_interp_spline
    # Work on plain arrays so the endpoints below are taken by position.
    # Indexing a pandas Series with x[0] is label-based and raises KeyError
    # when the index does not start at 0 (e.g. after filtering rows).
    x = np.asarray(x)
    y = np.asarray(y)
    spl = make_interp_spline(x, y, 3)
    # At least 300 samples, and at least 10x the number of input points.
    x_smooth = np.linspace(x[0], x[-1], max(300, len(x)*10))
    return x_smooth, spl(x_smooth)

# \s+ will handle \t, etc.
def pandas_delim(delim):
    """Translate a guessed separator into the ``sep`` value given to pandas.

    A single space is replaced by the regular expression ``\\s+``; any other
    delimiter is returned unchanged.
    """
    # Raw string: '\s' is not a valid escape sequence in a normal string
    # literal and triggers a warning on recent Python versions.
    return r'\s+' if delim == ' ' else delim

In [None]:
# [[nbplot]] ignore
# Not enabling that for now.
# Cache mechanism to avoid reloading the files every time we run the plot cell
# Maps a file path to a (dataframe, modification time when cached) tuple.
loaded_dataframes = {}


def needs_reload(path):
    """Return True when `path` is not cached or its modification time changed."""
    if path not in loaded_dataframes:
        return True
    cached_mtime = loaded_dataframes[path][1]
    return os.path.getmtime(path) != cached_mtime


def cache_dataframe(path, df):
    """Store `df` for `path` with the file's current modification time; return `df`."""
    current_mtime = os.path.getmtime(path)
    loaded_dataframes[path] = (df, current_mtime)
    return df
# Meant to be used as:
# df${i} = None
# if needs_reload("${input.abs_path_or_io}"):
#   df${i} = cache_dataframe("${input.abs_path_or_io}", df${i})

## <center> Cheatsheet gnuplot <> matplotlib </center>

|Gnuplot | Matplotlib|
| :-- | :-- |
| `with lines` | `default` or `ax.plot(..., '-')` |
| `with linespoints` | `ax.plot(..., '.-')` |
| `with points` | `ax.plot(..., '.')` |
| `smooth csplines` | `ax.plot(*csplines(x,y))` |
| `using 1:2` | `ax.plot(df[0], df[1])` |
| `using 0:1` | `ax.plot(df[0])` |

In [None]:
# interactive mode by default
%matplotlib notebook
#%matplotlib inline
plt.ioff() # show the figure only at the end to avoid postponing potential loading errors

# Single figure/axes pair shared by every input plotted below.
fig,ax = plt.subplots(figsize=(8,6), num='MyWindow')
#fig.suptitle('MyPlot')
#ax.set_title('My Title')
#ax.set_xlabel('x')
#ax.set_ylabel('y')

# Working directory when nbplot was called; input files are relative to it.
root_path = Path("$root_path")

# The block between the `for` and `endfor` directives is a template section:
# ${i} and the ${input.*} placeholders are substituted for each input.
# For each input: load it as a header-less table (lines starting with '#'
# are treated as comments), display the first rows, then plot column 0
# against column 1. When there is only one column, plot it against the row
# number instead.
# [[nbplot]] for i,input in enumerate(inputs)
name${i} = "${input.pretty_name}"; file_or_io${i} = ${input.abs_path_or_io}
df${i} = pandas.read_csv(file_or_io${i}, sep=pandas_delim('${input.guessed_sep}'), header=None, comment='#', skipinitialspace=True, skiprows=0)
display(df${i}.head())
x, y = (df${i}[0], df${i}[1]) if len(df${i}.columns) > 1 else (np.arange(0,len(df${i})), df${i}[0])
ax.plot(x, y, label=name${i})
# [[nbplot]] endfor

# Legend entries come from the label= passed to each ax.plot call above.
ax.legend()
mplcursors.cursor() # enable the cursors, left click to annotate a point, right click to hide it.
plt.show() # show the plot
plt.ion(); # restore interactive mode