# Introduction to Plotting

- Arrays, record arrays and basic statistics: `numpy`
- Simple and Advanced Plotting: `matplotlib`


# Numpy Arrays

### What is a numpy array?

In [21]:
# A python list
python_list = [1, 2, 3, 4, 5]
python_list

[1, 2, 3, 4, 5]

In [22]:
# A python list can have mixed types inside
[1, 2, 3, 4, 5, 'a', 'b']

[1, 2, 3, 4, 5, 'a', 'b']

In [23]:
# Let's import numpy
import numpy as np

In [24]:
# Let's convert our python list into a numpy array
python_np_array = np.array(python_list)
python_np_array

array([1, 2, 3, 4, 5])

In [25]:
# Let's see what is its type
python_np_array.dtype

dtype('int64')

In [26]:
# What about a list of floats?
python_list = [1.0, 2.0, 3.0, 4.0, 5.0]
python_list

[1.0, 2.0, 3.0, 4.0, 5.0]

In [27]:
# Let's convert it to numpy array
python_np_array = np.array(python_list)
python_np_array

array([1., 2., 3., 4., 5.])

In [28]:
# What is its type now
python_np_array.dtype

dtype('float64')

In [29]:
# Let's create an array just with zeros
np.zeros(10)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [30]:
# Can we reshape it??
np.zeros(25).reshape((5, 5))

array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

In [31]:
# We could have create it otherwise just with `np.zeros`
np.zeros((5,5))

array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

In [32]:
# `np.arange is very useful
np.arange(10, 20, step=0.5)

array([10. , 10.5, 11. , 11.5, 12. , 12.5, 13. , 13.5, 14. , 14.5, 15. ,
       15.5, 16. , 16.5, 17. , 17.5, 18. , 18.5, 19. , 19.5])

### Can we read files into numpy arrays?

In [33]:
# Let's get back to our file
!head Data/Glob/f1.txt

head: Data/Glob/f1.txt: No such file or directory


**The file has:**
- header: `x`, `y`, `Err(y)`, `Err(x)`
        - This a 1D SANS I(Q) curve output by Mantid.
- Two rows header


### Reading `csv` files as `numpy` arrays

For the function `np.genfromtxt` see: https://docs.scipy.org/doc/numpy/reference/generated/numpy.genfromtxt.html

In [34]:
import os
import numpy as np

In [35]:
# Use the same file from the previous notebook
file_path = os.path.join("..", "Data", "Glob", "f1.txt")

In [36]:
# genfromtxt default
data = np.genfromtxt(
    file_path,
)
# Data is meanlingless!
data

array([ 1., nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
       nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
       nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
       nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
       nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
       nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
       nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
       nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
       nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
       nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
       nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
       nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
       nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
       nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, na

In [37]:
# genfromtxt delimiter and skip_header
data = np.genfromtxt(
    file_path,
    delimiter=",",
    skip_header=2,
)
# Data is 2D numpy array: 200 rows, 4 columns
data

array([[2.32478e-03, 8.22832e+00, 6.77097e-01, 2.01330e-03],
       [4.58718e-03, 5.89150e+00, 1.93922e-01, 1.92328e-03],
       [6.84958e-03, 1.25730e+01, 4.13909e-01, 1.89569e-03],
       [9.11198e-03, 3.73161e+01, 7.68022e-01, 1.99785e-03],
       [1.13744e-02, 1.56672e+02, 2.59008e+00, 2.05609e-03],
       [1.36368e-02, 5.67555e+02, 1.17842e+01, 2.17489e-03],
       [1.58992e-02, 1.40189e+03, 1.61017e+01, 2.29348e-03],
       [1.81616e-02, 1.33420e+03, 1.34719e+01, 2.39931e-03],
       [2.04240e-02, 1.62173e+03, 2.00042e+01, 2.47893e-03],
       [2.26864e-02, 1.40312e+03, 1.01434e+01, 2.56543e-03],
       [2.49488e-02, 1.30591e+03, 1.03458e+01, 2.60186e-03],
       [2.72112e-02, 7.52002e+02, 6.22791e+00, 2.63457e-03],
       [2.94736e-02, 5.86367e+02, 3.34320e+00, 2.69503e-03],
       [3.17360e-02, 4.63746e+02, 2.83678e+00, 2.73969e-03],
       [3.39984e-02, 2.82236e+02, 1.72425e+00, 2.78730e-03],
       [3.62608e-02, 2.52291e+02, 1.15568e+00, 2.85656e-03],
       [3.85232e-02, 1.9

In [38]:
data.shape

(200, 4)

In [39]:
# genfromtxt delimiter, skip_header and names
data = np.genfromtxt(
    file_path,
    delimiter=",",
    skip_header=2,
    names=['X', 'Y', 'E', 'DX'],
)
# Data is now a structure array
data

array([(0.00232478, 8.22832e+00, 6.77097e-01, 0.0020133 ),
       (0.00458718, 5.89150e+00, 1.93922e-01, 0.00192328),
       (0.00684958, 1.25730e+01, 4.13909e-01, 0.00189569),
       (0.00911198, 3.73161e+01, 7.68022e-01, 0.00199785),
       (0.0113744 , 1.56672e+02, 2.59008e+00, 0.00205609),
       (0.0136368 , 5.67555e+02, 1.17842e+01, 0.00217489),
       (0.0158992 , 1.40189e+03, 1.61017e+01, 0.00229348),
       (0.0181616 , 1.33420e+03, 1.34719e+01, 0.00239931),
       (0.020424  , 1.62173e+03, 2.00042e+01, 0.00247893),
       (0.0226864 , 1.40312e+03, 1.01434e+01, 0.00256543),
       (0.0249488 , 1.30591e+03, 1.03458e+01, 0.00260186),
       (0.0272112 , 7.52002e+02, 6.22791e+00, 0.00263457),
       (0.0294736 , 5.86367e+02, 3.34320e+00, 0.00269503),
       (0.031736  , 4.63746e+02, 2.83678e+00, 0.00273969),
       (0.0339984 , 2.82236e+02, 1.72425e+00, 0.0027873 ),
       (0.0362608 , 2.52291e+02, 1.15568e+00, 0.00285656),
       (0.0385232 , 1.94139e+02, 1.03233e+00, 0.00290755

In [40]:
# Attribute indexing
data.dtype

dtype([('X', '<f8'), ('Y', '<f8'), ('E', '<f8'), ('DX', '<f8')])

In [41]:
# X is a column in the file
data['X']

array([0.00232478, 0.00458718, 0.00684958, 0.00911198, 0.0113744 ,
       0.0136368 , 0.0158992 , 0.0181616 , 0.020424  , 0.0226864 ,
       0.0249488 , 0.0272112 , 0.0294736 , 0.031736  , 0.0339984 ,
       0.0362608 , 0.0385232 , 0.0407856 , 0.043048  , 0.0453104 ,
       0.0475728 , 0.0498352 , 0.0520976 , 0.05436   , 0.0566224 ,
       0.0588848 , 0.0611472 , 0.0634096 , 0.065672  , 0.0679344 ,
       0.0701968 , 0.0724592 , 0.0747216 , 0.076984  , 0.0792464 ,
       0.0815088 , 0.0837712 , 0.0860336 , 0.088296  , 0.0905584 ,
       0.0928208 , 0.0950832 , 0.0973456 , 0.099608  , 0.10187   ,
       0.104133  , 0.106395  , 0.108658  , 0.11092   , 0.113182  ,
       0.115445  , 0.117707  , 0.11997   , 0.122232  , 0.124494  ,
       0.126757  , 0.129019  , 0.131282  , 0.133544  , 0.135806  ,
       0.138069  , 0.140331  , 0.142594  , 0.144856  , 0.147118  ,
       0.149381  , 0.151643  , 0.153906  , 0.156168  , 0.15843   ,
       0.160693  , 0.162955  , 0.165218  , 0.16748   , 0.16974

## Array indexing and slicing

In [42]:
# let's generate a dummy array from 0 to 20 with steps of 1
dummy_arr = np.arange(0,21,1)
dummy_arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20])

In [43]:
# First position
dummy_arr[0]

0

In [44]:
# Last position
dummy_arr[-1]

20

In [45]:
# positions 10 to 15 (Note that index 15 is not included!!)
dummy_arr[10:15]

array([10, 11, 12, 13, 14])

In [46]:
# Positions: 0, 2, 4, etc
dummy_arr[::2]

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18, 20])

In [47]:
# Positions: 1, 3, 5, etc
dummy_arr[1::2]

array([ 1,  3,  5,  7,  9, 11, 13, 15, 17, 19])

In [48]:
# Positions: 10, 12, 14
dummy_arr[10:15:2]

array([10, 12, 14])

## Array Methods

In [49]:
# Put the X column of the dataset into an array
arr = data['X']
arr.dtype

dtype('float64')

In [50]:
arr.shape

(200,)

In [51]:
dir(arr)

['T',
 '__abs__',
 '__add__',
 '__and__',
 '__array__',
 '__array_finalize__',
 '__array_interface__',
 '__array_prepare__',
 '__array_priority__',
 '__array_struct__',
 '__array_ufunc__',
 '__array_wrap__',
 '__bool__',
 '__class__',
 '__complex__',
 '__contains__',
 '__copy__',
 '__deepcopy__',
 '__delattr__',
 '__delitem__',
 '__dir__',
 '__divmod__',
 '__doc__',
 '__eq__',
 '__float__',
 '__floordiv__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__gt__',
 '__hash__',
 '__iadd__',
 '__iand__',
 '__ifloordiv__',
 '__ilshift__',
 '__imatmul__',
 '__imod__',
 '__imul__',
 '__index__',
 '__init__',
 '__init_subclass__',
 '__int__',
 '__invert__',
 '__ior__',
 '__ipow__',
 '__irshift__',
 '__isub__',
 '__iter__',
 '__itruediv__',
 '__ixor__',
 '__le__',
 '__len__',
 '__lshift__',
 '__lt__',
 '__matmul__',
 '__mod__',
 '__mul__',
 '__ne__',
 '__neg__',
 '__new__',
 '__or__',
 '__pos__',
 '__pow__',
 '__radd__',
 '__rand__',
 '__rdivmod__',
 '__reduce__',
 '__reduce_e

### Exercise: get some basic statistics given the methods available in the `numpy` array

In [52]:
print("Max = {:.2}, Min = {:.2}, Average = {:.2}, Std Dev = {:.2}".format(
    arr.max(),
    arr.min(),
    arr.mean(),
    arr.std(),
))

Max = 0.45, Min = 0.0023, Average = 0.23, Std Dev = 0.13


In [53]:
# Normalisation to 1
arr_norm = (arr-arr.min())/(arr.max()-arr.min())

# Reprint the statistics
print("Max = {:.2}, Min = {:.2}, Average = {:.2}, Std Dev = {:.2}".format(
    arr_norm.max(),
    arr_norm.min(),
    arr_norm.mean(),
    arr_norm.std(),
))

Max = 1.0, Min = 0.0, Average = 0.5, Std Dev = 0.29


# Plotting

In [74]:
# Choose your backend (the way the plots are rendered in a notebook)
%matplotlib notebook

# matplotlib.pyplot is a collection of command style functions that make matplotlib work like MATLAB
import matplotlib.pyplot as plt

In [75]:
# Simple case
fig, ax = plt.subplots()
ax.plot(data['X'], data['Y'])

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x116f9b400>]

In [77]:
# Let's label some stuff
# Simple Labels. Part 3 introduces axes manipulation
fig, ax = plt.subplots()
ax.set_title("$y = f(x)$")
ax.set_xlabel("Axis X")
ax.set_ylabel("Axis y")
# Marker symbols: https://matplotlib.org/api/markers_api.html
ax.plot(data['X'], data['Y'], marker='x', color='red', linestyle='')


<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x116ff71d0>]

In [76]:
# Log x
fig, ax = plt.subplots()
ax.semilogx(data['X'], data['Y'])

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x116c80978>]

In [78]:
# Log y
fig, ax = plt.subplots()
ax.semilogy(data['X'], data['Y'])

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x116ad8a20>]

In [90]:
# Log X and Log Y
fig, ax = plt.subplots()
ax.loglog(data['X'], data['Y'])

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x11e9deac8>]

In [79]:
## Error bars linear
fig, ax = plt.subplots()
ax.errorbar(data['X'], data['Y'], xerr=data['DX'], yerr=data['E'])

<IPython.core.display.Javascript object>

<Container object of 3 artists>

In [91]:
## Error bars Lox Log y
fig, ax = plt.subplots()
ax.errorbar(data['X'], data['Y'], xerr=data['DX'], yerr=data['E'])
ax.set_xscale("log", nonposx='clip')
ax.set_yscale("log", nonposy='clip')

<IPython.core.display.Javascript object>

In [80]:
# Same plot but let's prone some sata
fig, ax = plt.subplots()
ax.errorbar(
    data['X'][2:-10],
    data['Y'][2:-10],
    xerr=data['DX'][2:-10],
    yerr=data['E'][2:-10],
    # fmt = plot format string: '[color][marker][line]'
    # See Notes here: https://matplotlib.org/api/_as_gen/matplotlib.pyplot.plot.html
    fmt='rx',
    ecolor='green',
    capthick=2,
)
ax.set_xscale("log", nonposx='clip')
ax.set_yscale("log", nonposy='clip')


<IPython.core.display.Javascript object>

In [63]:
#let's suppose we want data for Q>0.02
data_x_subset_bool = data['X']>0.02
data_x_subset_bool

array([False, False, False, False, False, False, False, False,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,

In [81]:
# Indexing data['X] with the condition created
data['X'][data_x_subset_bool]

array([0.020424 , 0.0226864, 0.0249488, 0.0272112, 0.0294736, 0.031736 ,
       0.0339984, 0.0362608, 0.0385232, 0.0407856, 0.043048 , 0.0453104,
       0.0475728, 0.0498352, 0.0520976, 0.05436  , 0.0566224, 0.0588848,
       0.0611472, 0.0634096, 0.065672 , 0.0679344, 0.0701968, 0.0724592,
       0.0747216, 0.076984 , 0.0792464, 0.0815088, 0.0837712, 0.0860336,
       0.088296 , 0.0905584, 0.0928208, 0.0950832, 0.0973456, 0.099608 ,
       0.10187  , 0.104133 , 0.106395 , 0.108658 , 0.11092  , 0.113182 ,
       0.115445 , 0.117707 , 0.11997  , 0.122232 , 0.124494 , 0.126757 ,
       0.129019 , 0.131282 , 0.133544 , 0.135806 , 0.138069 , 0.140331 ,
       0.142594 , 0.144856 , 0.147118 , 0.149381 , 0.151643 , 0.153906 ,
       0.156168 , 0.15843  , 0.160693 , 0.162955 , 0.165218 , 0.16748  ,
       0.169742 , 0.172005 , 0.174267 , 0.17653  , 0.178792 , 0.181054 ,
       0.183317 , 0.185579 , 0.187842 , 0.190104 , 0.192366 , 0.194629 ,
       0.196891 , 0.199154 , 0.201416 , 0.203678 , 

In [82]:
# Length of the original X vs the prone X
# Note the print statetement order of values!
print("Original length = {1} :: Subset length = {0}.".format(
    len(data['X'][data_x_subset_bool]),
    len(data['X']),
))

Original length = 200 :: Subset length = 192.


In [83]:
# Same plot but let's prone some sata
fig, ax = plt.subplots()
ax.errorbar(
    data['X'][data_x_subset_bool],
    data['Y'][data_x_subset_bool],
    xerr=data['DX'][data_x_subset_bool],
    yerr=data['E'][data_x_subset_bool],
    fmt='b.',
    ecolor='green',
)
ax.set_xscale("log", nonposx='clip')
ax.set_yscale("log", nonposy='clip')

<IPython.core.display.Javascript object>

### Create a new `CSV` file

In [84]:
# Imagine that you want to create a new plot using the same X coordinate and a new Y coordinate
y_new = np.exp(data['X']*10)
y_new

array([ 1.02352014,  1.04694018,  1.07089613,  1.09540023,  1.12046525,
        1.14610358,  1.17232857,  1.19915363,  1.2265925 ,  1.25465922,
        1.28336816,  1.31273402,  1.34277182,  1.37349694,  1.40492511,
        1.43707242,  1.46995531,  1.50359063,  1.53799558,  1.57318779,
        1.60918526,  1.64600642,  1.68367011,  1.72219562,  1.76160267,
        1.80191142,  1.84314251,  1.88531704,  1.92845661,  1.97258329,
        2.01771968,  2.06388886,  2.11111449,  2.15942072,  2.20883229,
        2.25937449,  2.31107319,  2.36395485,  2.41804654,  2.47337595,
        2.5299714 ,  2.58786186,  2.64707697,  2.70764702,  2.76959195,
        2.83298238,  2.8977947 ,  2.96411943,  3.03193188,  3.10129573,
        3.17227818,  3.24485284,  3.31912104,  3.39505513,  3.47272643,
        3.5522102 ,  3.63347685,  3.71663987,  3.80166831,  3.88864201,
        3.97764526,  4.06864492,  4.16176807,  4.25698005,  4.35437027,
        4.4540331 ,  4.55593145,  4.66020762,  4.76682278,  4.87

In [85]:
# Note the size of the X and Y arrays
data['X'].size == y_new.size

True

In [86]:
# Let's plot the new data just to see how it looks like
fig, ax = plt.subplots()
ax.plot(data['X'], y_new)

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x11c937e10>]

### Adding the new array to the file

In [92]:
# Adding y_new to the dataset
from numpy.lib import recfunctions
data_new = recfunctions.append_fields(data, 'new', y_new)
data_new.dtype.names

('X', 'Y', 'E', 'DX', 'new')

In [93]:
'''
Remember the original file header:

# X , Y , E , DX
1

'''

# Let's create the new header
# The first row:

header = data_new.dtype.names
# join: joins a list into string
header_str = " , ".join(header)
header_str

'X , Y , E , DX , new'

In [94]:
# The 1st and the 2nd row:
header_str = "# " + header_str + "\n1"
print(header_str)

# X , Y , E , DX , new
1


In [95]:
# Let's save it to the home directory using np.savetxt

# Note that Jupyter hub only allows writing files to our home directory!
out_filepath = '/SNS/users/rhf/tmp/foo.txt'

np.savetxt(
    out_filepath,
    data_new,
    delimiter=",",
    fmt='%.2f',
    header=header_str,
    comments='',
)

FileNotFoundError: [Errno 2] No such file or directory: '/SNS/users/rhf/tmp/foo.txt'

In [None]:
# Use the shell to see what it's inside the file
!head /SNS/users/rhf/tmp/foo.txt

# Advanced Plotting

In [96]:
# Let's create a single plot with two independent Y axis
# X vs Y and X vs new

fig, ax1 = plt.subplots()  # create figure and axes
ax1.plot(data_new['X'], data_new['Y'], color='green')
ax1.set_ylabel('Y', color='green')

ax2 = ax1.twinx()
ax2.plot(data_new['X'], data_new['new'], color='red')
ax2.set_ylabel('new', color='red')

fig.suptitle('Advanced Plot', fontsize=20)

<IPython.core.display.Javascript object>

Text(0.5,0.98,'Advanced Plot')

In [97]:
# Let's Create a plot and create an annotation

fig, ax = plt.subplots()  # create figure and axes
ax.plot(data_new['X'], data_new['Y'], color='red')
ax.set_xticks([0.001, 0.2, 0.4])
ax.set_xticklabels(['low', 'med', 'high'])
# https://matplotlib.org/users/annotations.html
ax.annotate(
    'This is the maximum!', 
    xy=(0.025, 1620), # Coordinate where the head is 
    xytext=(0.1, 1000), # Coordinate where the text starts
    arrowprops={}, # This is an empty dictionary! More details in the Fitting Notebook
)
fig.suptitle('Annotated Plot', fontsize=20, color='red')

<IPython.core.display.Javascript object>

Text(0.5,0.98,'Annotated Plot')