# Pandas play

Load the pandas library.

Import constituents.csv data into our notebook, setting the **Symbol** column as the index.

In [None]:
import pandas as pd
# Read the CSV (path is relative to the notebook's working directory) and use the
# "Symbol" column as the row index, so rows can later be looked up with df.loc[<symbol>].
df = pd.read_csv("Data/constituents.csv",index_col="Symbol")

Show the top rows from the dataframe, so we can see what the data looks like

In [None]:
# Preview the first rows; head() returns 5 rows unless told otherwise.
df.head()

Show the first 5 symbols and their names.

In [None]:
# Selecting one column gives a Series; because Symbol is the index, each name is
# shown next to its symbol.
df["Name"].head()

In [None]:
print(df["Name"].head())
# print() is redundant in Jupyter notebooks: the value of a cell's last expression
# is displayed automatically.

Show the following 4 columns

In [None]:
# Indexing with a list of column names selects those columns and returns a DataFrame.
df[ ["Name", "HQ", "Founded", "Price"] ]

In [None]:
# .loc looks rows up by index label (here the ticker symbol); a single label
# returns that row as a Series.
df.loc["ZBH"]

In [None]:
# A list of labels returns a DataFrame with one row per requested label.
df.loc[ ["ZBH","MMM"] ]

In [None]:
# .loc selects by index label (here the ticker Symbol), so a Sector value such as
# "Health Care" is not a valid key and raises KeyError. Catch it so the
# demonstration does not abort the cell (or a Restart & Run All).
try:
    df.loc["Health Care"]
except KeyError as err:
    print(f"KeyError: {err} is not an index label")

# To select rows by a column's value, filter with a boolean mask instead.
df[df["Sector"] == "Health Care"]

In [None]:
# Column-wise mean of the three price columns; .max(), .min(), etc. are called
# the same way.
df[ ["Price","52 Week High", "52 Week Low"] ].mean()

In [None]:
df.dtypes # Show the data type of each column

# NumPy is a high-speed array library - great for matrix operations

In [None]:
import numpy as np

In [None]:
# Two 2x5 integer matrices used by the element-wise examples that follow.
nump1 = np.array([[1, 2, 3, 4, 5], [10, 20, 30, 40, 50]])
nump2 = np.array([[10, 20, 30, 40, 50], [100, 200, 300, 400, 500]])

In [None]:
# "*" on arrays multiplies element by element (it is not a matrix product).
# NOTE(review): despite the name, numpsum holds a product, not a sum.
numpsum = nump1 * nump2
numpsum

In [None]:
%%timeit
# Baseline: "+" on plain Python lists concatenates them.
l1 = [1,2,3,4,5]
l2 = [10,20,30,40,50]
l3 = l1 + l2
l3

In [None]:
%%timeit
# Same concatenation with NumPy arrays; the timing includes building both arrays.
l1 = np.array([1,2,3,4,5])
l2 = np.array([10,20,30,40,50])
l3 = np.concatenate([l1,l2])
l3

In [None]:
%%timeit
# Build NumPy arrays, convert each back to a Python list, then concatenate the lists.
l1 = np.array([1,2,3,4,5])
l2 = np.array([10,20,30,40,50])
l3 = list(l1)+list(l2)
l3

In [None]:
# Comparison operators on arrays work element by element and return a boolean array.
l1 = np.array([1,2,3,4,5])
l2 = np.array([10,20,30,40,50])
l1 > l2

In [None]:
# Element-wise "less than"; uses l1 and l2 from the previous cell.
l1 < l2

In [None]:
# Subtract element by element, then test each difference against 27.
(l2 - l1) == 27

In [None]:
# True at each position where the l1 element is even OR the l2 element is a
# multiple of 20. l2 is not redefined here; it comes from an earlier cell.
l1 = np.array([1,2,3,4,5])
np.logical_or(  (l1 % 2) == 0, (l2 % 20) == 0 )

In [None]:
# np.where(condition, a, b) picks from a where the condition is True and from b
# elsewhere: even elements of l1 become l1+1, odd elements become l1-1.
x = np.where( (l1 % 2) == 0, l1+1, l1-1)
x

In [None]:
# Label every row by price; the result is a NumPy array of strings, not a Series.
# Note: a missing Price (NaN) compares False with "< 100", so it would be
# labelled 'Expensive'.
x = np.where(df['Price'] < 100, 'Cheap', 'Expensive')
x

In [None]:

# Create a new dataframe with just three columns. .copy() makes it independent of
# df, so columns added to smalldf later do not touch df.
smalldf = df[ ["Name","Sector","Price"] ].copy()
smalldf.head()

In [None]:
# Add a new column.
# The right-hand side holds Price for the Industrials rows only; on assignment
# pandas aligns it to smalldf's index, so every other row gets NaN.
# NOTE(review): despite its name, "avgprice" is a per-row price, not an average -
# confirm the intended calculation.
smalldf["avgprice"] = smalldf[smalldf['Sector'] == 'Industrials']["Price"]
smalldf.head()

In [None]:
# Move "Symbol" out of the index into an ordinary column. The new index is the
# default 0..n-1 range, so each row's label equals its position.
newdfsmall = smalldf.reset_index()

# Single-row frames for the first and last tickers of the slice.
a = newdfsmall[newdfsmall["Symbol"] == "ABT"]
b = newdfsmall[newdfsmall["Symbol"] == "ACN"]

# Slice by position from ABT to ACN; "+ 1" makes the end inclusive.
newdfsmall.iloc[a.index[0] : b.index[0] + 1]

# Matplotlib fancy charts

Taken from https://matplotlib.org/stable/gallery/lines_bars_and_markers/gradient_bar.html

The main graphs page is https://matplotlib.org/stable/gallery/index.html

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

# Seed NumPy's global random generator so the random bar heights drawn later in
# this cell are the same on every run.
np.random.seed(19680801)


def gradient_image(ax, direction=0.3, cmap_range=(0, 1), **kwargs):
    """
    Fill a region of an Axes with a linear colour gradient drawn via ``imshow``.

    A 2x2 array of colormap positions is computed from the gradient direction
    and handed to ``imshow`` with bicubic interpolation, which smooths the four
    corner values into a continuous ramp.

    Parameters
    ----------
    ax : Axes
        The Axes to draw on.
    direction : float
        Gradient direction, from 0 (vertical) to 1 (horizontal).
    cmap_range : float, float
        The fraction (cmin, cmax) of the colormap to use, where the complete
        colormap is (0, 1).
    **kwargs
        Forwarded to ``ax.imshow``; *cmap*, *extent* and *transform* are the
        usual ones to pass.

    Returns
    -------
    The object returned by ``ax.imshow``.
    """
    phi = direction * np.pi / 2
    unit = np.array([np.cos(phi), np.sin(phi)])
    # The four corners of the unit square, laid out as the 2x2 image grid.
    corners = np.array([[[1, 0], [1, 1]],
                        [[0, 0], [0, 1]]])
    # Project every corner onto the gradient direction in one step.
    ramp = corners @ unit
    lo, hi = cmap_range
    # Rescale so the largest projection maps to hi and a zero projection to lo.
    ramp = lo + (hi - lo) / ramp.max() * ramp
    return ax.imshow(ramp, interpolation='bicubic', clim=(0, 1),
                     aspect='auto', **kwargs)


def gradient_bar(ax, x, y, width=0.5, bottom=0):
    """
    Draw bars filled with a colour gradient instead of a flat colour.

    One gradient image is drawn per ``(x, y)`` pair, spanning ``x`` to
    ``x + width`` horizontally and ``bottom`` to ``y`` vertically, using the
    (0, 0.8) portion of the ``Blues_r`` colormap.

    Parameters
    ----------
    ax : Axes
        The Axes to draw on.
    x : iterable of float
        Left edge of each bar.
    y : iterable of float
        Top of each bar.
    width : float
        Width shared by all bars.
    bottom : float
        Baseline shared by all bars.
    """
    for x_left, y_top in zip(x, y):
        bar_extent = (x_left, x_left + width, bottom, y_top)
        gradient_image(ax, extent=bar_extent,
                       cmap=plt.cm.Blues_r, cmap_range=(0, 0.8))


fig, ax = plt.subplots()
ax.set(xlim=(0, 10), ylim=(0, 1))

# background image: a horizontal gradient (direction=1) covering the whole Axes;
# transform=ax.transAxes makes the (0, 1, 0, 1) extent Axes-relative
gradient_image(ax, direction=1, extent=(0, 1, 0, 1), transform=ax.transAxes,
               cmap=plt.cm.RdYlGn, cmap_range=(0.2, 0.8), alpha=0.5)

# N bars with random heights in [0, 1); x holds the left edge of each bar
N = 10
x = np.arange(N) + 0.15
y = np.random.rand(N)
gradient_bar(ax, x, y, width=0.7)
plt.show()

In [None]:
import matplotlib.pyplot as plt
import numpy as np

from matplotlib import cbook, cm
from matplotlib.colors import LightSource

# Load and format data
# One of matplotlib's bundled sample files; the keys read below are
# 'elevation', 'xmin', 'xmax', 'ymin' and 'ymax'.
dem = cbook.get_sample_data('jacksboro_fault_dem.npz')
z = dem['elevation']
nrows, ncols = z.shape
# Coordinate grids matching z: one x value per column, one y value per row.
# NOTE: x and y rebind names already used by earlier cells in this notebook.
x = np.linspace(dem['xmin'], dem['xmax'], ncols)
y = np.linspace(dem['ymin'], dem['ymax'], nrows)
x, y = np.meshgrid(x, y)

# Keep only rows and columns 5..49 of the grid to plot a smaller patch.
region = np.s_[5:50, 5:50]
x, y, z = x[region], y[region], z[region]

# Set up plot
fig, ax = plt.subplots(subplot_kw=dict(projection='3d'))

# Light source at azimuth 270 degrees, altitude 45 degrees.
ls = LightSource(270, 45)
# To use a custom hillshading mode, override the built-in shading and pass
# in the rgb colors of the shaded surface calculated from "shade".
rgb = ls.shade(z, cmap=cm.gist_earth, vert_exag=0.1, blend_mode='soft')
# shade=False stops plot_surface from shading again on top of the precomputed colors.
surf = ax.plot_surface(x, y, z, rstride=1, cstride=1, facecolors=rgb,
                       linewidth=0, antialiased=True, shade=False)

plt.show()