# Homework: Python for machine learning and Data Analysis
#### Here are listed the most common Python & NumPy methods used in the course, as well as usage examples. This sheet doesn't contain extensive information. Please refer to the documentation for details.


Prepared by Mykhailo Vladymyrov,
Science IT Support, University of Bern, 2023

This work is licensed under <a href="https://creativecommons.org/share-your-work/public-domain/cc0/">CC0</a>.



When solving exercises, try not to copy-paste: typing the code yourself helps your brain learn better.

# 0. List, tuple, set, dict

In [None]:
# A `tuple` can contain any number of any element and can't be modified
x_coordinates = (0, 1, 2, 3)

#               ^ ---------^ tuple is written in normal brackets

In [None]:
# To see what any object is, it's a good idea to print it:
print(x_coordinates)

In [None]:
# or just:
x_coordinates

In [None]:
# As well as check its type:
type(x_coordinates)

In [None]:
# and available methods and properties
dir(x_coordinates)

In [None]:
# the `__doc__` property often contains useful info
print(x_coordinates.__doc__)

 ---

In [None]:
# Function `len` called on any collection — an array-like object — will return its length

x_coordinates_length = len(x_coordinates)
print('length of the `x_coordinates` is', x_coordinates_length)

In [None]:
print('Also it is handy to use so-called f-strings (formatted string): they allow to easily format the output:')
print(f'For example:\n\tlength of the `x_coordinates={x_coordinates}` is {x_coordinates_length}')

#     ^------- f before string marks an f-string


In [None]:
# `list` is similar to `tuple`, but can be modified:

y_coordinates = [1, 1, 4]

#               ^ ---------^ list is written in square brackets

In [None]:
# One can loop through elements of a collection:

for x in x_coordinates:
  print (x)

In [None]:
# Or also obtain the index of the element:

for idx, y in enumerate(y_coordinates):
  print (f'y[{idx}] = {y}')

In [None]:
# Several collections can be iterated together by zipping them:

for x, y in zip(x_coordinates, y_coordinates):
  print (x, y)

In [None]:
# Elements of the list can be modified in place by assigning to an index:
print(y_coordinates[0])  # value before the modification
y_coordinates[0] = 0
print(y_coordinates[0])  # the same element now holds the new value
print(y_coordinates)     # only the element at index 0 was reassigned

In [None]:
# `list` can be created from another collection:

x_coordinates = list(x_coordinates)
print(f'now `x_coordinates` is {type(x_coordinates)}')

In [None]:
# Elements can be appended to a list


y_coordinates.append(9)
print(y_coordinates)

y_coordinates.append(16)
print(y_coordinates)

y_coordinates.append(25)
print(y_coordinates)


In [None]:
# Extended with another list:
x_coordinates.extend([4, 5])

In [None]:
# or added

all_numbers = x_coordinates + y_coordinates
print(all_numbers)

In [None]:
# `set` - is a collection of unique elements:
unique_numbers = set(all_numbers)
print(unique_numbers)

In [None]:
# `dictionary` is a collection where the values are assigned to unique keys and can be accessed by the key:

uptime_hours = {'jupyter': 10, 'chrome': 30}

In [None]:
print(uptime_hours['jupyter'])

In [None]:
# `list` comprehensions are a quick way to define a list:

y_coordinates = [x**2 for x in x_coordinates]
print(y_coordinates)

In [None]:
# `set`:
values = {v%7 for v in y_coordinates}
print(values)

In [None]:
# `dictionary`

x_at_y = {y:x for x, y in zip(x_coordinates, y_coordinates)}
print(x_at_y)
print(x_at_y[25])

# 1. Pyplot

In [None]:
import matplotlib.pyplot as plt

In [None]:
x_coordinates = list(range(6))
y_coordinates = [x**2 for x in x_coordinates]

In [None]:
# Simple plot

plt.plot(x_coordinates, y_coordinates)

In [None]:
# Scatter plot
plt.scatter(x_coordinates, y_coordinates, marker='x', color='b', s=10)

In [None]:
# Scatter plot with axes range
plt.scatter(x_coordinates, y_coordinates, marker='x', color='b', s=10)
plt.xlim(0, 10)
plt.ylim(0, 10)

In [None]:
# Plot with isotropic axes

plt.scatter(x_coordinates, y_coordinates, marker='x', color='b', s=10)

current_axis = plt.gca()
current_axis.set_aspect('equal')

In [None]:
# Big plot, with details: figure size, axis labels and title

plt.figure(figsize=(8,8))  # plot size

plt.plot(x_coordinates, y_coordinates)

plt.xlabel('x coordinate')
plt.ylabel('y coordinate')
plt.title('y=x^2')  # `y_coordinates` were computed as x**2

plt.show()
plt.close()  # It is always a good idea to close the plot to save memory

In [None]:
# Multiple plots can be combined with subplots:

fig, ax = plt.subplots(nrows=2, ncols=2, figsize=(8, 8))

ax[0][0].scatter(x_coordinates, y_coordinates, marker='x', color='b', s=30)
ax[0][1].scatter(x_coordinates, y_coordinates, marker='o', c=y_coordinates, s=30)

ax[1][0].plot(x_coordinates, y_coordinates)
ax[1][1].scatter(x_coordinates, y_coordinates, marker='^', c=y_coordinates, s=30, cmap=plt.cm.Accent)

plt.show()
plt.close()

In [None]:
# To make a plot in 3D

fig = plt.figure(figsize=(8,8))
ax = fig.add_subplot(111, projection='3d')
z_coordinates = y_coordinates

ax.scatter3D(x_coordinates, y_coordinates, z_coordinates, marker='x', s=20)

plt.show()
plt.close()

# 1. NumPy

In [None]:
import numpy as np

In [None]:
# `numpy` arrays are, similarly to `list`s, mutable containers, but with much richer functionality

In [None]:
# Array-like objects can be converted to `numpy` array:
arr = np.array(x_coordinates)
print(arr)

In [None]:
# Attribute `shape` shows the size of an array.
print(arr.shape)

In [None]:
# Shape is a `tuple` because an array can have more than one dimension:

arr2d = np.asarray([[1,2,3], [4, 5, 6]])
print(arr2d)
print(arr2d.shape)  # last element of the shape - most inner dimension of the array

In [None]:
# A multidimensional array can be reshaped to a different shape, preserving the total number of elements and their order

arr2d_r1 = arr2d.reshape((3, 2))
print(arr2d_r1)

In [None]:
# or turned into 1-d with flatten method:

arr2d_1d = arr2d.flatten()
print(arr2d_1d, arr2d_1d.shape)

In [None]:
# Slicing of an array:
# start:stop - elements from start (included) till stop (excluded)

print(arr[1:3])

In [None]:
# Slicing of an array:
# start:stop:step - elements from start (included) till stop (excluded) with stride step

print(arr[1:6:2])

In [None]:
# Similarly for a multidimensional array:
#
print(arr2d[0:1, 1:3])

In [None]:
# `:` means take all elements along the axis
#
print(arr2d[:, 1:3])

In [None]:
# To take elements over several sequential axes - use ellipsis (...):
#
print(arr2d[..., 1:3])

In [None]:
# To generate sequential integers (similar to `range`):
numbers = np.arange(0, 20, 2)
print(numbers)

In [None]:
# For floating point values

x_coord = np.linspace(start=-1, stop=1, num=5)
print (x_coord)

In [None]:
# Operations on the numpy arrays can be performed in a pythonic way:

y_coord = 2*x_coord**2 + 3

plt.plot(x_coord, y_coord)
plt.gca().set_aspect('equal')

---

In [None]:
# To generate uniformly distributed random numbers:
rnd = np.random.uniform(0, 10, size=10)
print(rnd)
print(rnd.shape)

In [None]:
# or normally distributed, 3D array:
rnd = np.random.normal(loc=0.5, scale=2, size=(3, 4, 5))
print(rnd)
print(rnd.shape)

In [None]:
# Let's check distribution:
plt.hist(rnd.flatten(), 10);

In [None]:
# Array statistics (computed over all elements of `rnd`) can be obtained like:
print('array `rnd` mean = ', rnd.mean())
print('array `rnd` standard deviation = ', rnd.std())
print('array `rnd` minimum = ', rnd.min())
print('array `rnd` maximum = ', rnd.max())
print('array `rnd` 40th percentile = ', np.percentile(rnd, 40))


In [None]:
# To get an element from a 1D array:
print(np.random.choice(arr))

In [None]:
# or several elements:
mtx = np.random.choice(arr, size=(2,2))
print(mtx)

In [None]:
# Matrix multiplication
np.dot([1, 2], mtx)  # can be also written as [1, 2]@ mtx

---

In [None]:
# Sometimes a grid of values is needed, given a set of values along each axis,
# e.g. for a grid search or visualization. `meshgrid` does exactly that.


# define the set of values along the x and y axes
x_coords = np.linspace(0, 4.5, 10)      # 10 evenly spaced values from 0 to 4.5 (both included)
y_coords = np.linspace(-10, -5.5, 10)   # 10 evenly spaced values from -10 to -5.5 (both included)

# create the meshgrid: two 2D arrays holding the x and the y coordinate of every grid point
meshgrid_x, meshgrid_y = np.meshgrid(x_coords, y_coords)
print(meshgrid_x.shape, meshgrid_y.shape)  # both arrays have the same 2D shape

In [None]:
print(meshgrid_x)

In [None]:
print(meshgrid_y)

In [None]:
# To plot the points created by the meshgrid we need to flatten them:
x_coordinates = meshgrid_x.flatten()
y_coordinates = meshgrid_y.flatten()
index = np.arange(len(x_coordinates))

sc = plt.scatter(x_coordinates, y_coordinates, c=index, cmap=plt.cm.gist_earth)
plt.colorbar(sc, ax=plt.gca())

---

In [None]:
# Arrays can be concatenated along a specific axis, provided the other dimensions are same

arr_1 = np.zeros(shape=(5, 3, 6))
arr_2 = np.ones(shape=(5, 4, 6))

print(arr_1.shape, arr_2.shape)
#print(arr_1)
#print(arr_2)

In [None]:
arr_conc = np.concatenate((arr_1, arr_2), axis=1)

print(arr_conc.shape)

In [None]:
# stack along new last axis
arr_stack = np.stack((arr_1, arr_2[:, :-1]), axis=-1)

print(arr_stack.shape)

In [None]:
# Similarly to indexing one element, multiple elements of an array can be obtained
y = np.arange(6)**2
print(y)

print(f'y[2] = {y[2]}')
print(f'y[4] = {y[4]}')
print(f'y[[2,4]] = {y[[2,4]]}')

In [None]:
# This can be useful to shuffle several arrays of elements coherently:

shuffled_indexes = np.random.permutation(len(y))
y_shuffled = y[shuffled_indexes]
print(f'{y}[{shuffled_indexes}] = {y_shuffled}')

In [None]:
# Boolean arrays of the same dimensions can be used as masks
mask = [True, False, False, False, False, True]
print(f'{y}[{mask}] = {y[mask]}')

In [None]:
# This is useful to select a group of elements:
mask_above_2 = y > 2
print(mask_above_2)

mask_less_17 = y < 17
print(mask_less_17)

# `&` is the elementwise `and` operation on Boolean numpy arrays
# (`|` is the elementwise `or`, `~` the elementwise `not`)
mask = mask_above_2 & mask_less_17
print(mask)

print('values between 2 and 17:', y[mask])

---

In [None]:
# To get index of first smallest or largest element use `argmin` and `argmax`:

print(y_shuffled)

ixd_smallest = y_shuffled.argmin()
ixd_largest = y_shuffled.argmax()

print(f'index of smallest element: {ixd_smallest}. {y_shuffled}[{ixd_smallest}] = {y_shuffled[ixd_smallest]}')
print(f'index of largest element: {ixd_largest}. {y_shuffled}[{ixd_largest}] = {y_shuffled[ixd_largest]}')

In [None]:
# For a multidimensional array, or if more than one element has to be found - use argwhere:
coords_elements_above_3 = np.argwhere(rnd > 3)
print(coords_elements_above_3)

for i, j, k in coords_elements_above_3:
  print(f'rnd[{i}, {j}, {k}] = {rnd[i, j, k]}')

In [None]:
# Transposing index groups allows to obtain index arrays for each axis
arr_i, arr_j, arr_k = coords_elements_above_3.T
print(arr_i)
print(arr_j)
print(arr_k)

# 2. Image

In [None]:
# Loading an image from a file or a URL
from skimage import io

image = io.imread('https://github.com/neworldemancer/DSF5/raw/master/figures/unibe.jpg')
print(image.shape)

In [None]:
# Display an image with pyplot
plt.imshow(image);

In [None]:
# Any 2D map can be visualized similarly, e.g. a 2D histogram:

values = np.random.multivariate_normal([0, 0], [[1, 0.3],[ 0.3, 0.2]], size=1000)
plt.scatter(*values.T, s=5)
plt.show()

# `histogram2d` returns the counts `h` and the bin edges along x (`bx`) and y (`by`).
# In `h` the x bins run along the first axis (rows), while `imshow` draws rows
# vertically - so `h` is transposed to get the same orientation as the scatter plot.
h, bx, by = np.histogram2d(*values.T, bins=20)

# `extent` maps the image onto the data coordinates given by the bin edges;
# `aspect='auto'` lets the image fill the axes, like the scatter plot above.
plt.imshow(h.T, origin='lower', extent=[bx[0], bx[-1], by[0], by[-1]], aspect='auto')
plt.show()

# 3. Object methods

Methods of objects are called by writing object_name.method_name similarly to how we do for modules:

```xx.yy(smth=value, smth2=v0)```

E.g. `plt.plot` and `np.max` are functions of the modules `plt` and `np`; `arr_i.max` is a method of the object `arr_i`.

When you start typing anything in the brackets or press `Shift + Tab` in Colab or Jupyter - a `__doc__` string of the function will be shown. You can see all the parameters, their description, and often examples. You don't even need to google it ;)

# 4. Interactive & Animation

In [None]:
from ipywidgets import interact

In [None]:
# You can often get much better intuition if you explore a range of parameters.
# One way to do it - is with interactive widgets

# Here we plot a line with a different slope and intercept depending on the parameters set

@interact  # creates widgets according to function parameters (see https://ipywidgets.readthedocs.io/en/latest/examples/Widget%20Basics.html)
def plot(a=(-5, 5, 0.1), b=(-5, 5, 0.1)):  # each parameter will be interactive
    """Plot the line y = a*x + b for x between -10 and 10.

    The tuple defaults are `(min, max, step)` abbreviations that `interact`
    turns into sliders; the function is then called with the selected numbers.

    Parameters
    ----------
    a : slope of the line
    b : intercept of the line
    """
    # two end points are enough to draw a straight line
    x0, x1 = -10, 10
    y0 = a * x0 + b
    y1 = a * x1 + b

    plt.plot([x0, x1], [y0, y1])

    plt.gca().set_aspect('equal')
    plt.xlim(-10, 10)
    plt.ylim(-10, 10)
    # the `+` format flag always prints the sign of `b`, so a negative
    # intercept reads "x-1.0" instead of "x+-1.0"
    plt.title(f'y={a:.1f}x{b:+.1f}')
    plt.grid(True)
    plt.show()


The function above is called on each parameter update. This means that if the function takes too long to compute, such an approach might not be usable.

Animation serves a similar purpose, but all frames are pre-rendered. It is fast for visualization, but can take long to render all frames, and will have a large memory footprint with many frames.

In [None]:
from matplotlib import animation
plt.rcParams["animation.html"] = "jshtml"  # for matplotlib 2.1 and above, uses JavaScript

In [None]:
%%capture
# cell magic `capture` suppresses output of this cell

f = lambda x, a: a*x+1
x0 = -10
x1 = 10

a = 0
y0 = f(x0, a)
y1 = f(x1, a)

fig = plt.figure()
lines = plt.plot( [x0, x1], [y0, y1])

plt.gca().set_aspect('equal')
plt.xlim(-10, 10)
plt.ylim(-10, 10)
plt.grid(True)


def animate(i):
    """Redraw the line for frame `i`, using the slope `0.1 * i`.

    Returns the list of line objects that was created by `plt.plot`.
    """
    slope = 0.1 * i
    line = lines[0]
    # recompute both end points of the line for the new slope
    line.set_data([x0, x1], [f(x0, slope), f(x1, slope)])
    return lines

ani = animation.FuncAnimation(fig, animate, frames=10)


In [None]:
# Show animation
ani

# 5. Exercises:

1. create a list of names and list of ages
2. make a dictionary using dictionary comprehension from these lists


3. convert ages to np.array
4. find the biggest value and its index


5. download an image from the URL
6. crop any area 100x100 pixels, all channels
7. flatten and plot histogram
8. find the value of the darkest and brightest pixel
9. find the coordinates and channel of the brightest pixel value


10. find all unique pixel values with a set
11. create arrays of x and y coordinates of all pixels with meshgrid
12. make a 3D surface plot of red channel values (z) with plot_surface.


13. find all elements in the green channel smaller than the 30th percentile
14. set corresponding elements in the red channel to 255, show the image


15. make an interactive plot that crops a 100x100 area at coordinates given by a slider


16. make an animation of 13-14, animating the resulting image with percentiles 5, 10, ..., 90, 95

