# Numpy Demo

This is just a cheat sheet from Patrick Löber. For further information, refer back to the original tutorial:

https://www.youtube.com/watch?v=9JUAPgtkKpI

## Installation and Array Basics

In [6]:
pip install numpy

Note: you may need to restart the kernel to use updated packages.


In [7]:
#or: conda install numpy

Import numpy:

In [8]:
import numpy as np

Check version of Numpy

In [11]:
np.__version__

'1.21.5'

The central object is the array

In [19]:
a = np.array([1,2,3,4,5])

a # [1 2 3 4 5]

array([1, 2, 3, 4, 5])

In [20]:
#shape of the array
a.shape

(5,)

In [21]:
#type of elements
a.dtype

dtype('int64')

In [22]:
# number of dimensions
a.ndim

1

In [23]:
# total number of elements
a.size

5

In [24]:
# the size in bytes of each element
a.itemsize

8

Essential Methods

In [25]:
a = np.array([1,2,3])

In [26]:
# access and change elements
print(a[0])
a[0] = 5
print(a) 

1
[5 2 3]


In [27]:
# elementwise math operations
b = a * np.array([2,0,2])
print(b) 
print(a.sum())

[10  0  6]
10


## Array vs. List

In [28]:
l = [1,2,3]

In [29]:
# create an array from a list
a = np.array([1,2,3]) 
print(l) # [1, 2, 3]
print(a) # [1 2 3]

[1, 2, 3]
[1 2 3]


In [30]:
# adding new item
l.append(4)

In [38]:
# showing that this method is not possible
a.append(4)

AttributeError: 'numpy.ndarray' object has no attribute 'append'

In [33]:
# lists can also be extended with +, but this creates a new list instead of modifying l in place
l2 = l + [5]
print(l2) # [1, 2, 3, 4, 5]

[1, 2, 3, 4, 5]


In [34]:
# this is called broadcasting, adds 4 to each element
a2 = a + np.array([4])
print(a2) 

[5 6 7]


In [35]:
# vector addition (shapes match exactly, so no broadcasting is needed)
a3 = a + np.array([4,4,4])
print(a3)

[5 6 7]


In [37]:
# showing that this method is not possible
a3 = a + np.array([4,5])

ValueError: operands could not be broadcast together with shapes (3,) (2,) 

In [39]:
# multiplication
l2 = 2 * l # list l repeated 2 times, same as l+l
print(l2)

[1, 2, 3, 4, 1, 2, 3, 4]


In [41]:
# multiplication for each element
a3 = 2 * a

print(a3)

[2 4 6]


In [42]:
# modify each item in the list
l2 = []
for i in l:
 l2.append(i**2)
print(l2) # [1, 4, 9, 16]

[1, 4, 9, 16]


In [43]:
# or list comprehension
l2 = [i**2 for i in l]
print(l2) # [1, 4, 9, 16]

[1, 4, 9, 16]


In [45]:
a2 = a**2 # -> squares each element!

print(a2)

[1 4 9]


In [46]:
# Note: function applied to array usually operates element wise
a2 = np.sqrt(a) # np.exp(a), np.tanh(a)
print(a2)  

[1.         1.41421356 1.73205081]


In [47]:
a2 = np.log(a)
print(a2)

[0.         0.69314718 1.09861229]


## Dot Product

In [48]:
a = np.array([1,2])
b = np.array([3,4])

In [49]:
# sum of the products of the corresponding entries
# multiply corresponding elements and then take the sum

In [50]:
# cumbersome way for lists
dot = 0
for i in range(len(a)):
 dot += a[i] * b[i]
print(dot)

11


In [51]:
# easy with numpy 
dot = np.dot(a,b)
print(dot) 

11


In [52]:
# step by step manually
c = a * b
print(c) 
d = np.sum(c)
print(d) # 11

[3 8]
11


In [53]:
# most of these functions are also instance methods
dot = a.dot(b)
print(dot) 
dot = (a*b).sum()
print(dot) 

11
11


In [54]:
# with the @ operator (Python 3.5+)
dot = a @ b
print(dot)

11


## Speed Test Array vs. List

In [62]:
from timeit import default_timer as timer

# Two random vectors of 1000 standard-normal samples each.
a = np.random.randn(1000)
b = np.random.randn(1000)

# The same data as plain Python lists, for the pure-Python baseline.
A = list(a)
B = list(b)

# Number of times each implementation is called in the timing loops.
T = 1000

def dot1():
    """Return the dot product of the lists A and B using a plain Python loop.

    This is the deliberately slow, element-by-element baseline that the
    timing code compares against the NumPy version.
    """
    total = 0
    for idx, left in enumerate(A):
        # Index into B so a too-short B still raises IndexError.
        total += left * B[idx]
    return total

def dot2():
    """Return the dot product of the arrays a and b using NumPy's np.dot."""
    product = np.dot(a, b)
    return product

# Time T calls of the pure-Python list implementation.
start = timer()
for t in range(T):
    dot1()
end = timer()
t1 = end-start

# Time T calls of the NumPy implementation in exactly the same way.
start = timer()
for t in range(T):
    dot2()
end = timer()
t2 = end-start

print('Time with lists:', t1) 
print('Time with array:', t2)
# A ratio above 1 means the NumPy version was faster by that factor.
print('Ratio', t1/t2) 

Time with lists: 0.15900009100005263
Time with array: 0.0015432519999194483
Ratio 103.02924668709441


## Multidimensional (nd) Arrays

In [63]:
# (matrix class exists but not recommended to use)
a = np.array([[1,2], [3,4]])
print(a)

[[1 2]
 [3 4]]


In [64]:
print(a.shape)

(2, 2)


In [66]:
# Access elements
# row first, then columns
print(a[0]) 
print(a[0][0])

[1 2]
1


In [67]:
# slicing
print(a[:,0]) # all rows in col 0: [1 3]
print(a[0,:]) # all columns in row 0: [1 2]

[1 3]
[1 2]


In [68]:
# transpose
a.T

array([[1, 3],
       [2, 4]])

In [69]:
# matrix multiplication
b = np.array([[3, 4], [5,6]])
c = a.dot(b)

In [70]:
# elementwise multiplication
d = a * b 

In [71]:
# inner dimensions must match!
b = np.array([[1,2,3], [4,5,6]])
c = a.dot(b.T)

ValueError: shapes (2,2) and (3,2) not aligned: 2 (dim 1) != 3 (dim 0)

In [72]:
# determinant
c = np.linalg.det(a)

In [73]:
# inverse
c = np.linalg.inv(a)

In [74]:
# diag
c = np.diag(a)
print(c) # [1 4]

[1 4]


In [75]:
# diag on a vector returns diagonal matrix (overloaded function)
c = np.diag([1,4])
print(c)

[[1 0]
 [0 4]]


## Indexing, Slicing, and Boolean Indexing

In [77]:
# Slicing: Similar to Python lists, numpy arrays can be sliced.
# Since arrays may be multidimensional, you must specify a slice for each
# dimension of the array:
a = np.array([[1,2,3,4], [5,6,7,8], [9,10,11,12]])
print(a)

[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]


In [78]:
# Integer indexing: element at row 0, column 1
b = a[0,1]
print(b) 

2


In [79]:
# Slicing
row0 = a[0,:]
print(row0)

[1 2 3 4]


In [80]:
col0 = a[:, 0]
print(col0) 

[1 5 9]


In [81]:
slice_a = a[0:2,1:3]
print(slice_a)

[[2 3]
 [6 7]]


In [82]:
# indexing starting from the end: -1, -2 etc...
last = a[-1,-1]
print(last)

12


In [84]:
# Boolean Indexing
a = np.array([[1,2], [3, 4], [5, 6]])
print(a)

[[1 2]
 [3 4]
 [5 6]]


In [85]:
# same shape with True or False for the condition
bool_idx = a > 2
print(bool_idx)

[[False False]
 [ True  True]
 [ True  True]]


In [86]:
# note: this will be a rank 1 array!
print(a[bool_idx]) # [3 4 5 6]

[3 4 5 6]


In [87]:
# We can do all of the above in a single concise statement:
print(a[a > 2])

[3 4 5 6]


In [88]:
# np.where(): same size with modified values
b = np.where(a>2, a, -1)
print(b)

[[-1 -1]
 [ 3  4]
 [ 5  6]]


In [89]:
# fancy indexing: access multiple indices at once
a = np.array([10,19,30,41,50,61])

In [90]:
b = a[[1,3,5]]
print(b)

[19 41 61]


In [91]:
even = np.argwhere(a%2==0).flatten()
print(even)

[0 2 4]


In [92]:
a_even = a[even]
print(a_even)

[10 30 50]


## Reshaping

In [93]:
a = np.arange(1, 7)
print(a)

[1 2 3 4 5 6]


In [94]:
b = a.reshape((2, 3)) # error if shape cannot be used
print(b)

[[1 2 3]
 [4 5 6]]


In [95]:
c = a.reshape((3, 2)) # 3 rows, 2 columns
print(c)

[[1 2]
 [3 4]
 [5 6]]


In [96]:
# newaxis is used to create a new axis in the data
# needed when a model requires the data to be shaped in a certain manner
print(a.shape) 

(6,)


In [97]:
d = a[np.newaxis, :]
print(d) 
print(d.shape)

[[1 2 3 4 5 6]]
(1, 6)


In [98]:
e = a[:, np.newaxis]
print(e)

[[1]
 [2]
 [3]
 [4]
 [5]
 [6]]


In [99]:
print(e.shape)

(6, 1)


## Concatenation

In [100]:
a = np.array([[1, 2], [3, 4]])
b = np.array([[5, 6]])

In [101]:
# combine into 1d
c = np.concatenate((a, b), axis=None)
print(c) 

[1 2 3 4 5 6]


In [102]:
# add new row
d = np.concatenate((a, b), axis=0)
print(d)

[[1 2]
 [3 4]
 [5 6]]


In [103]:
# add new column: note that we have to transpose b!
e = np.concatenate((a, b.T), axis=1)
print(e)

[[1 2 5]
 [3 4 6]]


In [105]:
# hstack: Stack arrays in sequence horizontally (column wise). needs a tuple
a = np.array([1,2,3,4])
b = np.array([5,6,7,8])
c = np.hstack((a,b))
print(c) 

[1 2 3 4 5 6 7 8]


In [106]:
a = np.array([[1,2], [3,4]])
b = np.array([[5,6], [7,8]])
c = np.hstack((a,b))
print(c)

[[1 2 5 6]
 [3 4 7 8]]


In [107]:
# vstack: Stack arrays in sequence vertically (row wise). needs a tuple
a = np.array([1,2,3,4])
b = np.array([5,6,7,8])
c = np.vstack((a,b))
print(c)

[[1 2 3 4]
 [5 6 7 8]]


In [108]:
a = np.array([[1,2], [3,4]])
b = np.array([[5,6], [7,8]])
c = np.vstack((a,b))
print(c)

[[1 2]
 [3 4]
 [5 6]
 [7 8]]


## Broadcasting

Broadcasting is a powerful mechanism that allows numpy to work with arrays of different shapes
when performing arithmetic operations. Frequently we have a smaller array and a larger array,
and we want to use the smaller array multiple times to perform some operation on the larger
array.

In [109]:
x = np.array([[1,2,3], [4,5,6], [7,8,9], [10, 11, 12]])
y = np.array([1, 0, 1])
z = x + y # Add y to each row of x using broadcasting
print(z)

[[ 2  2  4]
 [ 5  5  7]
 [ 8  8 10]
 [11 11 13]]


## Functions and Axis

In [110]:
a = np.array([[7,8,9,10,11,12,13], [17,18,19,20,21,22,23]])
print(a.sum())
print(a.sum(axis=None))

210
210


In [111]:
print(a.sum(axis=0)) # along the rows -> 1 sum entry for each column

[24 26 28 30 32 34 36]


In [112]:
print(a.sum(axis=1)) # along the columns -> 1 sum entry for each row

[ 70 140]


In [113]:
print(a.mean())
print(a.mean(axis = None))

15.0
15.0


In [114]:
# 1 mean entry for each column
print(a.mean(axis = 0))

[12. 13. 14. 15. 16. 17. 18.]


In [115]:
# 1 mean entry for each row
print(a.mean(axis = 1))

[10. 20.]


In [116]:
# some more: std, var, min, max

## Datatypes

[More information here](https://numpy.org/devdocs/user/basics.types.html)

In [118]:
# Let numpy choose the datatype
x = np.array([1, 2])
print(x.dtype) 

int64


In [119]:
# Let numpy choose the datatype
x = np.array([1.0, 2.0])
print(x.dtype) # float64

float64


In [120]:
# Force a particular datatype, how many bits (how precise)
x = np.array([1, 2], dtype=np.int64) # 8 bytes
print(x.dtype)

int64


In [121]:
x = np.array([1, 2], dtype=np.float32) # 4 bytes
print(x.dtype) 

float32


## Copying

In [122]:
a = np.array([1,2,3])
b = a # only copies reference!
b[0] = 42
print(a)

[42  2  3]


In [123]:
a = np.array([1,2,3])
b = a.copy() # actual copy!
b[0] = 42
print(a)

[1 2 3]


## Generating Arrays

In [124]:
# zeros
a = np.zeros((2,3)) # size as tuple

# [[0. 0. 0.]
# [0. 0. 0.]]

In [125]:
# ones
b = np.ones((2,3))
# [[1. 1. 1.]
# [1. 1. 1.]]

In [126]:
# specific value
c = np.full((3,3),5.0)
# [[5. 5. 5.]
# [5. 5. 5.]
# [5. 5. 5.]]

In [127]:
# identity
d = np.eye(3) #3x3
# [[1. 0. 0.]
# [0. 1. 0.]
# [0. 0. 1.]]

In [128]:
# arange
e = np.arange(10)
# [0 1 2 3 4 5 6 7 8 9]

In [129]:
# linspace
f = np.linspace(0, 10, 5)
# [ 0. 2.5 5. 7.5 10. ]

## Random Numbers

In [131]:
a = np.random.random((3,2)) # uniform 0-1 distribution

# [[0.06121857 0.10180167]
# [0.83321726 0.54906613]
# [0.94170273 0.19447411]]

In [133]:
b = np.random.randn(3,2) # normal/Gaussian distribution, mean 0 and unit variance
# no tuple as shape here! each dimension one argument
# [[ 0.56759123 -0.65068333]
# [ 0.83445762 -0.36436185]
# [ 1.27150812 -0.32906051]]

In [135]:
c = np.random.randn(10000)
print(c.mean(), c.var(), c.std())

0.018372692480022917 0.9768879497701731 0.9883764210917686


In [136]:
d = np.random.randn(10, 3)
print(d.mean())

0.11858939163052333


In [138]:
# random integer, low,high,size; high is exclusive
e = np.random.randint(3,10,size=(3,3)) # if we only pass one parameter, then from 0-x
print(e)

[[8 5 7]
 [5 5 8]
 [9 9 8]]


In [139]:
# with an integer n, values are drawn from 0 up to n (exclusive)
f = np.random.choice(7, size=10)

In [140]:
# with an array it draws random values from this array
g = np.random.choice([1,2,3,4], size=8)

## Linear Algebra (Eigenvalues / Solving Linear Systems)

In [141]:
a = np.array([[1,2], [3,4]])
eigenvalues, eigenvectors = np.linalg.eig(a)
# Note: use eigh if your matrix is symmetric (faster)

In [142]:
print(eigenvalues)

[-0.37228132  5.37228132]


In [143]:
print(eigenvectors) # column vectors

[[-0.82456484 -0.41597356]
 [ 0.56576746 -0.90937671]]


In [144]:
print(eigenvectors[:,0]) # column 0 corresponds to eigenvalues[0]

[-0.82456484  0.56576746]


In [145]:
# verify: e-vec * e-val = A * e-vec
d = eigenvectors[:,0] * eigenvalues[0]
e = a @ eigenvectors[:, 0]

In [146]:
print(d, e) # [ 0.30697009 -0.21062466] [ 0.30697009 -0.21062466]
# looks the same, but:
print(d == e) 

[ 0.30697009 -0.21062466] [ 0.30697009 -0.21062466]
[ True False]


In [147]:
# correct way to compare floating-point arrays (tolerates rounding error)
print(np.allclose(d,e)) 

True


## Solving Linear Systems

In [148]:
# x1 + x2 = 2200
# 1.5 x1 + 4 x2 = 5050
# -> 2 equations and 2 unknowns

# Coefficient matrix A and right-hand side b of the system above.
A = np.array([[1, 1], [1.5, 4]])
b = np.array([2200,5050])

# Ax = b <=> x = A^-1 b

# But: computing the inverse explicitly is slow and less accurate
x = np.linalg.inv(A).dot(b) # not recommended
print(x)

# instead solve the system directly:
x = np.linalg.solve(A,b) # good
print(x) 

[1500.  700.]
[1500.  700.]


## Loading CSV Files

In [None]:
# 1) load with np.loadtxt()
# further options: skiprows=1 (skip a header line), ...
data = np.loadtxt('my_file.csv', delimiter=",",dtype=np.float32)
print(data.shape, data.dtype)

# 2) load with np.genfromtxt()
# similar, but with slightly more configuration parameters, e.g.:
# skip_header=0, missing_values="---", filling_values=0.0, ...
data = np.genfromtxt('my_file.csv', delimiter=",", dtype=np.float32)
print(data.shape)