## Numpy code along


In [1]:
# 'np' is the conventional alias for numpy
import numpy as np

#### Numpy arrays

In [2]:
np.array([1,2,3])

array([1, 2, 3])

#### Generate an array with random numbers

In [6]:
# define a random seed first!
np.random.seed(5)
np.random.random(10)

array([0.22199317, 0.87073231, 0.20671916, 0.91861091, 0.48841119,
       0.61174386, 0.76590786, 0.51841799, 0.2968005 , 0.18772123])

### Other ways to create arrays

#### List to array

This works the same way whether you have a list of lists, a list of tuples, a tuple of lists, or a tuple of tuples.

In [7]:
lst =[43,356,2,31,5,6,1]

In [8]:
np.array(lst)

array([ 43, 356,   2,  31,   5,   6,   1])

In [None]:
[1,2,3]+[4,5,6]

In [10]:
np.array([1,2,3]) + np.array([4,5,6])

array([5, 7, 9])

#### Constant arrays

In [9]:
# Create an array of all zeros
np.zeros(12)

# Create an array of all ones
np.ones(12)

# Create any constant array
np.full(8,10)

array([10, 10, 10, 10, 10, 10, 10, 10])

#### Sequential arrays

In [12]:
# Create an array filled with a linear sequence
# Starting at 0, ending at 20, stepping by 2
# (this is similar to the built-in range() function)
np.arange(0,20,2)

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

In [13]:
# Create an array of five values evenly spaced between 0 and 1
np.linspace(0,1,5)

array([0.  , 0.25, 0.5 , 0.75, 1.  ])

In [17]:
np.random.seed(1)
np.random.normal(loc = 0, scale = 1, size = (3,3))

array([[ 1.62434536, -0.61175641, -0.52817175],
       [-1.07296862,  0.86540763, -2.3015387 ],
       [ 1.74481176, -0.7612069 ,  0.3190391 ]])

In [22]:
# Create a 3x3 array of random integers in the interval [0, 10)
a=np.random.randint(0,10,(3,3))

In [28]:
b=np.random.randint(0,10,(3,3), dtype=int8)

NameError: name 'int8' is not defined

#### Array Attributes

shape, size, number of dimentions, data type, item size, number of bytes...

In [24]:
a.shape

(3, 3)

In [25]:
a.size

9

In [26]:
a.ndim

2

In [27]:
a.dtype

dtype('int64')

In [29]:
a.itemsize

8

In [30]:
a.nbytes

72

#### Array indexing

In [32]:
a

array([[8, 3, 9],
       [8, 7, 3],
       [6, 5, 1]])

In [31]:
a[0,0]

8

In [33]:
# third colum
a[1,1:3]

array([7, 3])

In [34]:
a[1,1:]

array([7, 3])

In [36]:
a[1:,1:]

array([[7, 3],
       [5, 1]])

In [37]:
a[1:3,1:3]

array([[7, 3],
       [5, 1]])

In [38]:
a[-1]

array([6, 5, 1])

#### Modifying a slice will modify the array

In [49]:
a


array([[99,  3,  9],
       [ 8,  7,  3],
       [ 6,  5,  1]])

In [40]:
a[0,0]

8

In [41]:
a[0,0]=100

In [50]:
b=a[0,0]

In [43]:
b

100

In [51]:
a[0,0]=99

In [52]:
b

99

In [55]:
a_chunk[0,0]=0
print(a_chunk)
print("\n")
print(a_chunk)

NameError: name 'a_chunk' is not defined

#### Boolean indexing

This is what you do when you need to select all elements greater than a constant, for example.

In [57]:
a

array([[99,  3,  9],
       [ 8,  7,  3],
       [ 6,  5,  1]])

In [59]:
(a>5)

array([[ True, False,  True],
       [ True,  True, False],
       [ True, False, False]])

In [60]:
a[(a>5)]

array([99,  9,  8,  7,  6])

In [61]:
a[(a>np.mean(a))]

array([99])

#### Reshaping arrays

In [63]:
np.arange(1,10)

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [64]:
np.arange(1,10).reshape(3,3)

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

#### Subarrays return views, not copies!

#### Creating copies

In [65]:
a_copy=a.copy()

In [66]:
a_copy

array([[99,  3,  9],
       [ 8,  7,  3],
       [ 6,  5,  1]])

#### Arrays with 3+ dimensions

In [70]:
np.random.randint(low=1, high =1000, size=(5,2,3))

array([[[671, 476, 911],
        [500, 788, 252]],

       [[667, 236,  87],
        [392, 740,  54]],

       [[432, 879, 445],
        [756, 291, 101]],

       [[741, 801, 916],
        [452, 793, 724]],

       [[230, 607, 167],
        [688, 232, 902]]])

In [69]:
np.random.random((5,2,3))

array([[[0.24621107, 0.86002795, 0.53883106],
        [0.55282198, 0.84203089, 0.12417332]],

       [[0.27918368, 0.58575927, 0.96959575],
        [0.56103022, 0.01864729, 0.80063267]],

       [[0.23297427, 0.8071052 , 0.38786064],
        [0.86354185, 0.74712164, 0.55624023]],

       [[0.13645523, 0.05991769, 0.12134346],
        [0.04455188, 0.10749413, 0.22570934]],

       [[0.71298898, 0.55971698, 0.01255598],
        [0.07197428, 0.96727633, 0.56810046]]])

### Operations:

- np.sum, np.multiply, np.power...


In [71]:
a

array([[99,  3,  9],
       [ 8,  7,  3],
       [ 6,  5,  1]])

In [72]:
a**2

array([[9801,    9,   81],
       [  64,   49,    9],
       [  36,   25,    1]])

Lists do not behave the same way (sum means concatenation)

In [74]:
np.array([1,2])+np.array([3,4])

array([4, 6])

#### Aggregations: 

- np.max, np.mean, np.std...

In [75]:
# compute the standard deviation of this array, first using np.std() and then without using this function
np.random.seed(123)
rand = np.random.random(10)
rand

array([0.69646919, 0.28613933, 0.22685145, 0.55131477, 0.71946897,
       0.42310646, 0.9807642 , 0.68482974, 0.4809319 , 0.39211752])

In [76]:
np.mean(rand)

0.544199352975335

In [78]:
#standart deviation
np.std(rand)

0.21758256938579879

In [79]:
#standart deviation, classmate idea
np.sqrt(np.sum((rand-np.mean(rand))**2)/(len(rand)))

0.21758256938579879

In [80]:
#standart deviation Guillem explanation step by step
deviations = rand - np.mean(rand)
deviations

array([ 0.15226983, -0.25806002, -0.3173479 ,  0.00711542,  0.17526962,
       -0.12109289,  0.43656485,  0.14063039, -0.06326745, -0.15208183])

In [81]:
#standart deviation
deviations_squared = deviations**2
deviations_squared

array([2.31861019e-02, 6.65949729e-02, 1.00709689e-01, 5.06291464e-05,
       3.07194386e-02, 1.46634887e-02, 1.90588864e-01, 1.97769054e-02,
       4.00277042e-03, 2.31288845e-02])

In [82]:
mean_deviations_squared = np.mean(deviations_squared)
mean_deviations_squared

0.04734217450052595

In [83]:
np.sqrt(mean_deviations_squared)

0.21758256938579879

In [None]:
#broken bones and favorite numbers
bones=

#### Performance of numpy operations vs lists

In [86]:
from time import time

n = 1000000

In [95]:
start_time=time()

list_of_numbers=[]

for i in range(n):
    list_of_numbers.append(i**5)

end_time=time()

In [96]:
end_time-start_time

0.8942980766296387

In [97]:
list_of_numbers

[0,
 1,
 32,
 243,
 1024,
 3125,
 7776,
 16807,
 32768,
 59049,
 100000,
 161051,
 248832,
 371293,
 537824,
 759375,
 1048576,
 1419857,
 1889568,
 2476099,
 3200000,
 4084101,
 5153632,
 6436343,
 7962624,
 9765625,
 11881376,
 14348907,
 17210368,
 20511149,
 24300000,
 28629151,
 33554432,
 39135393,
 45435424,
 52521875,
 60466176,
 69343957,
 79235168,
 90224199,
 102400000,
 115856201,
 130691232,
 147008443,
 164916224,
 184528125,
 205962976,
 229345007,
 254803968,
 282475249,
 312500000,
 345025251,
 380204032,
 418195493,
 459165024,
 503284375,
 550731776,
 601692057,
 656356768,
 714924299,
 777600000,
 844596301,
 916132832,
 992436543,
 1073741824,
 1160290625,
 1252332576,
 1350125107,
 1453933568,
 1564031349,
 1680700000,
 1804229351,
 1934917632,
 2073071593,
 2219006624,
 2373046875,
 2535525376,
 2706784157,
 2887174368,
 3077056399,
 3276800000,
 3486784401,
 3707398432,
 3939040643,
 4182119424,
 4437053125,
 4704270176,
 4984209207,
 5277319168,
 5584059449,
 5

In [100]:
start_time = time()

array_of_numbers = np.arange(n) ** 5

end_time = time()

end_time - start_time

0.019228219985961914

In [101]:
array_of_numbers


array([                   0,                    1,                   32,
       ..., -8766765102231392691,  6592478144606745568,
        5058113244275780415])

#### Concatenate

In [106]:
first = np.array([[1,2,3],[4,5,6]])
second = np.array([[0,0,0], [9,9,9]])

In [107]:
np.concatenate(first, second)

TypeError: only integer scalar arrays can be converted to a scalar index

#### Transpose

#### Splitting arrays

In [95]:
hundred = np.array(range(1,101))

### Bonus: broadcasting

In [77]:
# We will add the vector v to each row of the matrix x,
# storing the result in the matrix y
x = np.array([[1,2,3], [4,5,6], [7,8,9], [10, 11, 12]])
v = np.array([1, 0, 1])
y = np.empty_like(x)   # Create an empty matrix with the same shape as x

# Add the vector v to each row of the matrix x with an explicit loop
for i in range(4):
    y[i, :] = x[i, :] + v

print(y)

[[ 2  2  4]
 [ 5  5  7]
 [ 8  8 10]
 [11 11 13]]


This works; however when the matrix x is very large, computing an explicit loop in Python could be slow. Note that adding the vector v to each row of the matrix x is equivalent to forming a matrix vv by stacking multiple copies of v vertically, then performing elementwise summation of x and vv. We could implement this approach like this:

In [78]:
vv = np.tile(v, (4, 1))  # Stack 4 copies of v on top of each other
print(vv)                # Prints "[[1 0 1]
                         #          [1 0 1]
                         #          [1 0 1]
                         #          [1 0 1]]"

[[1 0 1]
 [1 0 1]
 [1 0 1]
 [1 0 1]]


In [79]:
y = x + vv  # Add x and vv elementwise
print(y)

[[ 2  2  4]
 [ 5  5  7]
 [ 8  8 10]
 [11 11 13]]


In [80]:
y = x + v  # Add v to each row of x using broadcasting
print(y)

[[ 2  2  4]
 [ 5  5  7]
 [ 8  8 10]
 [11 11 13]]


[how broadcasting works](https://numpy.org/doc/stable/user/basics.broadcasting.html)

Some applications of bradcasting:

In [38]:
# Compute outer product of vectors
v = np.array([1,2,3])  # v has shape (3,)
w = np.array([4,5])    # w has shape (2,)
# To compute an outer product, we first reshape v to be a column
# vector of shape (3, 1); we can then broadcast it against w to yield
# an output of shape (3, 2), which is the outer product of v and w:

print(np.reshape(v, (3, 1)) * w)

[[ 4  5]
 [ 8 10]
 [12 15]]


In [83]:
# Add a vector to each row of a matrix
x = np.array([[1,2,3], [4,5,6]])
# x has shape (2, 3) and v has shape (3,) so they broadcast to (2, 3),
# giving the following matrix:

print(x + v)

[[2 4 6]
 [5 7 9]]


In [84]:
# Add a vector to each column of a matrix
# x has shape (2, 3) and w has shape (2,).
# If we transpose x then it has shape (3, 2) and can be broadcast
# against w to yield a result of shape (3, 2); transposing this result
# yields the final result of shape (2, 3) which is the matrix x with
# the vector w added to each column. Gives the following matrix:

print((x.T + w).T)

[[ 5  6  7]
 [ 9 10 11]]


In [70]:
x = c[0,0]

print("x", "\n", x)

y = c[0,1]
print("y", "\n", y)

x 
 7
y 
 7


In [27]:
# Add elements of x and y together
print(np.add(x, y))

[[1.22458098 1.16849464 0.49307671 0.95069974 1.11837576]
 [1.18180799 0.81070707 0.92343696 0.84729514 0.40211329]
 [0.80629803 0.60882671 0.80446534 1.70746551 0.75776518]
 [0.99696165 0.720522   0.20863104 1.61934258 0.26416839]]


In [28]:
# Subtract elements of x from elements of y
print(np.subtract(y, x))

[[ 0.6605368   0.24431389 -0.15583537 -0.11847305 -0.21446654]
 [-0.74861066  0.70875269 -0.68495345 -0.46618016  0.12571833]
 [ 0.76993308 -0.47848158 -0.13150716  0.05737346  0.09249062]
 [-0.45836073 -0.57693357 -0.03191186  0.01125223  0.16068396]]


In [33]:
# Multiply elements of x and y together
print(np.multiply(x, y))

[[0.26582243 0.32642261 0.05470999 0.22244853 0.30119211]
 [0.20906305 0.0387289  0.09589365 0.12514628 0.0364725 ]
 [0.01432989 0.03543133 0.15746759 0.72803669 0.14141339]
 [0.19595949 0.0465749  0.01062714 0.65553594 0.0109914 ]]


In [None]:
# Divide elements of y by elements of x
print(np.divide(y, x))