# Introduction to Numpy

- NumPy is a linear algebra library for Python.
- It is the basic building block for almost all of the libraries in the PyData ecosystem.
- NumPy is also incredibly fast, as it has bindings to C libraries.
- NumPy arrays: vectors and matrices
- Broadcasting: simplified, but powerful array interactions


In [1]:
%%HTML
<!-- Embed the SciPy home page. https is used so the frame is not blocked as mixed content when the notebook is served over https. -->
<iframe src="https://scipy.org" width="1000" height="800"></iframe>

# NumPy Arrays vs List

## List in Python

In [2]:
# Plain Python list used by the list examples that follow.
my_list = [1, 2, 3]

In [3]:
my_list

[1, 2, 3]

In [4]:
my_list + [2,3]

[1, 2, 3, 2, 3]

In [5]:
my_list.append(3)  # append mutates the list in place (unlike `+`, which builds a new list)
print(my_list)

[1, 2, 3, 3]


## Array in NumPy

### 定义 np.array 的方法

- `np.array()` : 将 list 化成 array
- `np.arange()`
- `np.linspace()`
- `np.zeros`, `np.ones`, `np.eye`
- `np.random.rand`, `np.random.randn`, `np.random.randint`
- list 表达式：list vs generator（注意方括号和圆括号的区别；`xrange` 仅存在于 Python 2，Python 3 的 `range` 本身就是惰性的）
    > ```L = [i+i for i in np.arange(1,3)]```

    > ```L = (i+i for i in np.arange(1,3))```

In [6]:
print(my_list)

[1, 2, 3, 3]


In [7]:
import numpy as np

In [8]:
arr = np.array(my_list)

In [79]:
******

SyntaxError: invalid syntax (<ipython-input-79-b09ebdbe9a8e>, line 1)

In [78]:
arr + np.array([2, 3, 1, 4])

ValueError: operands could not be broadcast together with shapes (11,) (4,) 

In [61]:
# Build a one-dimensional array from a literal list and print its shape tuple.
my_array = np.array([11, 12, 13])
print(np.shape(my_array))

(3,)


In [68]:
# Nested list: three rows of three integers each.
my_mat = [
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
]

In [63]:
print(np.array(my_mat).shape)

(3, 3)


In [64]:
np.array(my_mat)

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [65]:
np.arange(0,11,1.5)

array([ 0. ,  1.5,  3. ,  4.5,  6. ,  7.5,  9. , 10.5])

In [69]:
np.zeros(3)

array([0., 0., 0.])

In [70]:
np.zeros((3,3))

array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.]])

In [None]:
np.ones((3,3))

In [None]:
np.linspace(0,5,10)

In [None]:
np.eye(4)

In [None]:
np.random.rand(3,3)

In [None]:
np.random.randn(3,3)

In [None]:
np.random.randint(1,100,10) # 10 个 1 到 100 间的随机整数

In [None]:
arr = np.arange(9)

In [None]:
arr.reshape(3,3)

In [None]:
arr.max()

In [None]:
arr.min()

In [76]:
arr.argmin() # 返回最小值的 index

0

In [77]:
arr.argmax() # 返回最大值的 index

8

In [None]:
arr.shape

In [None]:
arr.dtype

In [80]:
my_array.dtype

dtype('int32')

In [None]:
# Request the element type explicitly instead of letting NumPy infer it
# from the integer literals.
arr = np.array(
    [1, 2, 3],
    dtype=np.float64,
)
arr

## Numpy indexing and selection

In [83]:
arr = np.arange(0,11)

In [84]:
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [None]:
arr[8]

In [85]:
arr[-1]

10

In [None]:
arr[1:5]

In [None]:
arr[0:]

In [None]:
arr[:6]

In [None]:
arr[:] = 100

In [None]:
arr

In [9]:
arr = np.arange(0,11)

In [10]:
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [11]:
slice_of_arr = arr[0:6]  # basic slicing yields a view that shares arr's data, not a copy

In [12]:
slice_of_arr

array([0, 1, 2, 3, 4, 5])

In [13]:
slice_of_arr[:]=99

In [14]:
slice_of_arr

array([99, 99, 99, 99, 99, 99])

In [15]:
arr

array([99, 99, 99, 99, 99, 99,  6,  7,  8,  9, 10])

In [16]:
arr_copy=arr.copy()  # .copy() makes an independent array; writing to arr_copy leaves arr untouched

In [17]:
arr_copy

array([99, 99, 99, 99, 99, 99,  6,  7,  8,  9, 10])

In [18]:
arr_copy[:]=100

In [19]:
arr_copy

array([100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100])

In [20]:
arr

array([99, 99, 99, 99, 99, 99,  6,  7,  8,  9, 10])

In [21]:
# 3x3 integer matrix used for the 2-D indexing examples.
arr_2d = np.array([
    [5, 10, 15],
    [20, 25, 30],
    [35, 40, 45],
])

In [22]:
arr_2d

array([[ 5, 10, 15],
       [20, 25, 30],
       [35, 40, 45]])

In [23]:
arr_2d[0][0]

5

In [24]:
arr_2d[0]

array([ 5, 10, 15])

In [25]:
arr_2d[1][1]

25

In [26]:
arr_2d[1,1]

25

## Numpy operations

* Array with array
* Array with scalars
* Universal array functions (ufuncs)

In [27]:
arr = np.arange(0,11)

In [28]:
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [29]:
arr - 5

array([-5, -4, -3, -2, -1,  0,  1,  2,  3,  4,  5])

In [30]:
arr + arr

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18, 20])

In [31]:
arr * arr

array([  0,   1,   4,   9,  16,  25,  36,  49,  64,  81, 100])

In [32]:
arr ** 2

array([  0,   1,   4,   9,  16,  25,  36,  49,  64,  81, 100], dtype=int32)

In [33]:
np.sqrt(arr)

array([0.        , 1.        , 1.41421356, 1.73205081, 2.        ,
       2.23606798, 2.44948974, 2.64575131, 2.82842712, 3.        ,
       3.16227766])

In [34]:
np.exp(arr)

array([1.00000000e+00, 2.71828183e+00, 7.38905610e+00, 2.00855369e+01,
       5.45981500e+01, 1.48413159e+02, 4.03428793e+02, 1.09663316e+03,
       2.98095799e+03, 8.10308393e+03, 2.20264658e+04])

In [35]:
arr.max()

10

In [36]:
np.max(arr)

10

In [37]:
np.sin(arr)

array([ 0.        ,  0.84147098,  0.90929743,  0.14112001, -0.7568025 ,
       -0.95892427, -0.2794155 ,  0.6569866 ,  0.98935825,  0.41211849,
       -0.54402111])

## np.array broadcasting

In [38]:
import numpy as np

In [39]:
arr1 = np.random.randn(5,1)

In [40]:
arr1.shape

(5, 1)

In [41]:
arr1 + 100

array([[100.56940815],
       [100.0474809 ],
       [ 99.81899758],
       [101.25691028],
       [ 99.25623832]])

In [86]:
arr2 = arr1.T

In [87]:
arr2

array([[ 0.56940815,  0.0474809 , -0.18100242,  1.25691028, -0.74376168]])

In [44]:
arr2 + 100

array([[100.56940815, 100.0474809 ,  99.81899758, 101.25691028,
         99.25623832]])

In [45]:
arr3 = np.random.randn(5,3)

In [46]:
arr3

array([[ 0.39112852,  2.2455275 , -0.47510199],
       [ 0.54422078,  1.6409831 , -0.4958595 ],
       [ 1.73220392, -0.20758167,  1.35365461],
       [ 0.30186392,  0.03677094,  0.17464439],
       [-0.63874652,  0.61495442,  0.54189973]])

In [47]:
# Broadcasting: arr1 has shape (5, 1) and arr3 was created as (5, 3), so arr1's
# single column is added to each of arr3's three columns.
arr3 + arr1

array([[ 0.96053667,  2.81493564,  0.09430615],
       [ 0.59170168,  1.688464  , -0.4483786 ],
       [ 1.5512015 , -0.38858409,  1.1726522 ],
       [ 1.55877419,  1.29368121,  1.43155467],
       [-1.3825082 , -0.12880726, -0.20186195]])

# 条件过滤



In [88]:
import numpy as np
# 2x4 integer array used for the boolean-mask filtering examples.
L = np.array([
    [12, 22, 56, 32],
    [11, 13, 17, 12],
])

In [89]:
L

array([[12, 22, 56, 32],
       [11, 13, 17, 12]])

In [50]:
# Boolean mask: True wherever the element of L is greater than 20.
# NOTE(review): the name `filter` shadows Python's built-in filter() for the
# rest of the session; a name such as `mask` would avoid that.
filter = (L >20)

In [90]:
filter

array([[False,  True,  True,  True],
       [False, False, False, False]])

In [91]:
L[filter]

array([22, 56, 32])

In [92]:
L[L>20]

array([22, 56, 32])

In [54]:
L1

NameError: name 'L1' is not defined

In [None]:
L [L>20] += 100
print(L)

In [55]:
L[L % 2 == 0] += 100  # boolean-mask assignment: add 100 in place to every even element

In [56]:
L

array([[112, 122, 132, 156],
       [ 11,  13,  17, 112]])

# Speedtest: ndarrays vs lists



In [57]:
from numpy import arange
from timeit import Timer

# Benchmark parameters: number of elements per container, and the repeat count
# referenced by the (commented-out) Timer-based variant of the benchmark.
size = 10 ** 6
timeits = 10 ** 3

In [58]:
# NumPy array holding the integers 0 .. size-1.
nd_array = arange(size)
# Materialise a real Python list. In Python 3, range() is a lazy sequence and
# not a list, so without list() the "ndarrays vs lists" benchmark would never
# actually time a list.
a_list = list(range(size))

In [59]:
%%timeit
# Timed statement for this cell: Python's built-in sum() over a_list.
sum(a_list)
# NOTE(review): the disabled Timer snippet below times nd_array.sum() (the NumPy
# case), so it does not correspond to the statement measured in this cell.
#timer_numpy = Timer("nd_array.sum()","from __main__ import nd_array")

#print("Time taken by numpy ndarray: %f seconds" % (timer_numpy.timeit(timeits)/timeits))


77.6 ms ± 14.9 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [60]:
%%timeit 
# Timed statement for this cell: the ndarray's own sum() method.
nd_array.sum()
# NOTE(review): the disabled Timer snippet below times sum(a_list) (the list
# case), so it does not correspond to the statement measured in this cell.
#timer_list = Timer("sum(a_list)","from __main__ import a_list")
#print("Time taken by list: %f seconds" % (timer_list.timeit(timeits)/timeits))

1.02 ms ± 33.7 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
