## NumPy 

A Python library that allows one to do numeric computations. Provides a general *array* data structure as found in other languages and functions to manipulate arrays as matrices. Also provides several operations needed in scientific and numeric computations.

In [1]:
import numpy as np

Convert a list into an array

In [2]:
a = np.array([1,2,3,4,5])

In [3]:
print(a)
print(type(a))

[1 2 3 4 5]
<class 'numpy.ndarray'>


#### Accessing and setting values

In [4]:
print(a[3])    # access to an element is easy

4


In [5]:
a[3] = 8       # You can set values too
print(a)

[1 2 3 8 5]


#### Dimension and Shape

In [6]:
a.ndim    # attribute giving the number of dimensions

1

In [7]:
a.shape # tuple giving sizes on each dimension

(5,)

In [8]:
a.size   # total number of elements

5

In [9]:
a2 = np.array([[1,2,3],[4,5,6]])  # two-dimensional array, given row by row

In [10]:
print(a2)

[[1 2 3]
 [4 5 6]]


In [11]:
a2 = np.array([(1,2,3),(4,5,6)])  # also works

In [12]:
print(a2.ndim,a2.shape,a2.size)

2 (2, 3) 6


#### Matrix Operations

In [13]:
print(a2.T)  # Transpose

[[1 4]
 [2 5]
 [3 6]]


In [14]:
a2*a2     # Pointwise multiplication

array([[ 1,  4,  9],
       [16, 25, 36]])

In [15]:
a2@(a2.T)  # Matrix Multiplication

array([[14, 32],
       [32, 77]])

#### Constructing some default arrays of given size

In [16]:
a = np.zeros([2,2])
print(a,a.dtype)         # the attribute dtype tells you the datatype of the elements of the array

[[0. 0.]
 [0. 0.]] float64


In [17]:
a = np.zeros([2,2],dtype=np.int64)  # if you don't like floats
print(a)

[[0 0]
 [0 0]]


In [18]:
a = np.ones([3,3],dtype=np.float64)  # fill with 1's. The element type is set explicitly to float64
print(a)

[[1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]]


In [19]:
a = np.empty([3,3,4])  # uninitialized 3-dimensional array (contents are whatever was in memory)
print(a)

[[[ 5.56268465e-309  2.02707639e+243 -3.22631608e-294 -3.22631907e-294]
  [-3.22632205e-294 -3.22632504e-294  6.61855951e+250  6.97654594e+250]
  [ 6.04607466e+174  7.33453704e+250  6.33301315e+173  1.71548999e+294]]

 [[ 5.71472456e+173  2.61763035e+289  6.30404327e+173  2.61716182e+289]
  [ 2.43400036e+159  6.01334412e-154  5.30586801e+180  1.41739263e+190]
  [ 4.36055452e+222  9.48080479e-154  1.49004942e+195  3.17095857e+180]]

 [[ 7.04378583e+199  1.33856864e-152  4.83245960e+276  1.79805224e+044]
  [ 1.96086579e+243  7.22756874e+159  1.68795313e+219  1.01151695e+261]
  [ 2.04736806e+190  1.62348215e+219  2.31597827e+251  1.34450615e+161]]]


Read the output as follows: there are 3 "rows" (i.e. slices along the first dimension), and their values are printed with a blank line inserted between consecutive rows. Each row is a two-dimensional array and so is printed the way 2-dimensional arrays are.

In [20]:
a = np.random.random([3,3,4])     # A random matrix
print(a) 

[[[0.24495576 0.63313369 0.77161171 0.46546887]
  [0.90498244 0.58899909 0.96316633 0.74678912]
  [0.627678   0.40199494 0.11050629 0.06037346]]

 [[0.46952336 0.48610862 0.63335105 0.26759305]
  [0.4980877  0.51669564 0.32549818 0.67044395]
  [0.32902382 0.92936409 0.08540377 0.60045041]]

 [[0.33306199 0.05786636 0.56475072 0.37129445]
  [0.50899873 0.33584694 0.64171721 0.21726473]
  [0.40401932 0.59072099 0.56916788 0.98010961]]]


In [21]:
np.array(range(1,20))

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19])

In [22]:
a = np.array([range(1,20),range(1,20)])
print(a)

[[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19]
 [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19]]


You can also fill an array with values based on their indices.

In [23]:
def f(x, y):
    """Return the sum of the two coordinates ``x`` and ``y``.

    Used as the generator for ``np.fromfunction``, so that the entry at
    position (x, y) of the resulting array holds x + y.
    """
    total = x + y
    return total
    
a = np.fromfunction(f,[4,5])
print(a)

[[0. 1. 2. 3. 4.]
 [1. 2. 3. 4. 5.]
 [2. 3. 4. 5. 6.]
 [3. 4. 5. 6. 7.]]


In [24]:
a = np.fromfunction(f,[4,5],dtype=np.int64)
print(a)

[[0 1 2 3 4]
 [1 2 3 4 5]
 [2 3 4 5 6]
 [3 4 5 6 7]]


#### arange,linspace -- fancier versions of range

In [25]:
np.array(np.arange(0,5,.35))

array([0.  , 0.35, 0.7 , 1.05, 1.4 , 1.75, 2.1 , 2.45, 2.8 , 3.15, 3.5 ,
       3.85, 4.2 , 4.55, 4.9 ])

In [26]:
np.array([np.linspace(0,5,18)]) # 18 evenly spaced points from 0 to 5, both endpoints included

array([[0.        , 0.29411765, 0.58823529, 0.88235294, 1.17647059,
        1.47058824, 1.76470588, 2.05882353, 2.35294118, 2.64705882,
        2.94117647, 3.23529412, 3.52941176, 3.82352941, 4.11764706,
        4.41176471, 4.70588235, 5.        ]])

In [27]:
type(np.arange(0,3,2))

numpy.ndarray

#### Reshaping an array

In [28]:
b = np.linspace(0,5,18)  
b.reshape([3,6])                      # make a 3 x 6 array out of the 18 elements

array([[0.        , 0.29411765, 0.58823529, 0.88235294, 1.17647059,
        1.47058824],
       [1.76470588, 2.05882353, 2.35294118, 2.64705882, 2.94117647,
        3.23529412],
       [3.52941176, 3.82352941, 4.11764706, 4.41176471, 4.70588235,
        5.        ]])

In [29]:
b

array([0.        , 0.29411765, 0.58823529, 0.88235294, 1.17647059,
       1.47058824, 1.76470588, 2.05882353, 2.35294118, 2.64705882,
       2.94117647, 3.23529412, 3.52941176, 3.82352941, 4.11764706,
       4.41176471, 4.70588235, 5.        ])

In [30]:
b.resize([3,6])    # reshapes in place
b

array([[0.        , 0.29411765, 0.58823529, 0.88235294, 1.17647059,
        1.47058824],
       [1.76470588, 2.05882353, 2.35294118, 2.64705882, 2.94117647,
        3.23529412],
       [3.52941176, 3.82352941, 4.11764706, 4.41176471, 4.70588235,
        5.        ]])

In [31]:
b.ravel()  # linearize it row-wise

array([0.        , 0.29411765, 0.58823529, 0.88235294, 1.17647059,
       1.47058824, 1.76470588, 2.05882353, 2.35294118, 2.64705882,
       2.94117647, 3.23529412, 3.52941176, 3.82352941, 4.11764706,
       4.41176471, 4.70588235, 5.        ])

In [32]:
print(b)

[[0.         0.29411765 0.58823529 0.88235294 1.17647059 1.47058824]
 [1.76470588 2.05882353 2.35294118 2.64705882 2.94117647 3.23529412]
 [3.52941176 3.82352941 4.11764706 4.41176471 4.70588235 5.        ]]


#### min,max etc

In [33]:
print(b.min())
print(b.max())
print(b.sum())

0.0
5.0
45.0


In [34]:
b = (np.array(range(1,10))).reshape(3,3)
print(b)
print()
b.sum(axis=0)     # column sums --- axis 0 (the row index) is projected out via sum

[[1 2 3]
 [4 5 6]
 [7 8 9]]



array([12, 15, 18])

In [35]:
b.sum(axis=1)     # row sums --- axis 1 (the column index) is projected out via sum

array([ 6, 15, 24])

In [36]:
c = np.array(range(1,28))
c.resize(3,3,3)
print(c)

[[[ 1  2  3]
  [ 4  5  6]
  [ 7  8  9]]

 [[10 11 12]
  [13 14 15]
  [16 17 18]]

 [[19 20 21]
  [22 23 24]
  [25 26 27]]]


In [37]:
c.sum(axis=0)

array([[30, 33, 36],
       [39, 42, 45],
       [48, 51, 54]])

In [38]:
c.sum(axis=1)

array([[12, 15, 18],
       [39, 42, 45],
       [66, 69, 72]])

In [39]:
c.sum(axis=2)

array([[ 6, 15, 24],
       [33, 42, 51],
       [60, 69, 78]])

In [40]:
b.cumsum(axis=1)  # cumulative sums

array([[ 1,  3,  6],
       [ 4,  9, 15],
       [ 7, 15, 24]], dtype=int32)

In [41]:
np.mean(b,axis=0)

array([4., 5., 6.])

#### Scalar operations work as expected

In [42]:
print(a)
print()
print(a*3) 

[[0 1 2 3 4]
 [1 2 3 4 5]
 [2 3 4 5 6]
 [3 4 5 6 7]]

[[ 0  3  6  9 12]
 [ 3  6  9 12 15]
 [ 6  9 12 15 18]
 [ 9 12 15 18 21]]


In [43]:
print(a+3)

[[ 3  4  5  6  7]
 [ 4  5  6  7  8]
 [ 5  6  7  8  9]
 [ 6  7  8  9 10]]


In [44]:
a>5   # like addition etc, it operates pointwise

array([[False, False, False, False, False],
       [False, False, False, False, False],
       [False, False, False, False,  True],
       [False, False, False,  True,  True]])

#### Point-wise functions 

In [45]:
np.sin(b)

array([[ 0.84147098,  0.90929743,  0.14112001],
       [-0.7568025 , -0.95892427, -0.2794155 ],
       [ 0.6569866 ,  0.98935825,  0.41211849]])

In [46]:
np.cos(b)

array([[ 0.54030231, -0.41614684, -0.9899925 ],
       [-0.65364362,  0.28366219,  0.96017029],
       [ 0.75390225, -0.14550003, -0.91113026]])

In [47]:
np.sin(b)*np.sin(b) + np.cos(b)*np.cos(b)

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

#### Slicing, Iterations, stacking and splitting

In [48]:
b = np.array(range(0,18))
b = b.reshape(3,6)
print(b)
print()

[[ 0  1  2  3  4  5]
 [ 6  7  8  9 10 11]
 [12 13 14 15 16 17]]



In [49]:
print(b[:,1])        # all rows but only column 1

[ 1  7 13]


In [50]:
print(b[:,1:3])      # all rows but only columns 1 and 2

[[ 1  2]
 [ 7  8]
 [13 14]]


In [51]:
for r in b:
    print(r)

[0 1 2 3 4 5]
[ 6  7  8  9 10 11]
[12 13 14 15 16 17]


In [52]:
for r in b.flat:
    print(r)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17


In [53]:
b.flat[3]

3

In [54]:
print(b)
print()
print(np.vstack([b,b]))

[[ 0  1  2  3  4  5]
 [ 6  7  8  9 10 11]
 [12 13 14 15 16 17]]

[[ 0  1  2  3  4  5]
 [ 6  7  8  9 10 11]
 [12 13 14 15 16 17]
 [ 0  1  2  3  4  5]
 [ 6  7  8  9 10 11]
 [12 13 14 15 16 17]]


In [55]:
a = b+10
np.hstack([a,b,a])

array([[10, 11, 12, 13, 14, 15,  0,  1,  2,  3,  4,  5, 10, 11, 12, 13,
        14, 15],
       [16, 17, 18, 19, 20, 21,  6,  7,  8,  9, 10, 11, 16, 17, 18, 19,
        20, 21],
       [22, 23, 24, 25, 26, 27, 12, 13, 14, 15, 16, 17, 22, 23, 24, 25,
        26, 27]])

In [56]:
np.vstack([a,b,a])

array([[10, 11, 12, 13, 14, 15],
       [16, 17, 18, 19, 20, 21],
       [22, 23, 24, 25, 26, 27],
       [ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [10, 11, 12, 13, 14, 15],
       [16, 17, 18, 19, 20, 21],
       [22, 23, 24, 25, 26, 27]])

In [57]:
np.vsplit(np.vstack([a,b,a,b]),2)

[array([[10, 11, 12, 13, 14, 15],
        [16, 17, 18, 19, 20, 21],
        [22, 23, 24, 25, 26, 27],
        [ 0,  1,  2,  3,  4,  5],
        [ 6,  7,  8,  9, 10, 11],
        [12, 13, 14, 15, 16, 17]]), array([[10, 11, 12, 13, 14, 15],
        [16, 17, 18, 19, 20, 21],
        [22, 23, 24, 25, 26, 27],
        [ 0,  1,  2,  3,  4,  5],
        [ 6,  7,  8,  9, 10, 11],
        [12, 13, 14, 15, 16, 17]])]

In [58]:
a.size

18

In [59]:
np.array_split(np.vstack([a,b,a,b]),5)

[array([[10, 11, 12, 13, 14, 15],
        [16, 17, 18, 19, 20, 21],
        [22, 23, 24, 25, 26, 27]]), array([[ 0,  1,  2,  3,  4,  5],
        [ 6,  7,  8,  9, 10, 11],
        [12, 13, 14, 15, 16, 17]]), array([[10, 11, 12, 13, 14, 15],
        [16, 17, 18, 19, 20, 21]]), array([[22, 23, 24, 25, 26, 27],
        [ 0,  1,  2,  3,  4,  5]]), array([[ 6,  7,  8,  9, 10, 11],
        [12, 13, 14, 15, 16, 17]])]

In [60]:
np.array_split(np.vstack([a,b,a,b]),4,axis=1)

[array([[10, 11],
        [16, 17],
        [22, 23],
        [ 0,  1],
        [ 6,  7],
        [12, 13],
        [10, 11],
        [16, 17],
        [22, 23],
        [ 0,  1],
        [ 6,  7],
        [12, 13]]), array([[12, 13],
        [18, 19],
        [24, 25],
        [ 2,  3],
        [ 8,  9],
        [14, 15],
        [12, 13],
        [18, 19],
        [24, 25],
        [ 2,  3],
        [ 8,  9],
        [14, 15]]), array([[14],
        [20],
        [26],
        [ 4],
        [10],
        [16],
        [14],
        [20],
        [26],
        [ 4],
        [10],
        [16]]), array([[15],
        [21],
        [27],
        [ 5],
        [11],
        [17],
        [15],
        [21],
        [27],
        [ 5],
        [11],
        [17]])]

#### Copying Arrays

In [61]:
a = np.array(range(0,9))
a.resize(3,3)
print(a)

[[0 1 2]
 [3 4 5]
 [6 7 8]]


In [62]:
b = a[:,:]      
print(a is b)

False


In [63]:
print(b)

[[0 1 2]
 [3 4 5]
 [6 7 8]]


In [64]:
b[0,0] = 5

In [65]:
print(a)

[[5 1 2]
 [3 4 5]
 [6 7 8]]


In [66]:
print(b)

[[5 1 2]
 [3 4 5]
 [6 7 8]]


In [67]:
c = a.view()
print(c)

[[5 1 2]
 [3 4 5]
 [6 7 8]]


In [68]:
c[0,0] = 8
print(a)
print(c is a)

[[8 1 2]
 [3 4 5]
 [6 7 8]]
False


In [69]:
b.base is a

True

In [70]:
c.base is a

True

In [71]:
a.base is b

False

In [72]:
c = a
print(c is a)
print(c.base is a) # c and a refer to the same object, so they share one base (None), which is not a itself
print(a.base is c)

True
False
False


In [73]:
a.resize(1,9)

In [74]:
print(a)

[[8 1 2 3 4 5 6 7 8]]


In [75]:
print(b)

[[8 1 2]
 [3 4 5]
 [6 7 8]]


In [76]:
b[2,2] = 10

In [77]:
print(a)

[[ 8  1  2  3  4  5  6  7 10]]


How do we explain all this?

A NumPy array consists of two parts. The first part, the *data*, is a mutable sequence of raw values storing the contents of the array. The second part, the *metadata*, contains information about the number of dimensions, the size of each dimension, the datatype etc. The metadata allows Python to translate references based
on coordinates, say **a[3][4][2]**, into the right location within the data. 

Every array box contains the metadata. But sometimes a box does not contain the data. Instead it has an attribute **base** 
which provides the location of a different array box that contains the data. (If the data is within this box then **base** is set to **None**.) This is done in the interests of efficiency: it avoids copying data (expensive
in terms of time) and avoids storing multiple copies of data (expensive in terms of space). But this also means
we must be careful about changes to one array resulting in inadvertent changes to others.

When you write **c = a**, both **c** and **a** refer to the same box (of type **array**). 
When you write **b = a[:,:]** a new box is created and a copy of the metadata of part of **a** is placed in this
box. Then its base is set to **a**. The name **b** refers to this new box. In some sense, **b** is a view of the data in **a**. Actually, the correct way to create
such a view is to use the method **view** and write **b = a.view()**

Notice that if we now resize **a** (or **b** respectively), only the metadata of **a** (**b** resp.) is changed. It has
no effect on **b** (resp. **a**).

Finally, if you write **d = b.copy()** then a box is created with a new copy of both the data and the metadata.
Thus any change to **d** has no effect on **b** and vice versa.


In [78]:
d = a.copy()
print(d)
print(a)

[[ 8  1  2  3  4  5  6  7 10]]
[[ 8  1  2  3  4  5  6  7 10]]


In [79]:
d.base is a

False

In [80]:
d.resize([1,9])
d[0,0] = 20
a[0,0] = 30
print(d)
print()
print(a)

[[20  1  2  3  4  5  6  7 10]]

[[30  1  2  3  4  5  6  7 10]]


In [81]:
a2=a.T

In [82]:
a3=a.reshape(9,1)

In [83]:
a2.base is a


True

In [84]:
a3.base is a

True

**Exercise**: When you write **a.T** or **a.reshape()** do you get a copy or a view? Experiment with **numpy** to find out.

#### Linear Algebra and more 

In [85]:
a = np.random.random([3,3])

In [86]:
np.linalg.inv(a)

array([[ -3.18986769,  -0.98407197,   3.46263091],
       [  1.92178507,  -1.81689469,   1.36776751],
       [  5.87428718,  11.74587219, -13.34357123]])

In [87]:
np.linalg.eig(a)

(array([ 1.0204415 , -0.27523208, -0.06370642]),
 array([[-0.41386085, -0.75547242, -0.27055881],
        [-0.61334721,  0.59839319, -0.05723797],
        [-0.67269934,  0.26680894,  0.96100039]]))

In [88]:
mu = 10
sigma = 2
a = np.random.normal(mu,sigma,20) # generate 20 values drawn from the normal distribution with mean 10 and sd 2
print(a)


[10.16297018  9.04102447  7.87530544  7.86789282 11.19058985  9.15147022
 10.89237955 12.17551979 12.95997359 12.87701202 11.0505093   7.84702116
  8.75681008 13.39792206 10.11021434 12.8230744   9.25111429  7.22405861
  9.4381344  11.00609163]
