# numpy array

NumPy arrays are used extensively in data science (DS) and machine learning (ML).

Core Python provides these built-in objects:
- numbers: int, float
- containers: lists, tuples, dicts


NumPy provides
- an extension package to Python for multi-dimensional arrays
- an implementation closer to the hardware (efficiency)
- a style of programming also called array-oriented computing

In [2]:
import numpy as np

In [3]:
print(np.arange(10))

[0 1 2 3 4 5 6 7 8 9]


### Why is it useful: fast numerical operations

In [8]:
# Pure-Python baseline: square 1000 integers with a list comprehension.
# Note: L is a range object, not a list; the comprehension iterates over it.
L = range(1000)
%timeit [i**2 for i in L]

273 µs ± 6.85 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [10]:
# NumPy array: the same squaring written as a single whole-array expression
K = np.arange(1000)
%timeit K**2
# %timeit only reports the timing; evaluate K**2 on its own to display the squared array.

976 ns ± 21.7 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)


array([     0,      1,      4,      9,     16,     25,     36,     49,
           64,     81,    100,    121,    144,    169,    196,    225,
          256,    289,    324,    361,    400,    441,    484,    529,
          576,    625,    676,    729,    784,    841,    900,    961,
         1024,   1089,   1156,   1225,   1296,   1369,   1444,   1521,
         1600,   1681,   1764,   1849,   1936,   2025,   2116,   2209,
         2304,   2401,   2500,   2601,   2704,   2809,   2916,   3025,
         3136,   3249,   3364,   3481,   3600,   3721,   3844,   3969,
         4096,   4225,   4356,   4489,   4624,   4761,   4900,   5041,
         5184,   5329,   5476,   5625,   5776,   5929,   6084,   6241,
         6400,   6561,   6724,   6889,   7056,   7225,   7396,   7569,
         7744,   7921,   8100,   8281,   8464,   8649,   8836,   9025,
         9216,   9409,   9604,   9801,  10000,  10201,  10404,  10609,
        10816,  11025,  11236,  11449,  11664,  11881,  12100,  12321,
      

In [15]:
a = np.arange(100)
a

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
       51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67,
       68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
       85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99])

In [18]:
a.ndim # ndim is an attribute, not a method, so it is read without parentheses

1

In [20]:
a.shape

(100,)

In [21]:
len(a)

100

In [26]:
# 2-D array built from a nested list: each inner list becomes one row
b = np.array([[1,2,3],[4,5,6]])
b


array([[1, 2, 3],
       [4, 5, 6]])

In [27]:
b.ndim

2

In [28]:
b.shape

(2, 3)

In [29]:
len(b)

2

In [33]:
# 3-D array: two 2x2 blocks stacked along the first axis
c = np.array(
    [
        [[0, 1], [2, 3]],
        [[4, 5], [6, 7]],
    ]
)

In [34]:
c.ndim

3

In [35]:
c.shape

(2, 2, 2)

# ways to create numpy arrays

In [37]:
a = np.arange(1,10,2) # arange(start, stop, step): values stay below stop (stop is exclusive)

In [38]:
a

array([1, 3, 5, 7, 9])

In [42]:
# linspace: evenly spaced values between two endpoints

b = np.linspace(0, 10, 5)  # start, end (included, unlike arange), number of points
b

array([ 0. ,  2.5,  5. ,  7.5, 10. ])

In [44]:
# Common constructors: np.ones takes the shape as a tuple and fills the array with 1.0

a = np.ones((3,3,3))
a

array([[[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]],

       [[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]],

       [[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]]])

In [45]:
a.ndim

3

In [51]:
# np.zeros takes the shape as a tuple and fills the array with 0.0
b = np.zeros((2,2))
b

array([[0., 0.],
       [0., 0.]])

In [54]:
# np.eye(n) returns the n x n identity matrix ("eye" is pronounced like "I")
c = np.eye(3)
c

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [56]:
# With two arguments, eye(rows, cols) gives a rectangular matrix with ones on the main diagonal
c = np.eye(3,2)
c

array([[1., 0.],
       [0., 1.],
       [0., 0.]])

In [58]:
# np.diag on a 1-D sequence builds a square matrix with those values on the diagonal

d = np.diag([1,2,3,4,5])
d

array([[1, 0, 0, 0, 0],
       [0, 2, 0, 0, 0],
       [0, 0, 3, 0, 0],
       [0, 0, 0, 4, 0],
       [0, 0, 0, 0, 5]])

In [59]:
np.diag(d)  # on a 2-D array, diag goes the other way and extracts the diagonal

array([1, 2, 3, 4, 5])

In [67]:
# Create an array of 5 random floats with np.random.rand.
# Python's built-in `random` module is not needed; np.random is reached through the numpy import.
# No seed is set, so the values change on every run.
a = np.random.rand(5)
a

array([0.71086921, 0.66179863, 0.25974843, 0.37241509, 0.85986959])

In [68]:
a = np.random.randn(5) # randn = "rand" + "n" for normal: samples drawn from a normal distribution
a

array([-0.41880911,  2.00900822,  1.5081583 ,  0.83423734, -0.75212189])

# Datatypes

In [72]:
# type() is the Python class of the array object; dtype is the type of the elements it stores.
# NOTE: int64 is what this run reports; the default integer dtype may differ by platform.
a = np.arange(10)
print(type(a))
a.dtype

<class 'numpy.ndarray'>


dtype('int64')

In [73]:
a

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [78]:
# Request the element type explicitly instead of relying on the default integer dtype
a = np.arange(10, dtype=np.float64)

In [79]:
a

array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])

In [80]:
a.dtype

dtype('float64')

In [83]:
# No dtype is passed to np.zeros here; the resulting array is float64
a = np.zeros((3,3))

print(a)

a.dtype

[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]


dtype('float64')

In [86]:
# Complex literals produce an array with a complex dtype
d = np.array([1+2j, 3+4j])
print(d)
d.dtype

[1.+2.j 3.+4.j]


dtype('complex128')

In [88]:
# Python booleans produce an array with a bool dtype
b = np.array([True, False])
print(b)
b.dtype

[ True False]


dtype('bool')

# indexing and slicing


## indexing

In [89]:
a = np.arange(10)
a

# indices begin at 0

In [97]:
# For multidimensional arrays, an index is a tuple of integers, one per axis (e.g. a[row, col])

a = np.diag([1,2,3,4,5])
a

array([[1, 0, 0, 0, 0],
       [0, 2, 0, 0, 0],
       [0, 0, 3, 0, 0],
       [0, 0, 0, 4, 0],
       [0, 0, 0, 0, 5]])

In [98]:
a[2,2]  # row 2, column 2 (both counted from 0)

3

In [99]:
a[3,4] = 10  # assign to a single element: row 3, column 4

In [100]:
a

array([[ 1,  0,  0,  0,  0],
       [ 0,  2,  0,  0,  0],
       [ 0,  0,  3,  0,  0],
       [ 0,  0,  0,  4, 10],
       [ 0,  0,  0,  0,  5]])

## slicing

In [101]:
a = np.arange(10)

In [102]:

a

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [106]:
a[3:8:2] # start, end (not inclusive), step

array([3, 5, 7])

In [108]:
# Combine assignment and slicing

a[5:9] = 10 # set the elements at indices 5 through 8 to 10 (index 9 is untouched)
a

array([ 0,  1,  2,  3,  4, 10, 10, 10, 10,  9])

In [110]:
# Reverse the array
b = np.arange(10)
c = b[::-1] # start and end omitted (whole array); a step of -1 walks it backwards
c

array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])

# copies and views

In [130]:
a = np.arange(10)
print(a)
id(a)

[0 1 2 3 4 5 6 7 8 9]


4521854896

In [131]:
b = a[::2]  # a slice is a view: b reads and writes the same underlying data as a
print(b)
id(b)  # b is still its own Python object, so id() alone does not reveal the shared data

[0 2 4 6 8]


4514653360

In [132]:
np.shares_memory(a, b) # checks whether a and b overlap in memory; True here because b is a view of a, not a copy (a copy would use extra RAM)

True

In [133]:
b[0] = 11

In [134]:
a  # a[0] changed too, because b is a view onto a's data

array([11,  1,  2,  3,  4,  5,  6,  7,  8,  9])

In [135]:
# Forcing a copy

a = np.arange(10)

c = a[::2].copy() # .copy() gives c its own data, so a and c no longer share memory


In [136]:
np.shares_memory(a,c)

False

In [137]:
id(a)


4514441328

In [138]:
id(c)

4521854416

# Fancy indexing

## NumPy arrays can be indexed with slices, but also with boolean or integer arrays (masks). Fancy indexing creates copies, not views.

In [140]:
a = np.random.randint(0,30,20) # 20 random integers in [0, 30): the upper bound 30 is exclusive
a

array([ 4, 13,  6, 25,  0, 23, 17,  3,  8,  0, 14, 16, 29,  3, 14, 16, 28,
       11,  8,  1])

In [141]:
# Boolean mask: True wherever the element of a is odd
mask = a % 2 != 0

In [142]:
a[mask]

array([13, 25, 23, 17,  3, 29,  3, 11,  1])

indexing with a mask can be useful to assign values to a subarray

In [143]:
a[mask] = -20  # overwrite every element selected by the mask (the odd values) in place

In [144]:
a

array([  4, -20,   6, -20,   0, -20, -20, -20,   8,   0,  14,  16, -20,
       -20,  14,  16,  28, -20,   8, -20])

indexing with an array of integers

In [146]:
a = np.arange(0,100,10)
a

array([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])

In [148]:
# Indexing with a list of integers; an index may repeat (2 appears twice, so 20 does too)
a[[2,4,5,2,6]]

array([20, 40, 50, 20, 60])

In [149]:
# Integer-list indexing also works on the left-hand side: set elements 9 and 7 in one statement
a[[9,7]] = -100
a

array([   0,   10,   20,   30,   40,   50,   60, -100,   80, -100])