# NumPy
- ref: http://shop.oreilly.com/product/0636920034919.do

In [3]:
import numpy as np
np.__version__

'1.15.4'

### Creating Arrays from Python Lists

In [4]:
np.array([1, 4, 2, 5, 3])

array([1, 4, 2, 5, 3])

If types do not match, NumPy will upcast if possible (here, integers are up-cast to floating point):

In [5]:
np.array([3.14, 4, 2, 3])

array([3.14, 4.  , 2.  , 3.  ])

To explicitly set the data type of the resulting array, we can use the dtype keyword:

In [6]:
np.array([1, 2, 3, 4], dtype='float32')

array([1., 2., 3., 4.], dtype=float32)

In [7]:
# nested lists result in multi-dimensional arrays
np.array([range(i, i + 3) for i in [2, 4, 6]])

array([[2, 3, 4],
       [4, 5, 6],
       [6, 7, 8]])

In [89]:
A = np.array([1, 2, 3, 4, 5])
# A 1-D array with five elements (shape (5,)), not five rows

In [90]:
A.shape

(5,)

In [91]:
B = np.array([[1, 2, 3, 4, 5]])

In [92]:
B.shape

(1, 5)

### Creating Arrays

In [8]:
# Create a length-10 integer array filled with zeros
np.zeros(10, dtype=int)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [9]:
# Create a 3x5 floating-point array filled with ones
np.ones((3, 5), dtype=float)


array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [10]:
# Create a 3x5 array filled with 3.14
np.full((3, 5), 3.14)

array([[3.14, 3.14, 3.14, 3.14, 3.14],
       [3.14, 3.14, 3.14, 3.14, 3.14],
       [3.14, 3.14, 3.14, 3.14, 3.14]])

In [12]:
# Create an array filled with a linear sequence
# Starting at 0, stopping before 20 (the end value is excluded), stepping by 2
# (this is similar to the built-in range() function)
np.arange(0, 20, 2)

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

In [13]:
# Create an array of five values evenly spaced between 0 and 1
np.linspace(0, 1, 5)

array([0.  , 0.25, 0.5 , 0.75, 1.  ])

In [14]:
# Create a 3x3 array of uniformly distributed
# random values between 0 and 1
np.random.random((3, 3))

array([[0.5224557 , 0.41500734, 0.87683008],
       [0.51989454, 0.05566028, 0.10561567],
       [0.67421044, 0.29276235, 0.52353762]])

In [15]:
# Create a 3x3 array of normally distributed random values
# with mean 0 and standard deviation 1
np.random.normal(0, 1, (3, 3))

array([[ 0.7540994 , -0.35650309,  0.07291475],
       [-0.73203981, -1.52595578, -0.27691365],
       [-1.47724068,  0.26075474, -0.72816693]])

In [16]:
# Create a 3x3 array of random integers in the interval [0, 10)
np.random.randint(0, 10, (3, 3))

array([[7, 0, 0],
       [9, 8, 7],
       [9, 9, 0]])

In [17]:
# Create a 3x3 identity matrix
np.eye(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [18]:
# Create an uninitialized array of three floats (np.empty defaults to float64)
# The values will be whatever happens to already exist at that memory location
np.empty(3)

array([1., 1., 1.])

In [19]:
np.empty(len([1,2,3,4,5]))

array([0.  , 0.25, 0.5 , 0.75, 1.  ])

empty, unlike zeros, does not set the array values to zero, and may therefore be marginally faster. On the other hand, it requires the user to manually set all the values in the array, and should be used with caution.

In [23]:
# np.empty returns a new array of the given shape and type,
# without initializing entries.
np.empty([2, 2])

array([[0.25, 0.5 ],
       [0.75, 1.  ]])

In [24]:
np.empty([2, 2], dtype=int)

array([[4598175219545276416, 4602678819172646912],
       [4604930618986332160, 4607182418800017408]])

In [25]:
values = np.random.randint(1, 10, size=5)
big_array = np.random.randint(1, 100, size=1000000)

In [27]:
values

array([1, 9, 2, 3, 7])

In [29]:
big_array.shape

(1000000,)

In [33]:
print(1.0 / values)

[1.         0.11111111 0.5        0.33333333 0.14285714]


In [26]:
%timeit (1.0 / big_array)

2.44 ms ± 49.1 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [38]:
x = np.arange(9).reshape((3, 3))
x

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [39]:
2 ** x

array([[  1,   2,   4],
       [  8,  16,  32],
       [ 64, 128, 256]])

### Array Arithmetic:

In [40]:
# Array Arithmetic:
x = np.arange(4)
print("x     =", x)
print("x + 5 =", x + 5)
print("x - 5 =", x - 5)
print("x * 2 =", x * 2)
print("x / 2 =", x / 2)
print("x // 2 =", x // 2) 

x     = [0 1 2 3]
x + 5 = [5 6 7 8]
x - 5 = [-5 -4 -3 -2]
x * 2 = [0 2 4 6]
x / 2 = [0.  0.5 1.  1.5]
x // 2 = [0 0 1 1]


In [41]:
print("-x     = ", -x)
print("x ** 2 = ", x ** 2)
print("x % 2  = ", x % 2)

-x     =  [ 0 -1 -2 -3]
x ** 2 =  [0 1 4 9]
x % 2  =  [0 1 0 1]


In [42]:
-(0.5*x + 1) ** 2

array([-1.  , -2.25, -4.  , -6.25])

In [44]:
x

array([0, 1, 2, 3])

In [43]:
np.add(x, 2)

array([2, 3, 4, 5])

### Absolute Value

In [45]:
x = np.array([-2, -1, 0, 1, 2])
abs(x)

array([2, 1, 0, 1, 2])

In [46]:
np.absolute(x)

array([2, 1, 0, 1, 2])

In [47]:
np.abs(x)

array([2, 1, 0, 1, 2])

### Exponents and logarithms

In [53]:
x = [1, 2, 3]
print("x     =", x)
print("e^x   =", np.exp(x))
print("2^x   =", np.exp2(x))
print("3^x   =", np.power(3, x))

x     = [1, 2, 3]
e^x   = [ 2.71828183  7.3890561  20.08553692]
2^x   = [2. 4. 8.]
3^x   = [ 3  9 27]


The inverse of the exponentials, the logarithms, are also available. The basic np.log gives the natural logarithm; if you prefer to compute the base-2 logarithm or the base-10 logarithm, these are available as well:

In [54]:
x = [1, 2, 4, 10]
print("x        =", x)
print("ln(x)    =", np.log(x))
print("log2(x)  =", np.log2(x))
print("log10(x) =", np.log10(x))

x        = [1, 2, 4, 10]
ln(x)    = [0.         0.69314718 1.38629436 2.30258509]
log2(x)  = [0.         1.         2.         3.32192809]
log10(x) = [0.         0.30103    0.60205999 1.        ]


In [55]:
x = [0, 0.001, 0.01, 0.1]
print("exp(x) - 1 =", np.expm1(x))
print("log(1 + x) =", np.log1p(x))

exp(x) - 1 = [0.         0.0010005  0.01005017 0.10517092]
log(1 + x) = [0.         0.0009995  0.00995033 0.09531018]


### Specifying output
For large calculations, it is sometimes useful to be able to specify the array where the result of the calculation will be stored. Rather than creating a temporary array, this can be used to write computation results directly to the memory location where you'd like them to be. For all ufuncs, this can be done using the out argument of the function:

In [66]:
x = np.arange(5)
x

array([0, 1, 2, 3, 4])

In [67]:
y = np.empty(5)
y

array([0., 0., 0., 0., 0.])

In [68]:
np.multiply(x, 10, out=y)
print(y)

[ 0. 10. 20. 30. 40.]


A ufunc's reduce method repeatedly applies a given operation to the elements of an array until only a single result remains.


In [74]:
x = np.arange(1, 6)
x

array([1, 2, 3, 4, 5])

In [75]:
np.add.reduce(x)

15

In [76]:
np.multiply.reduce(x)

120

In [77]:
#Minimum and Maximum
big_array = np.random.rand(1000000)
min(big_array), max(big_array)


(9.462852297303925e-07, 0.9999998136924644)

NumPy's corresponding functions have similar syntax, and again operate much more quickly:

In [78]:
np.min(big_array), np.max(big_array)


(9.462852297303925e-07, 0.9999998136924644)

In [79]:
%timeit min(big_array)
%timeit np.min(big_array)

64.7 ms ± 757 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
292 µs ± 6.17 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


## np.newaxis

The np.newaxis is just an alias for the Python constant None, which means that wherever you use np.newaxis you could also use None:
ref: https://stackoverflow.com/questions/29241056/how-does-numpy-newaxis-work-and-when-to-use-it

In [93]:
np.newaxis is None

True

The np.newaxis is generally used with slicing. It indicates that you want to add an additional dimension to the array. The position of the np.newaxis represents where I want to add dimensions.

In [95]:
a = np.arange(10)
print(a)
a.shape

[0 1 2 3 4 5 6 7 8 9]


(10,)

In the first example I use all elements from the first dimension and add a second dimension:

In [96]:
a[:, np.newaxis]

array([[0],
       [1],
       [2],
       [3],
       [4],
       [5],
       [6],
       [7],
       [8],
       [9]])

In [97]:
a[:, np.newaxis].shape

(10, 1)

The second example, a[np.newaxis, :], adds a new dimension as the first dimension and then uses all elements of the original array as the elements of the second dimension, giving a result of shape (1, 10).

## Computation on Arrays: Broadcasting

We saw in the previous section how NumPy's universal functions can be used to vectorize operations and thereby remove slow Python loops. Another means of vectorizing operations is to use NumPy's broadcasting functionality. Broadcasting is simply a set of rules for applying binary ufuncs (e.g., addition, subtraction, multiplication, etc.) on arrays of different sizes.



### Introducing Broadcasting
Recall that for arrays of the same size, binary operations are performed on an element-by-element basis:

In [80]:
a = np.array([0, 1, 2])
b = np.array([5, 5, 5])
a + b

array([5, 6, 7])

In [81]:
a + 5

array([5, 6, 7])

In [82]:
M = np.ones((3, 3))
M

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [83]:
M + a

array([[1., 2., 3.],
       [1., 2., 3.],
       [1., 2., 3.]])

While these examples are relatively easy to understand, more complicated cases can involve broadcasting of both arrays. Consider the following example:

In [100]:
a = np.arange(3)
b = np.arange(3)[:, np.newaxis]

print(a)
print(b)

[0 1 2]
[[0]
 [1]
 [2]]


In [101]:
print("a, shape:", a.shape)
print("b, shape:", b.shape)

a, shape: (3,)
b, shape: (3, 1)


In [102]:
a + b

array([[0, 1, 2],
       [1, 2, 3],
       [2, 3, 4]])

For a better understanding, see this figure:
<https://nbviewer.jupyter.org/github/jakevdp/PythonDataScienceHandbook/blob/master/notebooks/figures/02.05-broadcasting.png>

### Rules of Broadcasting:
Broadcasting in NumPy follows a strict set of rules to determine the interaction between the two arrays:

- Rule 1: If the two arrays differ in their number of dimensions, the shape of the one with fewer dimensions is padded with ones on its leading (left) side.
- Rule 2: If the shape of the two arrays does not match in any dimension, the array with shape equal to 1 in that dimension is stretched to match the other shape.
- Rule 3: If in any dimension the sizes disagree and neither is equal to 1, an error is raised.