<p>NumPy was developed to perform fast array processing. If we wanted to create an array of 1 million random draws from a uniform distribution and compute its mean in pure Python, it would be hugely slower than an equivalent C or Fortran program. Loops over Python data types like lists carry significant overhead, whereas C and Fortran code contains a lot of type information which can be used for optimization. NumPy sends array operations to optimized, pre-compiled machine code written in C, speeding up specific operations past what Python alone could handle. This is similar in spirit to Matlab. NumPy is great for operations that are naturally vectorized.</p>

<h3>NumPy Arrays</h3>

In [7]:
# NumPy formally defines an array data type called numpy.ndarray.
# It is similar to a native Python list, except that all elements share a single
# data type, and that type must be one of the dtypes provided by NumPy.
import numpy as np

# np.zeros(n) generates a 1-D array (an ndarray, not a Python list) of n zeros
a = np.zeros(3)
print(a)
print(type(a))

# The default data type on modern machines is float64,
# so a single element is a numpy.float64 scalar

print(type(a[0]))

# We can specify the data type when we initialize the array.
# dtype=int maps to a NumPy integer type (numpy.int64 in the output recorded below).

b = np.zeros(3, dtype = int)

print(type(b[0]))

[0. 0. 0.]
<class 'numpy.ndarray'>
<class 'numpy.float64'>
<class 'numpy.int64'>


<h3>ndarrays can also have a shape</h3>

In [16]:
import numpy as np


# Here, z is a flat (one-dimensional) array: it is neither a row nor a column vector.
# Its shape tuple has only one element, the length of the array. To give the array a
# second dimension we can assign a new tuple to the shape attribute.
# NOTE(review): the NumPy docs describe assigning to .shape as discouraged and recommend
# reshape() instead; it is kept here because the attribute is what this cell demonstrates.

z = np.zeros(10)
print(z.shape)
print('\n')

y = np.zeros(10)
y.shape = (10,1)    # (rows, columns): 10 rows, 1 column -> a column vector
print(y)
print('\n')

# The same 4 elements viewed as a 2 x 2 matrix
x = np.zeros(4)
x.shape = (2,2)
print(x)
print('\n')

w = np.zeros((3,3)) # The shape can also be specified with a tuple when initializing the array
print(w)

(10,)


[[0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]]


[[0. 0.]
 [0. 0.]]


[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]


In [27]:
# np.ones() creates an array of the given shape filled with ones
z = np.ones((2,2))
print(z)
print('\n')

# np.empty creates an array without initializing its entries, so it can be populated later.
# The numbers printed initially are garbage values (they happen to display as zeros in the
# output recorded below) and must not be relied upon.
y = np.empty((3,3))
print(y)
print('\n')

# To set up a grid of evenly spaced numbers use np.linspace

x = np.linspace(2,4,5)   # from 2 to 4 with 5 elements, both endpoints included
print(x)
print('\n')

# To create an identity matrix use np.identity or np.eye
w = np.identity(2)
print(w)
print('\n')

# NumPy arrays can also be created from Python lists, tuples, etc. using np.array
v = np.array([10,20])    # ndarray from a Python list
print(v)
print(type(v))

u = np.array((10,20), dtype = float)    # ndarray from a tuple; here, 'float' is equivalent to float64
print(u)
print(type(u))
print('\n')

t = np.array([[10,20], [30,40]])    # here, a 2D array is created from a list of lists (one inner list per row)
print(t)

[[1. 1.]
 [1. 1.]]


[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]


[2.  2.5 3.  3.5 4. ]


[[1. 0.]
 [0. 1.]]


[10 20]
<class 'numpy.ndarray'>
[10. 20.]
<class 'numpy.ndarray'>


[[10 20]
 [30 40]]


In [3]:
# Check out np.asarray, which is similar to np.array but does not make a distinct copy
# of data that is already in a NumPy array
import numpy as np
na = np.linspace(10,20,2)
print(na is np.asarray(na))      # True: asarray hands back the very same ndarray object, no copy
print(na is np.array(na))        # False: array copies the data into a new object ----- perhaps unnecessarily

True
False


<h3>To read numeric array data from a text file, use np.loadtxt or np.genfromtxt</h3>

In [48]:
# For a flat array, indexing is the same as for Python sequences

# For 2D arrays it's like a matrix / cartesian grid: z[row, column], both counted from 0

z = np.array([[1,2],[3,4]])
print(z)
print(z[0,0])   # row 0, column 0
print(z[1,1])   # row 1, column 1
print(z[0,1])   # row 0, column 1
print(z[1,0])   # row 1, column 0


# Rows and columns can be extracted as follows
print(z[0:])    # all rows from row 0 onward, i.e. the whole array
print(z[0,:])   # first row, as a flat array
print(z[1,:])   # second row, as a flat array
print(z[1:])    # rows from row 1 onward; the result is still 2D
print(z[:,1])   # second column, as a flat array
print(z[:1])    # rows before row 1 (just the first row); the result is still 2D

[[1 2]
 [3 4]]
1
4
2
3
[[1 2]
 [3 4]]
[1 2]
[3 4]
[[3 4]]
[2 4]
[[1 2]]


In [25]:
# An integer array can also be used to extract elements by index -- in this case indices 0, 2, 3
import numpy as np
z = np.linspace(2,4,5)
print(z)

indices = np.array((0,2,3))
print(z[indices])
# A boolean array of the same length acts as a mask: only positions marked True are kept
d = np.array([0,1,1,0,0], dtype = bool)
print(d)
print(z[d])
# np.empty leaves the entries uninitialized; in the output recorded below they happen to
# match the earlier z[indices] result, which shows that they are arbitrary leftovers
z = np.empty(3)
print("the following are garbage values in z")
print(z)
# Assigning to the full slice z[:] overwrites every element in place
z[:]=42
print("these are reinitialized")
print(z)

[2.  2.5 3.  3.5 4. ]
[2.  3.  3.5]
[False  True  True False False]
[2.5 3. ]
the following are garbage values in z
[2.  3.  3.5]
these are reinitialized
[42. 42. 42.]


In [37]:
# A tour of common ndarray methods
import numpy as np
a = np.array((4,1,2,3))
a.sort()            # sorts a in place
print(a)
print(a.sum())      # sum of the elements
print(a.mean())     # mean
print(a.max())      # max
print(a.argmax())   # index of the maximal element
print(a.cumsum())   # cumulative sum of the elements in a
print(a.cumprod())  # cumulative product of the elements
print(a.var())      # variance (divides by n, as the 1.25 in the output shows)
print(a.std())      # standard deviation (square root of the variance above)

# View the same four elements as a 2 x 2 matrix
a.shape = (2,2)
print(a.T)                 # equivalent to a.transpose()
z = np.linspace(2,4,5)
print(z.searchsorted(3.5))   # for sorted z, returns the index of the first element >= 3.5


# Many of the above methods have equivalent functions in the numpy namespace;
# applied to the 2 x 2 array they still reduce over all of its elements
print(np.sum(a))
print(np.mean(a))

[1 2 3 4]
10
2.5
4
3
[ 1  3  6 10]
[ 1  2  6 24]
1.25
1.118033988749895
[[1 3]
 [2 4]]
3
10
2.5


In [50]:
# Arithmetic operators act element by element on arrays
import numpy as np

a = np.array([1,2,3,4])
b = np.array((5,6,7,8))
print(a+b)                 # element-wise sum
print(a*b)                 # element-wise product
print(a + 10)              # can add a scalar to each element in an array
print(a[2:] + 10)          # same, on a slice; a itself is not modified
a[2:] += 10                # in-place: adds a scalar to some elements of the array
print(a)
a[2:] -= 10                # undo the in-place addition
print(a)
                           # scalar multiplication is similar
print(a * 10)

# 2D arrays work in a very similar way
A = np.ones((2,2))
B = np.ones((2,2))
print(A)
print(A+B)
print(A+10)
print(A*B)                 # element-wise product, not the matrix product
A[:,1] *= 10               # scale the second column in place
print(A)
A[:,1] /= 10               # divide the second column back down, restoring A to all ones

[ 6  8 10 12]
[ 5 12 21 32]
[11 12 13 14]
[13 14]
[ 1  2 13 14]
[1 2 3 4]
[10 20 30 40]
[[1. 1.]
 [1. 1.]]
[[2. 2.]
 [2. 2.]]
[[11. 11.]
 [11. 11.]]
[[1. 1.]
 [1. 1.]]
[[ 1. 10.]
 [ 1. 10.]]
