# The Basics of NumPy (with a view towards Pandas)

In [1]:
# Still following the Udemy course I'm taking, these are my personal notes for future reference on key features of NumPy.

In [2]:
import numpy as np       # This imports numpy. We call on its tools using np.

## Contents

### Section 1: Vectors and Matrices
#### 1.1: Construction via Lists
#### 1.2: Calling from an Array
#### 1.3: Arange and Linspace
#### 1.4: Constant Arrays
#### 1.5: Identity Matrix

### Section 2: Random Matrices

### Section 3: Methods and Operations on Matrices
#### 3.1: Reshape, Max/Min, Argmax/Argmin, Shape, dType, Standard Deviation, and Mean
#### 3.2: Matrix Operations

### Section 4: Working with Slices


### Section 1: Vectors and Matrices

#### 1.1: Construction via Lists

In [3]:
# We can create vectors by casting lists as arrays

In [4]:
# Start from a plain Python list and convert it into a 1-D NumPy array.
lst = [1,2,3]
arr = np.array(lst)
print("Here's our array:",arr)

Here's our array: [1 2 3]


In [5]:
# We can index and grab slices with the same syntax as for lists (the stop index is excluded).
# Unlike a list slice, though, an array slice is not an independent copy -- see Section 4.
print(arr[0:2])

[1 2]


In [6]:
# For matrices, we just feed in a list of lists; each inner list becomes one row:
lstlsts = [[1,2,3],[4,5,6],[7,8,9]]
mat = np.array(lstlsts)
print(mat)

[[1 2 3]
 [4 5 6]
 [7 8 9]]


#### 1.2: Calling from an Array

In [7]:
# Indexing a matrix with a single integer selects a row. Indices start at 0, so the second row is index 1:
print(mat[1])

[4 5 6]


In [8]:
# We can slice to get a subset of the rows: mat[:2] is rows 0 and 1, mat[1:] is row 1 onwards.
print(mat[:2])
print("\n")
print(mat[1:])

[[1 2 3]
 [4 5 6]]


[[4 5 6]
 [7 8 9]]


In [9]:
# Note: if you want a non-contiguous collection of rows, just feed in the list of row indices:
print(mat[[0,2]])
# This returns the first and third rows (indices 0 and 2)

[[1 2 3]
 [7 8 9]]


In [10]:
# We can pick out single entries via mat[i,j], which takes the entry in row i and column j of mat
# (both counted from 0, so mat[1,2] is second row, third column):
print(mat)

mat[1,2]

[[1 2 3]
 [4 5 6]
 [7 8 9]]


6

In [11]:
# We can take entire columns by putting a colon in the row position, which ranges over all rows with the column fixed.
print(mat[:,2])

[3 6 9]


In [12]:
# Selecting a single column with an integer index makes NumPy return a 1-D array, which has no row/column
# orientation -- that's why the column above is displayed like a row vector. This isn't the case if we
# take a slice of columns, which keeps the result 2-D:
print(mat[:,0:2])

[[1 2]
 [4 5]
 [7 8]]


In [13]:
# Or, again, for a disconnected set of columns:
print(mat[:,[0,2]])

[[1 3]
 [4 6]
 [7 9]]


#### 1.3: Arange and Linspace

In [14]:
# NumPy has a way of producing an array from a range in one step: np.arange(start,stop,step) produces the vector
# with the same entries as range(start,stop,step), so the stop value itself is excluded.

In [15]:
# Start at 1 and step by 3; the stop value 10 is excluded, so the last entry is 7.
np.arange(1,10,3)

array([1, 4, 7])

In [16]:
# Of course, we can print this to see it as a plain vector, without the array(...) wrapper:
print(np.arange(1,10,3))

[1 4 7]


In [17]:
# np.arange with one argument just gives the vector from 0 up to (but not including) that number, in steps of 1:
print(np.arange(4))

[0 1 2 3]


In [18]:
# As in Matlab, we can define linear spaces (arrays of equally spaced points) by specifying the start and end points,
# along with the number of points (the length of the vector). Unlike np.arange, both end points are included,
# and the result is an array of floats.

np.linspace(2,6,6)

array([2. , 2.8, 3.6, 4.4, 5.2, 6. ])

#### 1.4: Constant Arrays

In [19]:
# We can use np.zeros() with a single integer to produce a 1-D vector of that many zeros (stored as floats), e.g.
print(np.zeros(5))

[0. 0. 0. 0. 0.]


In [20]:
# For a zero matrix, we pass the shape as a single tuple (rows, columns) -- hence the double parentheses:
np.zeros((3,4))

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [21]:
# The same 3-by-4 zero matrix, shown via print() instead of the notebook's array(...) display:
print(np.zeros((3,4)))

[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]


In [22]:
# Similarly, np.ones() gives constant arrays of ones, taking either a length or a shape tuple:
print(np.ones(4), "\n")
print(np.ones((4,2)))

[1. 1. 1. 1.] 

[[1. 1.]
 [1. 1.]
 [1. 1.]
 [1. 1.]]


In [23]:
# If we want other constant arrays, we can "broadcast" multiplication to the array of ones, or "broadcast" addition to the
# array of zeroes. (NumPy can also build such an array directly: np.full((4,2), 4.0).)
print(np.ones((4,2)) * 4, "\n")
print(np.zeros((4,2)) + 4)

[[4. 4.]
 [4. 4.]
 [4. 4.]
 [4. 4.]] 

[[4. 4.]
 [4. 4.]
 [4. 4.]
 [4. 4.]]


#### 1.5: Identity Matrix

In [24]:
# np.eye(n) produces the n-by-n identity matrix (with float entries):
np.eye(5)

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

### Section 2: Random Matrices

In [25]:
 # The format is np.random.[distribution]()

In [26]:
# Some options for distributions are:
# -- rand (uniform random distribution on [0,1))
# -- randn (standard normal distribution)
# -- randint (random integers, but needs extra arguments (low, high) to determine the finite range of integers)

In [27]:
# Uniform Random Distribution: one argument gives a vector of that length, two arguments give a matrix.
# Note that rand takes the dimensions as separate arguments, not as a tuple.
# No seed is set, so the numbers differ on every run.
print(np.random.rand(4), "\n")
print(np.random.rand(4,5), "\n")

[0.98004265 0.37209359 0.45621219 0.85265525] 

[[0.66471977 0.3573711  0.87530761 0.46358525 0.98535909]
 [0.00119771 0.19729914 0.68787558 0.06066785 0.47804527]
 [0.48688407 0.03295954 0.45479626 0.13750471 0.09202598]
 [0.18294424 0.0988233  0.19139279 0.32364974 0.44978898]] 



In [28]:
# Standard Normal Distribution: same calling convention as rand (dimensions as separate arguments).
# No seed is set, so the numbers differ on every run.
print(np.random.randn(4), "\n")
print(np.random.randn(4,5), "\n")

[-0.24868983 -0.6763544  -0.35944302 -0.45574247] 

[[ 1.16404727e+00  1.23277123e+00  6.65165996e-01 -1.14144277e-01
   9.17411989e-01]
 [-4.50970432e-01 -4.71509093e-01  1.59733225e+00 -1.61936430e-01
   3.23553505e-01]
 [ 1.45114389e-01  2.15699412e-01 -1.01356404e-01  1.57635809e-03
  -7.81941760e-01]
 [-1.37404648e+00  6.72279443e-01 -1.26891251e+00  3.54118980e-01
  -1.47161308e+00]] 



In [29]:
# Random Integers: the arguments are (low, high, size), where size is a length or a shape tuple
print(np.random.randint(2,10,5), "\n")
print(np.random.randint(2,10,(3,2)), "\n")
# Note: We can't attain 10 (the upper bound is exclusive), just as with range and slicing.

# If one just includes two arguments, this will return a single random integer in the given range:
print(np.random.randint(2,10))

[2 6 7 4 6] 

[[8 4]
 [4 3]
 [9 3]] 

4


In [30]:
# NOTE: If using a particular random distribution a lot, import it directly from numpy.random to remove the need for
# the "np.random." prefix in all of the above. E.g.,

from numpy.random import randint

# A 2-by-3 matrix of random integers from 1 to 10 (11 is excluded):
randint(1,11,(2,3))

array([[4, 7, 4],
       [8, 7, 9]])

### Section 3: Methods and Operations on Matrices

#### 3.1: Reshape, Max/Min, Argmax/Argmin, Shape, dType, Standard Deviation, and Mean

In [31]:
# RESHAPE
# Given an array that is m-by-n, and any pair (s,t) such that st=mn, one can reshape the array into an s-by-t array.
# (The entries are read off row by row and refilled in the same order.)

In [32]:
# A 3-by-4 example matrix holding the integers 1 to 12, one inner list per row.
mat = np.array([
    [1, 2, 3, 4],
    [5, 6, 7, 8],
    [9, 10, 11, 12],
])
print(mat, "\n")
# Both target shapes hold 12 entries as well; here each shape is passed as a single tuple.
print(mat.reshape((4, 3)), "\n")
print(mat.reshape((2, 6)), "\n")

[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]] 

[[ 1  2  3]
 [ 4  5  6]
 [ 7  8  9]
 [10 11 12]] 

[[ 1  2  3  4  5  6]
 [ 7  8  9 10 11 12]] 



In [33]:
# In the above, we could have constructed the original array by reshaping the vector 1,...,12:
np.arange(1,13).reshape(3,4)

array([[ 1,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12]])

In [34]:
# MAX with no axis given returns the single largest entry of the whole array
np.max(mat)

12

In [35]:
# We can specify the axis for max, to get the maxima of the columns or of the rows:
print(np.max(mat,axis=0)) ## axis=0 runs down the rows: one maximum per column
print(np.max(mat,axis=1)) ## axis=1 runs across the columns: one maximum per row

[ 9 10 11 12]
[ 4  8 12]


In [36]:
# One can also use max as a method of the array itself, with the same axis argument:
mat.max(axis=0)

array([ 9, 10, 11, 12])

In [37]:
# argmax/argmin return the location of the max/min. With no axis given, the location is a single index into
# the array read off row by row -- here 11, the last of the 12 entries.
mat.argmax()

11

In [38]:
# shape gives the dimensions of the array as a tuple (rows, columns):
mat.shape

(3, 4)

In [39]:
# dtype gives the data type for the array:
mat.dtype
# In our example, we have 32-bit integers.
# NOTE(review): NumPy's default integer type depends on the platform and NumPy version, so this same cell
# may report int64 on another machine.

dtype('int32')

In [40]:
# One also has statistical methods. Note that std() is the population standard deviation
# (it divides by N rather than N-1):
print(mat.std())
print(mat.mean())

3.452052529534663
6.5


#### 3.2: Matrix Operations

In [41]:
# Addition, Subtraction, Multiplication, Division and Exponentiation are all performed entry-wise
# (in particular, * is the entry-wise product, not the matrix product):

In [42]:
# Two small integer matrices used to demonstrate the entry-wise arithmetic operators.
mat1 = np.array([[1,2,3],[4,5,6],[1,3,6]])
mat2 = np.array([[2,1,0],[1,1,1],[2,2,2]])
print(mat1,"\n")
print(mat2,"\n")
print(mat1+mat2,"\n")
print(mat1*mat2,"\n")
# mat2 has a 0 in its top-right corner, so the quotient there is 3/0, which NumPy reports as inf.
# This division by zero is expected, so it is done inside np.errstate to keep NumPy from emitting a
# RuntimeWarning; the printed result is unchanged.
with np.errstate(divide="ignore"):
    print(mat1/mat2,"\n")
# Entry-wise powers: each entry of mat1 raised to the matching entry of mat2 (so 3**0 = 1).
print(mat1**mat2)

[[1 2 3]
 [4 5 6]
 [1 3 6]] 

[[2 1 0]
 [1 1 1]
 [2 2 2]] 

[[3 3 3]
 [5 6 7]
 [3 5 8]] 

[[ 2  2  0]
 [ 4  5  6]
 [ 2  6 12]] 

[[0.5 2.  inf]
 [4.  5.  6. ]
 [0.5 1.5 3. ]] 

[[ 1  2  1]
 [ 4  5  6]
 [ 1  9 36]]


  print(mat1/mat2,"\n")


In [43]:
# One can also perform these with scalars. The operations then "broadcast" to all entries:
print(mat1,"\n")
print(mat1 * 3,"\n")   # every entry multiplied by 3
print(mat1 + 2,"\n")   # 2 added to every entry

[[1 2 3]
 [4 5 6]
 [1 3 6]] 

[[ 3  6  9]
 [12 15 18]
 [ 3  9 18]] 

[[3 4 5]
 [6 7 8]
 [3 5 8]] 



In [44]:
# Comparisons are also applied entry-wise, producing an array of booleans:
rmat = randint(-10,11,(4,4))
print(rmat)
rmat>2 # a matrix of booleans, with True exactly where the corresponding entry of rmat is greater than 2

[[ -2  -2   2  -3]
 [  5  -1  -6  -7]
 [  3  -3   6 -10]
 [ -3   1  -7   6]]


array([[False, False, False, False],
       [ True, False, False, False],
       [ True, False,  True, False],
       [False, False, False,  True]])

In [45]:
# We can feed this boolean array back into the matrix as a mask. This returns a 1-D vector of all entries in the
# original array for which the condition holds, read off row by row:
rmat[rmat>2]

array([5, 3, 6, 6])

### Section 4: Working with Slices

In [46]:
# Caveat!: Let's define a matrix (in fact, for simplicity, let's take a vector -- the integers 0 to 10):
vect = np.arange(11)
print(vect)

[ 0  1  2  3  4  5  6  7  8  9 10]


In [47]:
# If we define a new vector as a slice of "vect" (its entries at positions 3 to 7)
vect2 = vect[3:8]
print(vect2)

[3 4 5 6 7]


In [48]:
# and then perform an operation reassigning values in vect2 (here its entries at positions 1 and 3):
vect2[1]=0
vect2[3]=-5
print(vect2)

[ 3  0  5 -5  7]


In [49]:
# then observe what happens when we return to vect (look at positions 4 and 6):
vect

array([ 0,  1,  2,  3,  0,  5, -5,  7,  8,  9, 10])

In [50]:
# We see that the entries of vect2 were linked to those of vect: the slice shares its data with the original.
# To get around this, use the "copy" method, which gives vect2 independent data of its own.
vect = np.arange(11)
print(vect)

vect2 = vect[3:8].copy()
print(vect2)

# Same two reassignments as before, written as one tuple assignment.
vect2[1], vect2[3] = 0, -5
print(vect2)
print(vect)

[ 0  1  2  3  4  5  6  7  8  9 10]
[3 4 5 6 7]
[ 3  0  5 -5  7]
[ 0  1  2  3  4  5  6  7  8  9 10]


In [51]:
# We see that vect was left unchanged!