# Numpy Tutorial

## Load in necessary libraries

In [85]:
import numpy as np
import sys

## The basics (create array, get shape, size, data type, and # of dimensions)

#### Creating arrays

In [86]:
# create array and set data type
a = np.array([1, 2, 3], dtype = 'int64')
print(a)

[1 2 3]


In [87]:
# create another array 
b = np.array([[9.0, 8.0, 7.0], [6.0, 5.0, 4.0]])
print(b)

[[9. 8. 7.]
 [6. 5. 4.]]


#### Get dimensions of array

In [88]:
# get array dimensions of array 'a'
a.ndim

1

In [89]:
# get array dimensions of array 'b'
b.ndim

2

#### Get shape of array

In [90]:
# get shape of array 'a': (3,) means one dimension holding 3 elements
# (a 1-D array has no separate rows and columns, so it is not "1 by 3")
a.shape

(3,)

In [91]:
# get shape of array 'b' (2 by 3)
b.shape

(2, 3)

#### Get data type of array

In [92]:
# get data type of array 'a'
a.dtype

dtype('int64')

In [93]:
# get data type of array 'b'
b.dtype

dtype('float64')

#### Get size of array (item size & total size)

In [94]:
# get size in bytes of a single element of array 'a'
# item size is 8 bytes because the data type is int64 (64 bits / 8 bits per byte = 8)
a.itemsize

8

In [95]:
# get size in bytes of a single element of array 'b'
# item size is 8 bytes because the data type is float64 (64 bits / 8 bits per byte = 8)
b.itemsize

8

In [96]:
# get total size of array 'a' by multiplying total number of elements with item size
# a.size = 3 because 3 elements in array
total_size_a = a.size * a.itemsize
total_size_a

24

In [97]:
# get total size of array 'b' by multiplying total number of elements with item size
# b.size = 6 because 6 elements in array (2 rows with 3 elements each)
total_size_b = b.size * b.itemsize
total_size_b

48

In [98]:
# easier way to get total size or total number of bytes
a.nbytes

24

In [99]:
# easier way to get total size or total number of bytes
b.nbytes

48

## Accessing/Changing specific elements, rows, columns, etc.

#### 2D example

In [100]:
# create 2x5 array
c = np.array([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]])
print(c)

[[ 1  2  3  4  5]
 [ 6  7  8  9 10]]


In [101]:
# get a specific element [row, column]
print(c[1, 4])

10


In [102]:
# get specific row [row, :]
# colon in the column section means every column for that row
print(c[0, :])

[1 2 3 4 5]


In [103]:
# get a specific column [:, column]
# colon in the row section means every row for that column
print(c[:, 0])

[1 6]


In [104]:
# get a little more fancy [start-index:end-index:step-size]
# get elements 1, 3, 5 from the first row by taking every second column
# the end-index is exclusive, so use 5 (last index + 1) to include the last value
print(c[0, 0:5:2])

[1 3 5]


In [105]:
# change values (10 => 20)
c[1, 4] = 20
print(c)

[[ 1  2  3  4  5]
 [ 6  7  8  9 20]]


In [106]:
# make all elements with index [1] in each row the number 5
c[:, 1] = 5
print(c)

[[ 1  5  3  4  5]
 [ 6  5  8  9 20]]


#### 3D example

In [107]:
# create 3D array
d = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])
print(d)

[[[1 2]
  [3 4]]

 [[5 6]
  [7 8]]]


In [108]:
# get specific element (work outside in) 
# start with the primary array index, then secondary array index, then the element
print(d[0, 1, 1])

4


In [109]:
# replace values
# in every primary array (first index ':'), take the secondary array row with index [1]
# in those rows, replace every element (last index ':')
# the new values have shape (2, 2): one replacement row per primary array
d[:, 1, :] = [[9, 9], [8, 8]]
print(d)

[[[1 2]
  [9 9]]

 [[5 6]
  [8 8]]]


## Initializing different types of arrays

#### Different arrays

In [126]:
# all zeros matrix 
np.zeros((2, 3))

array([[0., 0., 0.],
       [0., 0., 0.]])

In [127]:
# all ones matrix
np.ones((4, 2, 2))

array([[[1., 1.],
        [1., 1.]],

       [[1., 1.],
        [1., 1.]],

       [[1., 1.],
        [1., 1.]],

       [[1., 1.],
        [1., 1.]]])

In [128]:
# any other number matrix
# takes in 2 parameters (shape, value)
np.full((2, 2), 99)

array([[99, 99],
       [99, 99]])

In [130]:
# full_like matrix
# takes the shape of an already initialized matrix and replaces all values
np.full_like(a, 4)

array([4, 4, 4])

In [131]:
# random decimal matrix
np.random.rand(4, 2)

array([[0.34080667, 0.60073253],
       [0.18754076, 0.59596073],
       [0.64806335, 0.88087971],
       [0.63431123, 0.77829062]])

In [133]:
# random sample matrix
# creates random decimal matrix using shape of another matrix
np.random.random_sample(b.shape)

array([[0.49001979, 0.23114413, 0.52730939],
       [0.12488364, 0.64772273, 0.5091158 ]])

In [138]:
# random integer matrix
# takes the range (low value is inclusive, high value is exclusive) and the shape
# here the values are drawn from -4 up to and including 7
np.random.randint(-4, 8, size = (3, 3))

array([[-4,  2,  6],
       [ 3, -1, -2],
       [-3, -2,  7]])

In [142]:
# identity matrix
# always gives square matrix
np.identity(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [146]:
# repeat arrays
# axis = 0 repeats each row, so the single row [1, 2, 3] is stacked 3 times
# axis = 1 would instead repeat each element within the row: [[1 1 1 2 2 2 3 3 3]]
arr = np.array([[1, 2, 3]])
r1 = np.repeat(arr, 3, axis = 0)
print(r1)

[[1 2 3]
 [1 2 3]
 [1 2 3]]


#### Problem 1: Create a 5x5 array. The outer layer should be ones. The center should have a value of 9. The middle layer should be zeros.

In [154]:
# initialize outermost layer of final array
output = np.ones((5, 5))

# initialize middle layer
z = np.zeros((3, 3))

# change value of center of middle layer
z[1, 1] = 9

# replace middle of ones array with 'z' array
# the stop index of a slice is exclusive, so 1:4 selects indexes 1, 2, and 3
# rows [1:4] run from the second row (index [1]) through the fourth row (index [3])
# columns [1:4] run from the second column (index [1]) through the fourth column (index [3])
output[1:4, 1:4] = z
print(output)

[[1. 1. 1. 1. 1.]
 [1. 0. 0. 0. 1.]
 [1. 0. 9. 0. 1.]
 [1. 0. 0. 0. 1.]
 [1. 1. 1. 1. 1.]]


#### Be careful when copying arrays

In [158]:
# initialize array
arr1 = np.array([1, 2, 3])

# this does NOT make a copy: arr2 becomes a second name for the same array as arr1
arr2 = arr1

# change first value of arr2
arr2[0] = 100

# changing the value of arr2 will also change arr1
# this is because both names refer to the same underlying array
print(arr2)
print(arr1)

[100   2   3]
[100   2   3]


In [159]:
# initialize array
arr1 = np.array([1, 2, 3])

# use copy method on arr1
arr2 = arr1.copy()

# change first value of arr2
arr2[0] = 100

# changing the value of arr2 will no longer change arr1
# this is because we created a new array with the same contents as arr1 and assigned it to arr2
print(arr2)
print(arr1)

[100   2   3]
[1 2 3]


## Mathematics

#### Arithmetic

In [173]:
# initialize array
new_arr = np.array([1, 2, 3, 4])
print(new_arr)

[1 2 3 4]


In [164]:
# add to each element in array
new_arr + 2

array([3, 4, 5, 6])

In [167]:
# subtract from each element in array
new_arr - 2

array([-1,  0,  1,  2])

In [171]:
# multiply each element in array
new_arr * 2

array([2, 4, 6, 8])

In [172]:
# divide each element in array
new_arr / 2

array([0.5, 1. , 1.5, 2. ])

In [176]:
# use exponents on each element in array
new_arr ** 2

array([ 1,  4,  9, 16])

In [175]:
# add arrays together
# addition is element-wise: each element is added to the element at the same index in the other array
new_arr2 = np.array([1, 0, 1, 0])
new_arr2 + new_arr

array([2, 2, 4, 4])

#### Trigonometry

In [177]:
# take sine value of each element in array
np.sin(new_arr)

array([ 0.84147098,  0.90929743,  0.14112001, -0.7568025 ])

In [178]:
# take cosine value of each element in array
np.cos(new_arr)

array([ 0.54030231, -0.41614684, -0.9899925 , -0.65364362])

In [179]:
# take tangent value of each element in array
np.tan(new_arr)

array([ 1.55740772, -2.18503986, -0.14254654,  1.15782128])

#### Linear Algebra

In [186]:
# element-wise multiplication (*) requires shapes that match or can be broadcast together
# shapes (2, 4) and (4, 2) are not compatible, so this raises a ValueError
# here's an example

array1 = np.ones((2, 4))
array2 = np.full((4, 2), 2)

array1 * array2

ValueError: operands could not be broadcast together with shapes (2,4) (4,2) 

In [187]:
# use matrix multiplication instead: (2, 4) times (4, 2) gives a (2, 2) result
np.matmul(array1, array2)

array([[8., 8.],
       [8., 8.]])

In [189]:
# find determinant of matrix

# all identity matrices have determinant of 1
array3 = np.identity(3)

# use linear algebra determinant method
np.linalg.det(array3)

np.float64(1.0)

#### Statistics

In [190]:
# initialize array
stats = np.array([[1, 2, 3], [4, 5, 6]])
stats

array([[1, 2, 3],
       [4, 5, 6]])

In [191]:
# take minimum value of array
np.min(stats)

np.int64(1)

In [192]:
# take maximum value of array
np.max(stats)

np.int64(6)

In [193]:
# take minimum value of each row
np.min(stats, axis = 1)

array([1, 4])

In [194]:
# take minimum value of each column
np.min(stats, axis = 0)

array([1, 2, 3])

In [195]:
# get sum of all values in each row
np.sum(stats, axis = 1)

array([ 6, 15])

In [196]:
# get sum of all values in each column
np.sum(stats, axis = 0)

array([5, 7, 9])

## Reorganizing arrays

#### Reshape arrays

In [199]:
# initialize array
before = np.array([[1, 2, 3, 4], [5, 6, 7, 8]])
print(before)

# reshape array (8 rows, 1 column)
after = before.reshape((8, 1)) 
print(after)

[[1 2 3 4]
 [5 6 7 8]]
[[1]
 [2]
 [3]
 [4]
 [5]
 [6]
 [7]
 [8]]


#### Vertically stacking vectors/matrices

In [202]:
# initialize arrays
v1 = np.array([1, 2, 3, 4])
v2 = np.array([5, 6, 7, 8])

# vertically stack v1 onto v2
np.vstack([v1, v2])

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

#### Horizontally stacking vectors/matrices

In [204]:
# initialize arrays
h1 = np.ones((2, 4))
h2 = np.zeros((2, 2))

# horizontally stack h1 and h2
np.hstack([h1, h2])

array([[1., 1., 1., 1., 0., 0.],
       [1., 1., 1., 1., 0., 0.]])

## Miscellaneous

#### Load data from file

In [225]:
# get data using numpy
filedata = np.genfromtxt('data.txt', delimiter = ',')

# change all values from floats to integers
filedata = filedata.astype('int32')
print(filedata)

[[  1  13  21  11 196  75   4   3  34   6   7   8   0   1   2   3   4   5]
 [  3  42  12  33 766  75   4  55   6   4   3   4   5   6   7   0  11  12]
 [  1  22  33  11 999  11   2   1  78   0   1   2   9   8   7   1  76  88]]


#### Advanced indexing & Boolean masking

In [221]:
# give me boolean values for each element greater than 50
filedata > 50

array([[False, False, False, False,  True,  True, False, False, False,
        False, False, False, False, False, False, False, False, False],
       [False, False, False, False,  True,  True, False,  True, False,
        False, False, False, False, False, False, False, False, False],
       [False, False, False, False,  True, False, False, False,  True,
        False, False, False, False, False, False, False,  True,  True]])

In [222]:
# get array of all values greater than 50
filedata[filedata > 50]

array([196,  75, 766,  75,  55, 999,  78,  76,  88], dtype=int32)

In [223]:
# index with a list

# initialize array
lmao = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9])

# get the elements at indexes 1, 2, and 8
lmao[[1, 2, 8]]

array([2, 3, 9])

In [224]:
# for each column, get True if any value in that column is greater than 50
np.any(filedata > 50, axis = 0)

array([False, False, False, False,  True,  True, False,  True,  True,
       False, False, False, False, False, False, False,  True,  True])

In [226]:
# for each column, get True only if all values in that column are greater than 50
np.all(filedata > 50, axis = 0)

array([False, False, False, False,  True, False, False, False, False,
       False, False, False, False, False, False, False, False, False])

In [227]:
# get boolean values for numbers greater than 50 and less than 100
((filedata > 50) & (filedata < 100))

array([[False, False, False, False, False,  True, False, False, False,
        False, False, False, False, False, False, False, False, False],
       [False, False, False, False, False,  True, False,  True, False,
        False, False, False, False, False, False, False, False, False],
       [False, False, False, False, False, False, False, False,  True,
        False, False, False, False, False, False, False,  True,  True]])