In [2]:
# Load NumPy
import numpy as np

# To bypass 'module not found' error make sure using Python 3.8.5 64-bit /Library/Frameworks/Python.framework/... environ (same location as numpy installation)

<style>
    .body {
        font-weight: 200;
        font-size: 14px;
        padding-left: 30px;
    }
    .note {
        font-size: 12px;
        color: red;
    }
</style>

# The 1D NumPy Array
In Python we can't efficiently perform calculations over an entire list
+ instead we use the numpy array  
+ We can perform <strong>element-wise calculations</strong> on arrays (<em>e.g. adding arrays, multiplying an array by a constant, adding a constant to an array</em>)
<br/>

To create an array use <strong>np.array(x)</strong>
+ 'x' is a python list  
<br/>

Caveat -- <em>type coercion</em>
+ NumPy arrays can only contain one type
><em>When multiple types are passed into the same array, NumPy automatically converts the values so they are all of the same type</em>

In [5]:
# Creating arrays
# Build the array from an existing Python list. The list is deliberately NOT
# named `list`: that would shadow the built-in for the rest of the kernel
# session and break any later call such as list(range(3)).
nums_list = [1, 2, 3]
print(np.array(nums_list))

# OR pass a list literal straight to np.array()
print(np.array([1, 2, 3]))

[1 2 3]
[1 2 3]


In [9]:
# Type coercion in action: NumPy settles on ONE type for the whole array
np_nums1 = np.array([1, True, 3])       # int + bool
np_nums2 = np.array([1.8, '1', 3])      # float + str + int
np_nums3 = np.array([1.31, 2.8, 3])     # float + int

print(np_nums1)     # => every element becomes an int
print(np_nums2)     # => every element becomes a string
print(np_nums3)     # => every element becomes a float

[1 1 3]
['1.8' '1' '3']
[1.31 2.8  3.  ]


## Subsetting 1D Arrays
<p style="font-size: 14px; font-weight: 400;">We can subset 1D arrays like we subset lists: <strong>x[y]</strong></p>
- 'x' is the array name, 'y' is the index

<br/>
<p style="color:red;">NOTE: To get an array of boolean values (True or False), <span style="border-bottom: 1px solid red;">apply a relational operator to an existing array</span> (<em>e.g. >, <, !=, etc.</em>); the comparison itself returns the boolean array</p>
- when selecting items that meet a condition, 'y' is the relational expression itself (<em>e.g. x[x < 150]</em>)


In [23]:
weights = np.array([160, 130, 110, 230, 110, 210])

# Comparing an array with a number is element-wise and yields a boolean array
light = weights < 150
print(light)

# Boolean subsetting: keep only the elements whose mask entry is True ...
np_weights_light = weights[light]
# ... or, equivalently, write the condition inline
np_weights_light = weights[weights < 150]

print(np_weights_light)

[False  True  True False  True False]
[130 110 110]


---
# 2D NumPy Arrays

<p>Created from list of lists, <span style="color:red;"><em>aka each sublist corresponds to a row of 2D NumPy array</em></span></p>
- to create 2D array use <strong>np.array(x)</strong> where 'x' is a list of lists
<br/><br/>

<p>Printing shape attribute clarifies number of rows and cols in 2D array</p>
- <strong>array.shape</strong> where 'array' is the name of the array in question
<br/><br/>

<p style="color: red; font-style: italic;">NOTE: the number of rows/cols DOES NOT determine the number of dimensions (e.g. a 4 x 2 array is still 2D)</p>

In [30]:
# Option 1: convert an existing list of lists (each sublist becomes one row)
rectangles = [[20, 40], [25, 45], [30, 50], [35, 55]]
np_rectangles = np.array(rectangles)

# Option 2: write the nested list inline -- builds the same array
np_rectangles = np.array([
    [20, 40],
    [25, 45],
    [30, 50],
    [35, 55],
])

print(np_rectangles)

# .shape reports (rows, cols); this 2D array is 4 x 2
print(np_rectangles.shape)

[[20 40]
 [25 45]
 [30 50]
 [35 55]]
(4, 2)


## Subsetting 2D Arrays
<p style="color:red; font-style:italic;">NOTE: in addition to subsetting like regular list of lists (with [..][..]), we can subset 2D arrays more intuitively</p>
Use <strong>array[rows,cols]</strong><br/>
- 'rows' is subsetting of rows<br/>
- 'cols' is subsetting of columns

In [35]:
print(np_rectangles)

# Entire second column: ':' before the comma keeps every row
width = np_rectangles[:, 1]
print(width)

# Length of the 3rd rectangle: row index 2, column index 0 (indices are 0-based)
rectangle3 = np_rectangles[2, 0]
print(rectangle3)

# Middle two rectangles: rows at index 1 and 2 (stop index 3 is excluded), all columns
mid_rectangles = np_rectangles[1:3, :]
print(mid_rectangles)

[[20 40]
 [25 45]
 [30 50]
 [35 55]]
[40 45 50 55]
30
[[25 45]
 [30 50]]


## 2D Array Arithmetic
Arithmetic is ALWAYS performed element-wise   
- even when multiplying (a) matrix x matrix, (b) matrix x vector  
- aka matrix multiplication is NOT occurring here
<p style="color: red; font-style: italic;">NOTE: the key is having the same number of columns</p>

In [11]:
np_mat = np.array([
    [1, 2],
    [3, 4],
    [5, 6],
])
row_vec = np.array([2, 2])

# Matrix-vector arithmetic: the 2-element vector is applied to every row
print(np_mat * row_vec)     # multiplication
print(np_mat + row_vec)     # addition

# Matrix-matrix arithmetic: values are paired up element by element
print(np_mat * np_mat)      # multiplication; NOT conventional matrix multiplication
print(np_mat + np_mat)      # addition

# Multiplying by / adding a constant works on a matrix just as it does on a 1D array

[[ 2  4]
 [ 6  8]
 [10 12]]
[[3 4]
 [5 6]
 [7 8]]
[[ 1  4]
 [ 9 16]
 [25 36]]
[[ 2  4]
 [ 6  8]
 [10 12]]


---
# NumPy Exploratory Data Analysis
### Summary stats
These can be used on lists or subsets of arrays for example
<p>- <strong>np.mean()</strong> returns avg value</p>
<p>- <strong>np.median()</strong> returns median value</p>
<p>- <strong>np.std()</strong> returns standard deviation</p>
<p>- <strong>np.corrcoef(a, b)</strong> returns correlation coefficient matrix of two variables 'a' and 'b'</p>

In [25]:
print(np_mat)

# Mean: whole array, then a single column
print(f"Overall Mean: {np.mean(np_mat)!s}")
print(f"Mean of col2: {np.mean(np_mat[:, 1])!s}")

# Median: whole array, then a single row
print(f"Overall Median: {np.median(np_mat)!s}")
print(f"Median of row2: {np.median(np_mat[1, :])!s}")

# Standard deviation: whole array, then the first two rows (all columns)
top_two_rows = np_mat[:2, :]
print(f"Overall SD: {np.std(np_mat)!s}")
print(f"SD of subset rows(1,2) cols(1,2): {np.std(top_two_rows)!s}")

# Correlation coefficient matrix: between the two columns, then between rows 1 and 3
col1, col2 = np_mat[:, 0], np_mat[:, 1]
row1, row3 = np_mat[0, :], np_mat[2, :]
print(f"Corr Coef of columns: {np.corrcoef(col1, col2)!s}")
print(f"Corr Coef of rows 1 & 3: {np.corrcoef(row1, row3)!s}")
# With this little data the coefficients are not meaningful; more data is usually needed

[[1 2]
 [3 4]
 [5 6]]
Overall Mean: 3.5
Mean of col2: 4.0
Overall Median: 3.5
Median of row2: 3.5
Overall SD: 1.707825127659933
SD of subset rows(1,2) cols(1,3): 1.118033988749895
Corr Coef of columns: [[1. 1.]
 [1. 1.]]
Corr Coef of rows 1 & 3: [[1. 1.]
 [1. 1.]]
[[1 2]
 [3 4]]


### Other Basic Functions
<p>- <strong>np.sum()</strong></p>
<p>- <strong>np.sort()</strong></p>
<p>- <strong>np.random.normal(loc=0, scale=1, size=None)</strong> where 'loc' is mean, 'scale' is sd, 'size' is the number (or shape) of values to draw</p>
<p>- <strong>np.round(num, x)</strong> where 'num' is value to be rounded, 'x' is # decimal places to round to</p>

In [37]:
np_mat2 = np.array([[3, 4],
                    [1, 5],
                    [6, 2]])
print(np_mat2)

# Find sum
print("Overall Sum: " + str(np.sum(np_mat2)))
print("Sum of subset rows(1,2) cols(1,2): " + str(np.sum(np_mat2[:2,:])))

# Sort in numerical order
print(np.sort(np_mat2, axis=None))      # sort flattened array in numerical order
print(np.sort(np_mat2))                 # sort each row's contents in numerical order

# Generate random normal data
# Seed NumPy's global generator first so this cell prints the same numbers
# every time it is run (e.g. after Restart Kernel -> Run All).
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

normal_sample = np.random.normal(size=100)
print(normal_sample)

# A second, separate draw of 100 values, rounded to one decimal place
rounded_sample = np.round(np.random.normal(size=100), 1)
print(rounded_sample)


[[3 4]
 [1 5]
 [6 2]]
Overall Sum: 21
Sum of subset rows(1,2) cols(1,2): 13
[1 2 3 4 5 6]
[[3 4]
 [1 5]
 [2 6]]
[-1.645793   -1.39289176 -0.07561425  0.53490626  0.0332905  -0.79408644
 -0.0408594  -0.51897577  1.15211938 -0.73501304 -1.62296338  1.27995573
 -1.85402345 -0.92181102 -0.03915506  1.4623496  -1.39237995  2.41647966
 -1.25236105 -0.98582998 -2.00021386 -0.6931415   0.71937782  2.42026336
 -0.02565984 -0.23828758 -1.05596606 -1.22063382 -1.30108363  2.59875436
  0.06611106 -2.4215275  -0.0797285   0.67962701  0.82871815  0.33706319
 -0.63116362  0.74596329 -0.26556064 -0.56049643  1.26654534  0.67734123
 -0.18861956  1.39581002 -1.61388105  1.08248156 -0.7747148  -0.53412125
 -1.45723274  0.55213858  1.83621474  0.31314673 -1.84288869 -0.28699926
  0.05748707 -1.00219434 -2.00761632  2.00962656 -0.55510016  0.97524105
  0.79145136 -1.27270182 -0.06605628 -0.66467801  1.49965743 -0.01411219
  0.48495771  0.74249308  1.00609888  0.40788422  1.67256027  0.16467736
 -0.52156006