Numpy is the fundamental package for numeric computing with Python. It provides powerful ways to create store and manipulate data, which makes it able to seamlessly and speedily integrate with a wide variety of databases and data formats. This is also the foundation that Pandas is built on which is a high performance data-centric package that we're going to learn more about in this course. 

In this lecture, we're going to talk about creating arrays with certain data types, manipulating arrays ,selecting elements from arrays and loading data sets into arrays. Such functions are useful for manipulating data and understanding the functionalities of other common python data packages

In [2]:
import numpy as np
import math

### 1. Array Creation

In [3]:
a = np.array([1,2,3])
print(a)

# We can print the number of dimensions of a list using the ndim attribute
print(a.ndim)

[1 2 3]
1


In [4]:
# If we pass in a list of lists in numpy array, we create a multi-dimensional array, for instance, a matix
b = np.array([[1,2,3],[4,5,6]])
b

array([[1, 2, 3],
       [4, 5, 6]])

In [5]:
# We can print out the length of each dimnesion by calling the shape attribute (return a tuple)
b.shape

(2, 3)

In [6]:
# We can also check the type of items in the array
a.dtype

dtype('int32')

In [7]:
# Besides integers, floats are also accepted in numpy arrays
c = np.array([2.2,5,1.1])
c.dtype.name

'float64'

In [8]:
c

array([2.2, 5. , 1.1])

In [9]:
# Sometimes we know the shape of array that we want to create, but not sure what to be in it.
# Numpy offers several functions to create arrays with initial placeholders, such as zero;s or one's

d = np.zeros((2,3))
print(d)

e = np.ones((2,3))
print(e)

[[0. 0. 0.]
 [0. 0. 0.]]
[[1. 1. 1.]
 [1. 1. 1.]]


In [10]:
# We can also generate an array with random numbers

np.random.rand(2,3)

array([[0.06461625, 0.17311035, 0.31779442],
       [0.63964827, 0.27570981, 0.64582404]])

In [11]:
# We can also create a sequence of numbers in array with the arange() function.
# The first argument is the starting bound
# The second argument is the ending bound (not inclusive)
# The third argument is the difference between each consecutive numbers (spacing)

f = np.arange(10,50,2)
f

array([10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42,
       44, 46, 48])

In [12]:
# If we to generate a sequence of floats, we can use the linspace() function. 
# The third argument here is the total number of elements we want to generate

np.linspace(0,3,12)

array([0.        , 0.27272727, 0.54545455, 0.81818182, 1.09090909,
       1.36363636, 1.63636364, 1.90909091, 2.18181818, 2.45454545,
       2.72727273, 3.        ])

### 2. Array Operations

In [13]:
# Arithmetic operators on array applay elementwise

a = np.array([10,20,30,40])
b = np.array([1,2,3,4])

c = a -b 
print(c)

d = a*b
print(d)

[ 9 18 27 36]
[ 10  40  90 160]


In [14]:
# Let's create an array of typical Ann arbor winter farenheit values
farenheit = np.array([0,-10,-5,-15,0])

celcius = (farenheit - 31) * (5/9)
celcius

array([-17.22222222, -22.77777778, -20.        , -25.55555556,
       -17.22222222])

In [15]:
# Boolean array 
# We can apply an operator on an array, and a boolean array will be returned for any element in the original, with True being the output if it meets the condition.

celcius > -20

array([ True, False, False, False,  True])

In [16]:
# using the modulus operator to check numbers in ana array to see if they are even

celcius%2 == 0

array([False, False,  True, False, False])

### 3. Matrix manipulation

#### 3.1. Matrix product

In [17]:
# Matrix product
# For element wise, we use the "*" sign 

a = np.array([[1,1],[0,1]])
b = np.array([[2,0],[3,4]])

print(a*b)

[[2 0]
 [0 4]]


In [20]:
a*b

array([[2, 0],
       [0, 4]])

In [21]:
np.dot(a,b)

array([[5, 4],
       [3, 4]])

In [22]:
# if we want to do matrix product, we use "@" sign or use the dot function
print(a@b)

np.dot(a,b)

[[5 4]
 [3 4]]


array([[5, 4],
       [3, 4]])

In [19]:
# a few more linear algebra concepts are worth layering in here. 
# The product of two matrices is only plausible when the inner dimensions of the two matrices are the same. 
# The dimensions refer to the numberof elements, both horizontal and vertical in the rendered matricesthat you've been seeing here. 
# So, we can use numpy to quicklysee the shape of the matrix:

a.shape

(2, 2)

In [16]:
from itertools import count
import numpy as np
import math

data = np.array([1,2,3,4,5,10,15,65,843,153,121,546,21,64.1,1,54,143,15,4843,1651])



len(data)


20

In [22]:
mean = np.mean(data)
std = np.std(data)

upper = mean+std
lower = mean - std


data[data > lower]

array([1.000e+00, 2.000e+00, 3.000e+00, 4.000e+00, 5.000e+00, 1.000e+01,
       1.500e+01, 6.500e+01, 8.430e+02, 1.530e+02, 1.210e+02, 5.460e+02,
       2.100e+01, 6.410e+01, 1.000e+00, 5.400e+01, 1.430e+02, 1.500e+01,
       4.843e+03, 1.651e+03])

In [23]:
A = 2*np.arange(10)

In [24]:
a

NameError: name 'a' is not defined

In [32]:
idx = (A > 2) * (A<20)

a = np.where(idx)

In [36]:
len(A[idx])

8