# Intro to Numpy

#### Instructor: Dan Wang

## 1. The NumPy ndarray: A Multidimensional Array Object

### 1.1 Create an ndarray

In [2]:
data1 = [6, 7.5, 8, 0, 1]   # list
data1

[6, 7.5, 8, 0, 1]

In [4]:
import numpy as np

In [1]:
dir(np)

In [5]:
arr1 = np.array(data1)   # one-dimensional array; the mixed int/float list is stored as float64
arr1

array([6. , 7.5, 8. , 0. , 1. ])

In [4]:
data2 = [[1, 2, 3, 4], [5, 6, 7, 8]]
data2

[[1, 2, 3, 4], [5, 6, 7, 8]]

In [7]:
arr2 = np.array(data2)
arr2

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

In [8]:
print (arr2.ndim)   # number of dimensions (axes)
print (arr2.shape)  # shape is the tuple (number of rows, number of columns)

2
(2, 4)


In [9]:
print (arr1.dtype)
print (arr2.dtype)  

float64
int64


In [5]:
np.zeros(10)   # generate an array of ten 0s

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [6]:
np.ones((3, 6))  # 3 * 6 array, value: 1

array([[1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1.]])

In [7]:
np.zeros((3, 6, 2))  # 3-d array 3*6*2

array([[[0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.]],

       [[0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.]],

       [[0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.]]])

In [10]:
np.empty((2, 3, 2))

array([[[-1.28822975e-231, -1.49457921e-154],
        [ 2.37663529e-312,  2.56761491e-312],
        [ 8.48798317e-313,  9.33678148e-313]],

       [[ 1.08221785e-312,  6.79038653e-313],
        [ 8.70018275e-313,  1.33952119e-075],
        [ 0.00000000e+000,  8.34404897e-309]]])

In [6]:
np.arange(1,10,0.5)    # arange(start, stop, step): values from 1 up to (but not including) 10, in steps of 0.5

array([1. , 1.5, 2. , 2.5, 3. , 3.5, 4. , 4.5, 5. , 5.5, 6. , 6.5, 7. ,
       7.5, 8. , 8.5, 9. , 9.5])

In [14]:
np.eye(3, 3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

### 1.2 Data type for ndarray

In [15]:
arr1 = np.array([1, 2, 3], dtype=np.float64)
arr1

array([1., 2., 3.])

In [17]:
arr2 = np.array([1, 2, 3.3], dtype=np.int32)   # the explicit int32 dtype truncates 3.3 to 3
arr2

array([1, 2, 3])

In [21]:
# Convert the array to 64-bit floats. astype returns a new array and leaves
# arr2 untouched. The np.float alias was deprecated in NumPy 1.20 and removed
# in 1.24, so the dtype is named explicitly.
float_arr = arr2.astype(np.float64)
float_arr.dtype

dtype('float64')

In [22]:
# Byte strings that hold numbers can be converted directly to a numeric dtype.
# np.bytes_ is the portable spelling: the np.string_ alias was removed in NumPy 2.0.
numeric_strings = np.array(['1.25', '-9.6', '42'], dtype=np.bytes_)
numeric_strings.astype(float)   # the built-in float is accepted as shorthand for np.float64

array([ 1.25, -9.6 , 42.  ])

In [8]:
# Reuse the dtype of an existing array instead of spelling it out again.
calibers = np.array([0.22, 0.270], dtype=np.float64)
int_array = np.arange(0, 10)
int_array.astype(calibers.dtype)

array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])

### 1.3 Operations between Arrays and Scalars

In [23]:
[1,2,3]+[1,2,3]     # '+' on lists concatenates them; it does not add element-wise

[1, 2, 3, 1, 2, 3]

In [7]:
# vectorization
arr = np.array([[1., 2., 3.], [4., 5., 6.]])
arr

array([[1., 2., 3.],
       [4., 5., 6.]])

In [25]:
arr + arr           # '+' for array, adding operation

array([[ 2.,  4.,  6.],
       [ 8., 10., 12.]])

In [26]:
arr * arr       # element-wise product: each element times the one at the same position; shapes must match (or broadcast)

array([[ 1.,  4.,  9.],
       [16., 25., 36.]])

In [27]:
1 / arr

array([[1.        , 0.5       , 0.33333333],
       [0.25      , 0.2       , 0.16666667]])

In [28]:
arr ** 0.5

array([[1.        , 1.41421356, 1.73205081],
       [2.        , 2.23606798, 2.44948974]])

In [29]:
arr.T         # transpose

array([[1., 4.],
       [2., 5.],
       [3., 6.]])

In [30]:
arr * arr.T

ValueError: operands could not be broadcast together with shapes (2,3) (3,2) 

In [31]:
np.matmul(arr,arr.T)   # matrix product of (2,3) by (3,2), same as arr @ arr.T; '*' is element-wise and fails on these shapes

array([[14., 32.],
       [32., 77.]])

### 1.4 Basic Indexing and Slicing

In [9]:
arr = np.arange(10)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [33]:
arr[5]

5

In [34]:
arr[5:8]

array([5, 6, 7])

In [12]:
arr[5:8] = 12
arr

array([ 0,  1,  2,  3,  4, 12, 12, 12,  8,  9])

In [13]:
arr_slice = arr[5:8]  # arr_slice is a view onto elements 5..7 of arr, not a copy; use arr[5:8].copy() for an independent array
print(arr_slice)
arr_slice[1] = 12345 # writing through the view also changes arr
arr_slice

[12 12 12]


array([   12, 12345,    12])

In [14]:
arr

array([    0,     1,     2,     3,     4,    12, 12345,    12,     8,
           9])

In [14]:
arr_slice[:] = 64
arr

array([ 0,  1,  2,  3,  4, 64, 64, 64,  8,  9])

In [15]:
arr2d = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
arr2d[2]

array([7, 8, 9])

In [17]:
arr2d[0][2]

3

In [18]:
arr2d[0, 2]

3

In [19]:
arr2d[:2]

array([[1, 2, 3],
       [4, 5, 6]])

In [20]:
arr2d[:2, 1:]

array([[2, 3],
       [5, 6]])

### 1.5 Boolean Indexing

In [16]:
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
data = np.random.randn(7, 4)   # 7x4 samples from the standard normal distribution (mean 0, variance 1); one row per entry of names

In [23]:
names

array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'], dtype='<U4')

In [22]:
data

array([[-0.14413283, -0.64721095,  0.89096698, -1.68947228],
       [-0.94822389, -1.36815731,  0.50503491, -0.31978694],
       [ 1.01934022,  0.64482122, -2.42389381, -0.73740778],
       [-0.62052521,  0.96377942, -0.36626361,  2.29088454],
       [ 0.29763066,  0.90843788,  0.7922886 , -0.32980198],
       [-1.51885652, -0.43084181, -1.06695643, -0.25502564],
       [-0.64938239,  0.82010006,  1.42922141,  0.54312199]])

In [24]:
names == 'Bob'

array([ True, False, False,  True, False, False, False])

In [25]:
data[names == 'Bob']   # select the True row

array([[-0.14413283, -0.64721095,  0.89096698, -1.68947228],
       [-0.62052521,  0.96377942, -0.36626361,  2.29088454]])

In [26]:
data[names == 'Bob', 2:]

array([[ 0.89096698, -1.68947228],
       [-0.36626361,  2.29088454]])

In [27]:
names != 'Bob'

array([False,  True,  True, False,  True,  True,  True])

In [28]:
data[~(names == 'Bob')]

array([[-0.94822389, -1.36815731,  0.50503491, -0.31978694],
       [ 1.01934022,  0.64482122, -2.42389381, -0.73740778],
       [ 0.29763066,  0.90843788,  0.7922886 , -0.32980198],
       [-1.51885652, -0.43084181, -1.06695643, -0.25502564],
       [-0.64938239,  0.82010006,  1.42922141,  0.54312199]])

In [None]:
~(names == 'Bob')

In [29]:
mask = (names == 'Bob') | (names == 'Will')
mask

array([ True, False,  True,  True,  True, False, False])

In [None]:
data[mask]

In [30]:
data[data < 0] = 0
data

array([[0.        , 0.        , 0.89096698, 0.        ],
       [0.        , 0.        , 0.50503491, 0.        ],
       [1.01934022, 0.64482122, 0.        , 0.        ],
       [0.        , 0.96377942, 0.        , 2.29088454],
       [0.29763066, 0.90843788, 0.7922886 , 0.        ],
       [0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.82010006, 1.42922141, 0.54312199]])

In [None]:
data[names != 'Joe'] = 7
data

### 1.6 Fancy Indexing

In [31]:
# Build an 8x4 float array in which every row holds its own row number.
arr = np.empty(shape=(8, 4))
for i in range(arr.shape[0]):
    arr[i, :] = i   # the scalar is broadcast across the whole row
arr

array([[0., 0., 0., 0.],
       [1., 1., 1., 1.],
       [2., 2., 2., 2.],
       [3., 3., 3., 3.],
       [4., 4., 4., 4.],
       [5., 5., 5., 5.],
       [6., 6., 6., 6.],
       [7., 7., 7., 7.]])

In [32]:
arr[[4, 3, 0, 6]]

array([[4., 4., 4., 4.],
       [3., 3., 3., 3.],
       [0., 0., 0., 0.],
       [6., 6., 6., 6.]])

In [None]:
arr[[-3, -5, -7]]

In [33]:
arr = np.arange(32).reshape((8, 4))
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23],
       [24, 25, 26, 27],
       [28, 29, 30, 31]])

In [None]:
arr[[1, 5, 7, 2], [0, 3, 1, 2]]

### 1.7 Transposing Arrays and Swapping Axes

In [None]:
arr = np.arange(15).reshape((3, 5))
arr

In [None]:
arr.T

In [None]:
arr = np.random.randn(6, 3)
arr

In [None]:
np.dot(arr.T, arr)    # (3, 6) times (6, 3): the 3x3 matrix product of arr's transpose with arr

In [None]:
arr = np.arange(16).reshape((2, 2, 4))
arr

In [None]:
arr.swapaxes(1, 2)

In [None]:
arr.swapaxes(0,2).shape

## 2 Data Processing Using Arrays

### 2.1 Expressing Conditional Logic as Array Operations

In [17]:
xarr = np.array([1.1, 1.2, 1.3, 1.4, 1.5])
yarr = np.array([2.1, 2.2, 2.3, 2.4, 2.5])
cond = np.array([True, False, True, True, False])

In [18]:
# Pure-Python equivalent of np.where: take x where the condition holds, else y.
# The flag is tested for truthiness directly rather than compared with True.
result = []
for x, y, c in zip(xarr, yarr, cond):
    result.append(x if c else y)

result
            

[1.1, 2.2, 1.3, 1.4, 2.5]

In [19]:
result = np.where(cond, xarr, yarr)
result

array([1.1, 2.2, 1.3, 1.4, 2.5])

In [20]:
arr = np.random.randn(4, 4)
arr

array([[-0.92636926, -0.5535188 ,  1.50926447,  1.42854171],
       [ 0.03707566,  0.33562164, -0.84944689,  0.60644199],
       [ 1.47522922,  1.56632243, -0.95207733, -0.04298442],
       [-0.49531148,  0.19822924, -1.09227341,  0.35463631]])

In [21]:
np.where(arr > 0, 2, -2)    # new array: 2 where arr > 0, otherwise -2 (arr itself is not modified)

array([[-2, -2,  2,  2],
       [ 2,  2, -2,  2],
       [ 2,  2, -2, -2],
       [-2,  2, -2,  2]])

In [22]:
np.where(arr > 0, 2, arr)

array([[-0.92636926, -0.5535188 ,  2.        ,  2.        ],
       [ 2.        ,  2.        , -0.84944689,  2.        ],
       [ 2.        ,  2.        , -0.95207733, -0.04298442],
       [-0.49531148,  2.        , -1.09227341,  2.        ]])

### 2.2 Sorting

In [23]:
arr = np.random.randn(8)
arr

array([-0.30155911, -0.20620926,  1.4720836 ,  0.80623263,  0.32059491,
       -2.11704994,  0.16626722,  0.50043845])

In [24]:
arr.sort()
arr

array([-2.11704994, -0.30155911, -0.20620926,  0.16626722,  0.32059491,
        0.50043845,  0.80623263,  1.4720836 ])

In [28]:
arr = np.random.randn(5, 3)
arr.sort(1)   # sort in place along axis 1, i.e. within each row
arr

array([[-1.15631784, -0.71402001, -0.06085538],
       [-0.65370428, -0.12287743,  0.33528668],
       [-0.33957893,  2.23722616,  2.59164313],
       [-1.10066174,  0.10615427,  0.35688961],
       [-0.43335225,  0.38123934,  0.43060934]])

In [26]:
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
np.unique(names)

array(['Bob', 'Joe', 'Will'], dtype='<U4')

## 3 Math

In [29]:
arr = np.arange(10)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [30]:
np.sqrt(arr)

array([0.        , 1.        , 1.41421356, 1.73205081, 2.        ,
       2.23606798, 2.44948974, 2.64575131, 2.82842712, 3.        ])

In [31]:
np.exp(arr)

array([1.00000000e+00, 2.71828183e+00, 7.38905610e+00, 2.00855369e+01,
       5.45981500e+01, 1.48413159e+02, 4.03428793e+02, 1.09663316e+03,
       2.98095799e+03, 8.10308393e+03])

In [34]:
x = np.random.randn(8)
x

array([ 1.83684853,  0.33372992, -0.53318979, -0.78028704, -1.79113641,
        0.08886996, -0.30109251,  0.91015972])

In [35]:
y = np.random.randn(8)
y

array([ 1.23348942, -0.40813042, -0.83340321, -1.14287689,  1.40678316,
        1.91037417,  0.190105  , -0.97363394])

In [36]:
np.maximum(x, y) # element-wise maximum

array([ 1.83684853,  0.33372992, -0.53318979, -0.78028704,  1.40678316,
        1.91037417,  0.190105  ,  0.91015972])

In [None]:
arr = np.random.randn(7) * 5
arr

In [37]:
# modf returns a pair of arrays: (fractional parts, integral parts).
# NOTE(review): the saved output has ten whole-number entries, so it appears to
# come from an earlier arr rather than the 7-element random array created in the
# previous cell — re-run both cells to refresh it.
np.modf(arr)

(array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]),
 array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.]))

abs, fabs: Compute the absolute value element-wise for integer, floating point, or complex values.
Use fabs as a faster alternative for non-complex-valued data

sqrt: Compute the square root of each element. Equivalent to arr ** 0.5

square: Compute the square of each element. Equivalent to arr ** 2
    
exp: Compute the exponential $e^x$ of each element
    
log, log10, log2, log1p: Natural logarithm (base e), log base 10, log base 2, and log(1 + x), respectively
    
sign: Compute the sign of each element: 1 (positive), 0 (zero), or -1 (negative)
        
ceil: Compute the ceiling of each element, i.e. the smallest integer greater than or equal to
each element

floor: Compute the floor of each element, i.e. the largest integer less than or equal to each
element

rint: Round elements to the nearest integer, preserving the dtype
    
modf: Return the fractional and integral parts of the array as two separate arrays
    
isnan: Return boolean array indicating whether each value is NaN (Not a Number)
    
isfinite, isinf: Return boolean array indicating whether each element is finite (non-inf, non-NaN) or
infinite, respectively

### 3.2 Mathematical and Statistical Methods

In [38]:
arr = np.random.randn(5, 4)
arr

array([[ 0.45395968, -0.60167409,  0.34534922,  1.18873599],
       [ 1.35862767, -2.59742781, -1.25797408,  1.61294773],
       [-0.36678588,  2.13931552,  0.28680341,  0.0548853 ],
       [-1.12402316, -0.40889029, -1.07065124,  0.39648481],
       [-2.87587048,  0.96775933, -1.40405461,  1.19683816]])

In [39]:
arr.mean()

-0.08528224084697264

In [40]:
np.mean(arr)

-0.08528224084697264

In [41]:
arr.sum()

-1.7056448169394527

In [42]:
np.sum(arr)

-1.7056448169394527

In [45]:
arr.mean(axis=0)  # axis=0 averages down the rows, giving one mean per column

array([-0.51081843, -0.10018347, -0.62010546,  0.8899784 ])

In [46]:
arr.sum(0)

array([-2.55409217, -0.50091733, -3.10052731,  4.44989199])

In [47]:
arr = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8]])
arr

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [48]:
arr.cumsum(1)

array([[ 0,  1,  3],
       [ 3,  7, 12],
       [ 6, 13, 21]])

In [None]:
arr.cumprod(1)

Method Description

sum: Sum of all the elements in the array or along an axis. Zero-length arrays have sum 0.

mean: Arithmetic mean. Zero-length arrays have NaN mean.

std, var: Standard deviation and variance, respectively, with optional degrees of freedom adjustment


min, max: Minimum and maximum.

argmin, argmax: Indices of minimum and maximum elements, respectively.

cumsum: Cumulative sum of elements starting from 0

cumprod: Cumulative product of elements starting from 1

### 3.3 Methods for Boolean Arrays

In [None]:
arr = np.random.randn(100)
(arr > 0).sum()

In [49]:
bools = np.array([False, False, True, False])

In [50]:
bools.any()  # whether there is at least one true 

True

In [51]:
bools.all()

False

### 3.4 Linear Algebra

In [52]:
x = np.array([[1., 2., 3.], [4., 5., 6.]])
y = np.array([[6., 23.], [-1, 7], [8, 9]])
x

array([[1., 2., 3.],
       [4., 5., 6.]])

In [53]:
y

array([[ 6., 23.],
       [-1.,  7.],
       [ 8.,  9.]])

In [54]:
x*y

ValueError: operands could not be broadcast together with shapes (2,3) (3,2) 

In [55]:
x.dot(y)

array([[ 28.,  64.],
       [ 67., 181.]])

In [56]:
from numpy.linalg import inv, qr

# X^T X for a random 5x5 matrix: a square, symmetric matrix to feed to inv and qr.
X = np.random.randn(5, 5)
mat = np.dot(X.T, X)
mat

array([[ 4.43252272,  0.76957475, -0.02286849,  0.04657209, -1.56286644],
       [ 0.76957475,  5.01638762,  1.1556318 , -0.87406441,  1.65516476],
       [-0.02286849,  1.1556318 ,  4.65416394, -4.48745565, -0.41168123],
       [ 0.04657209, -0.87406441, -4.48745565,  4.54558161,  0.14052257],
       [-1.56286644,  1.65516476, -0.41168123,  0.14052257,  2.08679451]])

In [57]:
inv(mat)

array([[  7277.91697306,  -8835.1532519 ,  31110.8922251 ,
         28423.89405184,  16681.86206566],
       [ -8835.1532519 ,  10726.06549539, -37768.90959773,
        -34506.85579619, -20251.79237919],
       [ 31110.8922251 , -37768.90959773, 132998.5110171 ,
        121511.97463462,  71312.12595543],
       [ 28423.89405184, -34506.85579619, 121511.97463462,
        111017.72982098,  65153.02938075],
       [ 16681.86206566, -20251.79237919,  71312.12595543,
         65153.02938075,  38238.05723124]])

In [58]:
mat.dot(inv(mat))

array([[ 1.00000000e+00,  9.74208250e-13, -2.01806300e-11,
        -2.41634791e-12, -1.79878746e-11],
       [ 6.19142840e-12,  1.00000000e+00, -3.72792834e-12,
        -1.64538378e-11, -1.05125430e-11],
       [-2.23963537e-12, -5.24383257e-12,  1.00000000e+00,
         4.53011218e-11,  2.59987252e-11],
       [-8.25828659e-12,  1.98040245e-11, -5.58753729e-11,
         1.00000000e+00, -5.45422508e-11],
       [ 2.76723193e-13, -7.78781908e-13, -1.23539431e-12,
        -2.56684137e-11,  1.00000000e+00]])

In [59]:
q, r = qr(mat)
r

array([[-4.76285066e+00, -9.69525837e-01, -2.34304810e-01,
         7.80048613e-02,  1.86843779e+00],
       [ 0.00000000e+00, -5.44568210e+00, -2.60238443e+00,
         2.42385927e+00, -2.16081567e+00],
       [ 0.00000000e+00,  0.00000000e+00, -6.03959320e+00,
         5.96503285e+00,  1.09985927e+00],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        -3.50255898e-01,  5.96813799e-01],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
         0.00000000e+00,  9.33279948e-06]])

diag: Return the diagonal (or off-diagonal) elements of a square matrix as a 1D array, or convert a 1D array into a square matrix with zeros on the off-diagonal

dot: Matrix multiplication

trace: Compute the sum of the diagonal elements

det: Compute the matrix determinant

eig: Compute the eigenvalues and eigenvectors of a square matrix

inv: Compute the inverse of a square matrix

pinv: Compute the Moore-Penrose pseudo-inverse of a matrix

qr: Compute the QR decomposition

svd: Compute the singular value decomposition (SVD)

solve: Solve the linear system Ax = b for x, where A is a square matrix

lstsq: Compute the least-squares solution to y = Xb

## 5 Random Variable Generation

In [None]:
samples = np.random.normal(size=(5, 4))
samples.shape[0]

In [None]:
%matplotlib inline

In [None]:
import matplotlib.pyplot as plt   # pyplot is the supported interface; pylab is discouraged
import numpy as np

# Simulate a simple random walk: each step is +1 or -1, and the walker's
# position is the running total of the steps taken so far.
nsteps = 1000
draws = np.random.randint(0, 2, size=nsteps)   # coin flips: 0 or 1
steps = np.where(draws > 0, 1, -1)             # map 1 -> +1 and 0 -> -1
walk = steps.cumsum()

# Use an explicit figure/axes pair and label the plot so it stands on its own.
fig, ax = plt.subplots()
ax.plot(walk)
ax.set(title="Random walk", xlabel="step", ylabel="position");