In [1]:
import numpy as np

## 1- Create Arrays

In [2]:
# Build NumPy arrays from plain Python sequences.
data1 = [1,2,3,4,5] # plain Python list of ints
arr1 = np.array(data1) # 1d array with 5 elements

data2 = [range(1, 5), range(5, 9)] # list of two range objects (1..4 and 5..8)
arr2 = np.array(data2) # 2d array: each range becomes one row -> shape (2, 4)
arr2.tolist() # convert array back to nested Python lists

[[1, 2, 3, 4], [5, 6, 7, 8]]

### special arrays

In [3]:
# Only the last expression of a cell is displayed; the earlier calls are
# evaluated but their results are discarded.
np.zeros(10) # 1d array of 10 zeros
np.zeros((3, 6)) # 3 x 6 array of zeros (the shape is passed as a tuple)
np.ones(10) # 1d array of 10 ones
np.linspace(0, 1, 5) # 0 to 1 (inclusive) with 5 evenly spaced points
np.logspace(0, 3, 4) # 10^0 to 10^3 (inclusive) with 4 points, evenly spaced on a log scale

array([   1.,   10.,  100., 1000.])

In [4]:
# arange is like the built-in range, except it returns a NumPy array
int_array = np.arange(5)
float_array = int_array.astype(float) # new array converted to float; int_array is left unchanged
float_array

array([0., 1., 2., 3., 4.])

## 2- Examining Arrays

In [5]:
# arr1 (1d, built from Python ints) and arr2 (2 x 4) come from the array-creation cell.
arr1.dtype # integer dtype (typically int64, platform dependent) - not float64, since arr1 holds ints
arr2.ndim # 2
arr2.shape # (2, 4) - axis 0 is rows, axis 1 is columns
arr2.size # 8 - total number of elements
len(arr2) # 2 - size of first dimension (aka axis)

2

## 3- Reshaping

In [6]:
# Ten floats 0..9 laid out as 2 rows x 5 columns.
arr = np.arange(10, dtype=float).reshape((2, 5))
print(arr.shape)
# reshape returns a new array object; arr itself keeps its (2, 5) shape.
print(arr.reshape(5, 2))

(2, 5)
[[0. 1.]
 [2. 3.]
 [4. 5.]
 [6. 7.]
 [8. 9.]]


### add a new axis

In [7]:
a = np.array([0, 1]) # 1d array, shape (2,)
a_col = a[:, np.newaxis] # insert a length-1 axis at the end -> shape (2, 1), i.e. a column
print(a_col)
# or, equivalently (np.newaxis is an alias for None)
a_col = a[:, None]

[[0]
 [1]]


### transpose

In [8]:
# a_col is the (2, 1) column built in the previous cell; .T swaps its axes -> shape (1, 2).
print(a_col.T)

[[0 1]]


### Flatten: returns a flat copy of the original array

In [9]:
# flatten() returns a 1d copy, so writing into arr_flt must not affect arr.
arr_flt = arr.flatten()
arr_flt[0] = 33
print(arr_flt) # first element is now 33
print(arr) # unchanged: still starts with 0

[33.  1.  2.  3.  4.  5.  6.  7.  8.  9.]
[[0. 1. 2. 3. 4.]
 [5. 6. 7. 8. 9.]]


## 4- Summary on axes, reshaping/flattening and selection

<img src="https://duchesnay.github.io/pystatsml/_images/numpy_array3d.png" width=300 height=300 />

In [10]:
# 24 consecutive integers (0..23): exactly enough to fill a 2 x 3 x 4 array.
x = np.arange(2 * 3 * 4)
print(x)

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23]


In [11]:
# reshape into 3D: 2 planes (axis 0) x 3 rows (axis 1) x 4 columns (axis 2)
x = x.reshape(2, 3, 4)
print(x)

[[[ 0  1  2  3]
  [ 4  5  6  7]
  [ 8  9 10 11]]

 [[12 13 14 15]
  [16 17 18 19]
  [20 21 22 23]]]


In [12]:
# Selection: get the first plane (index 0 along axis 0) -> shape (3, 4)
print(x[0, :, :])

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]


In [13]:
# Selection: get the first row of each plane (index 0 along axis 1) -> shape (2, 4)
print(x[:, 0, :])

[[ 0  1  2  3]
 [12 13 14 15]]


In [14]:
# Selection: get the first column of each plane (index 0 along axis 2) -> shape (2, 3)
print(x[:, :, 0])

[[ 0  4  8]
 [12 16 20]]


### Exercise:
* get second row
* get third column

In [15]:
arr = np.arange(10, dtype=float).reshape((2, 5))
print(arr)

# Only the last expression below is displayed as the cell output.
arr[1, :]  # second row (index 1): array([5., 6., 7., 8., 9.])
arr[:, 2]  # third column (index 2): array([2., 7.])

[[0. 1. 2. 3. 4.]
 [5. 6. 7. 8. 9.]]


array([2., 7.])

In [16]:
# Flattened copy in row order: all of row 0 followed by all of row 1.
print(arr.flatten())

[0. 1. 2. 3. 4. 5. 6. 7. 8. 9.]


## 5- Stack Arrays

In [17]:
# Two 1d arrays of length 2, used as inputs for the stacking examples.
a = np.array([0, 1])
b = np.array([2, 3])

### horizontal stacking

In [18]:
# 1d inputs are joined end to end -> a single 1d array of length 4.
np.hstack([a, b])

array([0, 1, 2, 3])

### vertical stacking

In [19]:
# Each 1d input becomes one row -> 2d array of shape (2, 2).
np.vstack([a,b])

array([[0, 1],
       [2, 3]])

In [20]:
# np.stack joins the inputs along a new axis; with the default axis=0 each
# input becomes one row, so for these 1d arrays the result matches vstack.
np.stack([a,b])

array([[0, 1],
       [2, 3]])

## 6- Selection

In [21]:
# single item
arr = np.arange(10, dtype=float).reshape((2, 5))

arr[0]         # row 0 (indexing the first axis works like indexing a list)
arr[0, 3]      # row 0, column 3: returns 3.0
arr[0][3]      # alternative syntax: take row 0 first, then its element 3

3.0

### slicing
* Syntax: `start:stop:step`, with start (default 0), stop (default: end of the axis) and step (default 1)

In [22]:
# arr is the 2 x 5 float array [[0..4], [5..9]] defined in the previous cell.
arr[0, :]      # row 0: returns 1d array ([0., 1., 2., 3., 4.])
arr[:, 0]      # column 0: returns 1d array ([0., 5.])
arr[:, :2]     # columns strictly before index 2 (2 first columns)
arr[:, 2:]     # columns after index 2 included
arr2 = arr[:, 1:4]  # columns between index 1 (included) and 4 (excluded)
print(arr2)

[[1. 2. 3.]
 [6. 7. 8.]]


In [23]:
# Row 0 in reverse order: a step of -1 walks the columns from last to first
print(arr[0, ::-1])

[4. 3. 2. 1. 0.]


### Fancy indexing: Integer or boolean array indexing
- returns a copy, not a view

In [24]:
# Integer array indexing: select columns 1, 2 and 3 by listing their indices
arr2 = arr[:, [1, 2, 3]] # return a copy
print(arr2)

# Writing into the copy must leave the source array untouched.
arr2[0, 0] = 44
print(arr2) # top-left element is now 44
print(arr) # unchanged

[[1. 2. 3.]
 [6. 7. 8.]]
[[44.  2.  3.]
 [ 6.  7.  8.]]
[[0. 1. 2. 3. 4.]
 [5. 6. 7. 8. 9.]]


In [25]:
# Boolean array indexing: keep the elements where the mask (arr > 5) is True;
# the result is a 1d array
arr2 = arr[arr > 5] # return a copy
print(arr2)

# Writing into the copy must leave the source array untouched.
arr2[0] = 44
print(arr2) # first selected element is now 44
print(arr) # unchanged

[6. 7. 8. 9.]
[44.  7.  8.  9.]
[[0. 1. 2. 3. 4.]
 [5. 6. 7. 8. 9.]]


In [26]:
# Only the last expression (np.unique) is displayed as the cell output.
names = np.array(['Bob', 'Joe', 'Will', 'Bob'])
names == 'Bob' # element-wise comparison: returns a boolean array
names[names != 'Bob'] # logical selection: every name except 'Bob'
(names == 'Bob') | (names == 'Will') # element-wise OR uses |; keywords "and/or" don't work with boolean arrays
names[names != 'Bob'] = 'Joe' # assign based on a logical selection (modifies names in place)
np.unique(names) # sorted unique values (set-like function)

array(['Bob', 'Joe'], dtype='<U4')

## 7- Vectorized Operations

In [27]:
# Only the last expression (np.maximum) is displayed as the cell output.
nums = np.arange(5)
nums * 10                           # multiply each element by 10 (result discarded, nums unchanged)
nums = np.sqrt(nums)                # square root of each element; nums is rebound to this float array
np.ceil(nums)                       # also floor, rint (round to nearest int)
np.isnan(nums)                      # checks for NaN, element by element
nums + np.arange(5)                 # add element-wise
np.maximum(nums, np.array([1, -2, 3, -4, 5]))  # element-wise maximum of the two arrays

array([1.        , 1.        , 3.        , 1.73205081, 5.        ])

In [28]:
# Compute Euclidean distance between 2 vectors
# NOTE: no seed is set before this point in the notebook as shown, so vec1,
# vec2 and dist differ on every run.
vec1 = np.random.randn(10)
vec2 = np.random.randn(10)
dist = np.sqrt(np.sum((vec1 - vec2) ** 2)) # square root of the sum of squared differences

In [29]:
# math and stats
# Only the last expression is displayed as the cell output.
rnd = np.random.randn(4, 2) # random normals in 4x2 array
rnd.mean()                  # mean of all elements
rnd.std()                   # standard deviation of all elements
rnd.argmin()                # index of minimum element in the flattened array
rnd.sum()                   # sum of all elements
rnd.sum(axis=0)             # sum over rows: one total per column (2 values)
rnd.sum(axis=1)             # sum over columns: one total per row (4 values)

array([-1.20552663, -0.28814891,  2.43117722, -0.76518707])

In [30]:
# methods for boolean arrays (rnd is the 4x2 random array from the previous cell)
(rnd > 0).sum()             # counts number of positive values (each True counts as 1)
(rnd > 0).any()             # checks if any value is True
(rnd > 0).all()             # checks if all values are True

False

In [31]:
# random numbers
np.random.seed(12234)       # Set the seed so the draws below are reproducible
np.random.rand(2, 3)        # 2 x 3 matrix of uniform samples in [0, 1) - 1 is excluded
np.random.randn(10)         # random normals (mean 0, sd 1)
np.random.randint(0, 2, 10) # 10 randomly picked 0 or 1 (the upper bound 2 is excluded)

array([0, 0, 0, 1, 1, 0, 1, 1, 1, 1])