## Understanding Data Types in Python

In [2]:
import numpy as np

In [3]:
import array

# Pack a plain Python list into a fixed-type array from the standard library.
L = [*range(10)]
A = array.array('i', L)
A

array('i', [0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

Here `'i'` is a type code indicating the contents are integers.

In [4]:
# integer array built from a range object
np.array(range(1, 5))

array([1, 2, 3, 4])

In [5]:
# integer array built from a Python list
np.array([1, 4, 2, 5, 3])

array([1, 4, 2, 5, 3])

Unlike Python lists, NumPy is constrained to arrays that all contain the same type. If types do not match, NumPy will upcast if possible.

In [6]:
# one float among the integers: every element is stored as a float
np.array([3.14, 5, 2, 1])

array([3.14, 5.  , 2.  , 1.  ])

In [7]:
# set the data type explicitly with the dtype keyword
np.array([1, 2, 3, 4], dtype='float32')

array([1., 2., 3., 4.], dtype=float32)

NumPy arrays can explicitly be multi-dimensional:


In [8]:
# nested lists result in a multi-dimensional array
# (each inner range becomes one row)
np.array([range(i, i + 3) for i in [2, 4, 6]])

array([[2, 3, 4],
       [4, 5, 6],
       [6, 7, 8]])

## Create Arrays from Scratch

In [9]:
# Create a one-dimensional, length-10 integer array filled with zeros
np.zeros(10, dtype=int)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [10]:
# Create a 3x5 floating-point array filled with ones
# (the shape is passed as a (rows, columns) tuple)
np.ones((3, 5), dtype=float)

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [12]:
# Create a 3x5 array in which every element is 3.14
# (no dtype is given; the result shown is floating-point)
np.full((3, 5), 3.14)

array([[3.14, 3.14, 3.14, 3.14, 3.14],
       [3.14, 3.14, 3.14, 3.14, 3.14],
       [3.14, 3.14, 3.14, 3.14, 3.14]])

In [13]:
# Create an array filled with a linear sequence
# starting at 0 and stepping by 2, up to but not including 20
# (this is similar to the built-in range() function)
np.arange(0, 20, 2)

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

In [14]:
# Create an array of five values evenly spaced between 0 and 1
# (both endpoints are included)
np.linspace(0, 1, 5)

array([0.  , 0.25, 0.5 , 0.75, 1.  ])

In [16]:
# The same interval divided into ten evenly spaced values
np.linspace(0, 1, 10)

array([0.        , 0.11111111, 0.22222222, 0.33333333, 0.44444444,
       0.55555556, 0.66666667, 0.77777778, 0.88888889, 1.        ])

In [18]:
# Create a 3x3 array of uniformly distributed
# random values in the half-open interval [0, 1)
# NOTE(review): no seed has been set at this point in the notebook,
# so the values change on every run
np.random.random((3, 3))

array([[0.67131628, 0.42731828, 0.93856243],
       [0.07146304, 0.62210284, 0.59252692],
       [0.45659136, 0.43578205, 0.09108   ]])

In [19]:
# Create a 3x3 array of normally distributed random values
# with mean 0 and standard deviation 1
np.random.normal(0, 1, (3, 3))

array([[-0.59390493, -0.24458159,  1.76871424],
       [-0.88873466,  1.37495137,  0.07875476],
       [-2.00301955,  1.07895984, -1.34756912]])

In [20]:
# Create a 3x3 array of random integers in the half-open interval [0, 10)
# (the upper bound, 10, is never returned)
np.random.randint(0, 10, (3, 3))

array([[2, 1, 5],
       [0, 3, 2],
       [4, 4, 1]], dtype=int32)

In [21]:
# Create a 3x3 identity matrix (ones on the diagonal, zeros elsewhere)
np.eye(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [22]:
# Create an uninitialized array of three values
# (no dtype is given, so the elements are floats, not integers)
# The values will be whatever happens to already exist at that memory location
np.empty(3)

array([1., 1., 1.])

## NumPy Standard Data Types

In [23]:
# Data type specified with a string
np.zeros(10, dtype='int16')

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int16)

In [24]:
# The same data type specified with the corresponding NumPy object
np.zeros(10, dtype=np.int16)


array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int16)

## NumPy Array Attributes

In [25]:
# Seed the global random number generator so the arrays below are reproducible
np.random.seed(0)

In [26]:
# Three random integer arrays; the draw order matters because all three
# consume the same seeded global generator
x1 = np.random.randint(10, size=6) # One-dimensional array
x2 = np.random.randint(10, size=(3, 4)) # Two-dimensional array
x3 = np.random.randint(10, size=(3, 4, 5)) # Three-dimensional array

Each array has the attributes `ndim` (the number of dimensions), `shape` (the size of each dimension), and `size` (the total size of the array):

In [29]:
# ndim: number of dimensions
print(f'x3 dim: {x3.ndim}')

x3 dim: 3


In [30]:
# shape: size of each dimension
print(f'x3 shape: {x3.shape}')

x3 shape: (3, 4, 5)


In [31]:
# size: total number of elements (3 * 4 * 5)
print(f'x3 size: {x3.size}')

x3 size: 60


In [32]:
# dtype: data type of the array elements
# (f-string form, consistent with the other attribute cells; same printed text)
print(f'dtype: {x3.dtype}')

dtype: int32


In [33]:
# itemsize: size in bytes of a single array element
print(f'item size: {x3.itemsize} bytes')

item size: 4 bytes


In [34]:
# nbytes: total size in bytes of the array (itemsize * size)
print(f'nbytes: {x3.nbytes} bytes')

nbytes: 240 bytes


## Array Slicing: Accessing Subarrays

In [None]:
# General slice syntax (not executable as written -- x, start, stop and step
# are placeholders):
#
#     x[start:stop:step]
#
# Any of the three values may be omitted; the cells below show the effect of
# leaving out each one.

In [36]:
# A simple one-dimensional array to slice
x = np.arange(10)
x

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [37]:
x[:5]  # first five elements

array([0, 1, 2, 3, 4])

In [38]:
x[5:]  # elements from index 5 onward

array([5, 6, 7, 8, 9])

In [39]:
x[4:7]  # middle sub-array: indices 4, 5 and 6

array([4, 5, 6])

In [40]:
x[::2]  # every other element

array([0, 2, 4, 6, 8])

In [42]:
x[1::2]  # every other element, starting at index 1

array([1, 3, 5, 7, 9])

When the `step` value is negative, the defaults for `start` and `stop` are swapped. This becomes a convenient way to reverse an array:

In [43]:
x[::-1]  # all elements, reversed

array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])

In [44]:
x[5::-2]  # every other element going backwards, starting from index 5

array([5, 3, 1])

### Multi-dimensional subarrays

Multi-dimensional slices work in the same way, with multiple slices separated by commas.

In [45]:
x2  # the 3x4 random integer array created earlier

array([[3, 5, 2, 4],
       [7, 6, 8, 8],
       [1, 6, 7, 7]], dtype=int32)

In [48]:
x2[:2, :3] # first two rows, first three columns

array([[3, 5, 2],
       [7, 6, 8]], dtype=int32)

In [49]:
x2[:3, ::2]  # all three rows, every other column

array([[3, 2],
       [7, 8],
       [1, 7]], dtype=int32)

In [51]:
x2[::-1, ::-1] # reverse the whole array (rows and columns together)

array([[7, 7, 6, 1],
       [8, 8, 6, 7],
       [4, 2, 5, 3]], dtype=int32)

#### Accessing array rows and columns

In [52]:
x2[:, 0] # first column of x2 (returned as a one-dimensional array)

array([3, 7, 1], dtype=int32)

In [55]:
x2[0, :] # first row of x2

array([3, 5, 2, 4], dtype=int32)

In [56]:
x2[0] # the trailing empty slice can be omitted: equivalent to x2[0, :]

array([3, 5, 2, 4], dtype=int32)

### Subarrays as no-copy views

One important—and extremely useful—thing to know about array slices is that they return *views* rather than *copies* of the array data.

In [58]:
x2  # the original array, before any modification

array([[3, 5, 2, 4],
       [7, 6, 8, 8],
       [1, 6, 7, 7]], dtype=int32)

In [59]:
# Let's extract a 2x2 subarray
x2_sub = x2[:2, :2]
x2_sub

array([[3, 5],
       [7, 6]], dtype=int32)

Now if we modify this subarray, the original array will be changed.

In [60]:
# Overwrite one element through the subarray
x2_sub[0, 0] = 99
x2_sub

array([[99,  5],
       [ 7,  6]], dtype=int32)

In [61]:
x2  # the change made through x2_sub is visible in the original array

array([[99,  5,  2,  4],
       [ 7,  6,  8,  8],
       [ 1,  6,  7,  7]], dtype=int32)

This default behavior is actually quite useful: it means that when we work with large datasets, we can access and process pieces of the datasets without the need to copy the underlying data buffer.

### Creating copies of arrays

In [62]:
# .copy() makes an explicit copy of the sliced data
x2_sub_copy = x2[:2, :2].copy()
x2_sub_copy

array([[99,  5],
       [ 7,  6]], dtype=int32)

In [63]:
# Overwrite one element of the copy
x2_sub_copy[0, 0] = 42
x2_sub_copy

array([[42,  5],
       [ 7,  6]], dtype=int32)

In [64]:
# modifying the copy does not change the original array
x2

array([[99,  5,  2,  4],
       [ 7,  6,  8,  8],
       [ 1,  6,  7,  7]], dtype=int32)

## Reshaping of Arrays

In [65]:
# Put the numbers 1 through 9 into a 3x3 grid
grid = np.arange(1, 10).reshape(3, 3)
grid

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

For this to work, the size of the initial array must match the size of the reshaped array. Where possible, the `reshape` method will use a no-copy view of the initial array, but with non-contiguous memory buffers this is not always the case.

Another common reshaping pattern is the conversion of a one-dimensional array into a two-dimensional row or column matrix. This can be done with the `reshape` method, or more easily by using `np.newaxis` within a slice operation.

In [66]:
# A one-dimensional array to turn into row and column vectors
x = np.array([1, 2, 3])
x

array([1, 2, 3])

In [67]:
x.reshape((1, 3))  # row vector via reshape

array([[1, 2, 3]])

In [69]:
x[np.newaxis, :] # row vector via newaxis (new leading axis of length 1)

array([[1, 2, 3]])

In [70]:
x.reshape((3, 1))  # column vector via reshape

array([[1],
       [2],
       [3]])

In [71]:
x[:, np.newaxis]  # column vector via newaxis (new trailing axis of length 1)

array([[1],
       [2],
       [3]])

## Array Concatenation and Splitting

In [73]:
# Join two one-dimensional arrays end to end
x = np.array((1, 2, 3))
y = np.array((3, 2, 1))
np.concatenate((x, y))

array([1, 2, 3, 3, 2, 1])

In [74]:
# np.concatenate accepts more than two arrays at once
z = np.array([99, 99, 99])
np.concatenate([x, y, z])

array([ 1,  2,  3,  3,  2,  1, 99, 99, 99])

In [76]:
# 2 dimensional arrays
# A two-dimensional array to join with itself
grid = np.array([[1, 2, 3], [4, 5, 6]])

# axis=0 (the default) joins along the first axis, i.e. appends rows
np.concatenate((grid, grid), axis=0)

array([[1, 2, 3],
       [4, 5, 6],
       [1, 2, 3],
       [4, 5, 6]])

In [78]:
# concatenate along the second axis (axis=1, because axes are zero-indexed)
np.concatenate([grid, grid], axis=1)

array([[1, 2, 3, 1, 2, 3],
       [4, 5, 6, 4, 5, 6]])

For working with arrays of mixed dimensions, it can be clearer to use the `np.vstack` and `np.hstack` functions:

In [80]:
x = np.array((1, 2, 3))
grid = np.array([[9, 8, 7], [6, 5, 4]])

# stack vertically: x becomes the first row, on top of the rows of grid
np.vstack((x, grid))

array([[1, 2, 3],
       [9, 8, 7],
       [6, 5, 4]])

In [81]:
# horizontally stack the arrays
# horizontally stack the arrays: y (a 2x1 column) is appended
# as an extra column to the right of grid
y = np.array([[99],
              [99]])
np.hstack([grid, y])

array([[ 9,  8,  7, 99],
       [ 6,  5,  4, 99]])

Similarly, `np.dstack` will stack arrays along the third axis.

### Splitting of arrays

In [83]:
# NOTE: this cell rebinds x1, x2 and x3, which earlier held the random arrays
# from the "NumPy Array Attributes" section; re-running earlier cells that
# use them after this one will give different results.
x = np.array([1, 2, 3, 99, 99, 3, 2, 1])
x1, x2, x3 = np.split(x, [3, 5]) # split points: pieces are x[:3], x[3:5], x[5:]
x1, x2, x3

(array([1, 2, 3]), array([99, 99]), array([3, 2, 1]))

In [85]:
# A 4x4 grid holding the numbers 0 through 15, row by row
grid = np.arange(16).reshape(4, 4)
grid

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [88]:
# split the rows at index 2: the first two rows, then the remaining rows
upper, lower = np.vsplit(grid, [2])
upper, lower

(array([[0, 1, 2, 3],
        [4, 5, 6, 7]]),
 array([[ 8,  9, 10, 11],
        [12, 13, 14, 15]]))

In [90]:
# split the columns at index 2: the first two columns, then the remaining columns
left, right = np.hsplit(grid, [2])
left, right

(array([[ 0,  1],
        [ 4,  5],
        [ 8,  9],
        [12, 13]]),
 array([[ 2,  3],
        [ 6,  7],
        [10, 11],
        [14, 15]]))