# Python Data Science Handbook - NumPy Arrays, Slicing, Indexing, Concatenating, Splitting

In [1]:
import numpy

In [2]:
# report the version string of the installed NumPy package
numpy.__version__

'1.12.1'

In [3]:
import numpy as np

## Python Lists

In [4]:
# build a plain Python list holding the integers 0 through 9
L = list(range(10))
L

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [5]:
# type of the first element of L
type(L[0])

int

In [6]:
# convert every element of L to its string form
L2 = [str(c) for c in L]
L2

['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']

In [7]:
# type of the first element of L2
type(L2[0])

str

In [9]:
# a single Python list may mix element types freely
L3 = [True, "2", 3.0, 4]
# report the type of each entry, in order
list(map(type, L3))

[bool, str, float, int]

## Fixed-Type Arrays in Python

In [11]:
import array
L = list(range(10))
# the 'i' typecode requests a signed-integer array, so every element shares one type
A = array.array('i', L)
A

array('i', [0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

## Creating Arrays from Python Lists

In [12]:
# integer array built from a Python list of ints
np.array([1,4,2,5,3])

array([1, 4, 2, 5, 3])

Unlike Python lists, all data in a NumPy array must be of the same type. If the types do not match, NumPy will upcast where possible (e.g., integers are upcast to floating point).

In [14]:
# mixed floats and ints: the ints are upcast to floating point
np.array([3.14,4,2,3])

array([ 3.14,  4.  ,  2.  ,  3.  ])

In [15]:
# explicitly set the data type of the resulting array with the dtype keyword
np.array([1,2,3,4], dtype='float32')

array([ 1.,  2.,  3.,  4.], dtype=float32)

In [23]:
# nested lists result in multidimensional arrays;
# each inner sequence becomes one row of the 2-D array
np.array([range(i, i+3) for i in [2,4,6]])

array([[2, 3, 4],
       [4, 5, 6],
       [6, 7, 8]])

## Creating Arrays from Scratch

#### np.zeros

In [24]:
# Create a length-10 array filled with zeros; dtype=int requests integer elements
np.zeros(10, dtype=int)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

#### np.ones

In [25]:
# Create a floating-point array with 3 rows and 5 columns, filled with ones
np.ones((3,5), dtype=float)

array([[ 1.,  1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.,  1.]])

#### np.full

In [26]:
# Create an array with 3 rows and 5 columns in which every element is 3.14
np.full((3,5), 3.14)

array([[ 3.14,  3.14,  3.14,  3.14,  3.14],
       [ 3.14,  3.14,  3.14,  3.14,  3.14],
       [ 3.14,  3.14,  3.14,  3.14,  3.14]])

#### np.arange

In [27]:
# Create an array filled with a linear sequence
# Starting at 0, stepping by 2, and stopping before 20 (the stop value is excluded)
# This is similar to the built-in range() function
np.arange(0,20,2)

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

#### np.linspace

In [28]:
# Create an array of five values evenly spaced between 0 and 1 (both endpoints included)
np.linspace(0,1,5)

array([ 0.  ,  0.25,  0.5 ,  0.75,  1.  ])

#### np.random.random

In [29]:
# Create a 3x3 array of uniformly distributed random values in the interval [0, 1)
np.random.random((3,3))

array([[ 0.62634815,  0.8943754 ,  0.55803465],
       [ 0.07047609,  0.77017904,  0.32783195],
       [ 0.79250888,  0.58440723,  0.3935316 ]])

#### np.random.normal

In [31]:
# Create a 3x3 array of normally distributed random values
# with mean 0 and standard deviation of 1
# (arguments are: mean, standard deviation, output shape)
np.random.normal(0,1,(3,3))

array([[ 0.9431333 ,  0.44269346, -0.38188104],
       [-1.09772296,  0.84337697, -1.94184563],
       [ 0.6255768 , -0.30771619,  0.07772006]])

#### np.random.randint

In [32]:
# Create a 3x3 array of random integers in the interval [0,10), i.e. 10 itself is excluded
np.random.randint(0,10,(3,3))

array([[1, 0, 1],
       [1, 5, 5],
       [3, 8, 9]])

#### np.eye

In [33]:
# Create a 3x3 identity matrix (ones on the diagonal, zeros elsewhere)
np.eye(3)

array([[ 1.,  0.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  0.,  1.]])

#### np.empty

In [34]:
# Create an uninitialized array of three floats (no dtype is given, so the
# elements are floating point, not integers)
# The values will be whatever happens to already exist at that memory location
np.empty(3)

array([ 1.,  1.,  1.])

## NumPy Array Attributes

#### np.random.seed

*Seed* is used to make sure the same random numbers are generated each time the code is run.

In [35]:
np.random.seed(0) # seed for reproducibility

x1 = np.random.randint(10, size=6) # one-dimensional array
x2 = np.random.randint(10, size=(3,4)) # two-dimensional array
x3 = np.random.randint(10, size=(3,4,5)) # three-dimensional array

#### ndim, shape, size

In [38]:
# ndim = number of dimensions, shape = size of each dimension,
# size = total number of elements.
# print() is called with a single pre-formatted string so the cell runs, and
# emits the same text, under both Python 2 and Python 3.
print("x3 ndim:  {}".format(x3.ndim))
print("x3 shape:  {}".format(x3.shape))
print("x3 size:  {}".format(x3.size))

x3 ndim:  3
x3 shape:  (3, 4, 5)
x3 size:  60


#### dtype

In [39]:
# data type shared by every element of x3
# (single-string print() call: valid on both Python 2 and Python 3)
print("dtype:  {}".format(x3.dtype))

dtype:  int64


#### itemsize, nbytes

In [43]:
# size in bytes of each item in the array
# (single-string print() calls: valid on both Python 2 and Python 3)
print("itemsize : {} bytes".format(x3.itemsize))

# total size of the array: 60 items * 8 bytes each = 480 bytes
print("nbytes : {} bytes".format(x3.nbytes))

itemsize : 8 bytes
nbytes : 480 bytes


## Array Indexing: Accessing Single Elements

In [44]:
# redisplay x1, the one-dimensional array created in the seeded-random cell
x1

array([5, 0, 3, 3, 7, 9])

In [45]:
x1[0] # first element (indices start at zero)

5

In [46]:
x1[4] # fifth element

7

In [47]:
x1[-1] # negative indices count from the end: last element

9

In [48]:
x1[-2] # second-to-last element

7

#### Multidimensional Indexing

In [49]:
# redisplay the two-dimensional array x2
x2

array([[3, 5, 2, 4],
       [7, 6, 8, 8],
       [1, 6, 7, 7]])

In [50]:
x2[0,0] # comma-separated (row, column) index: row 0, column 0

3

In [51]:
x2[2,0] # row 2, column 0

1

In [52]:
x2[2,-1] # row 2, last column

7

#### Modifying Values

In [53]:
# assigning through an index modifies x2 in place
x2[0,0] = 12
x2

array([[12,  5,  2,  4],
       [ 7,  6,  8,  8],
       [ 1,  6,  7,  7]])

In [54]:
# Arrays have a fixed type: x1 holds integers, so assigning a float
# does not change the array's dtype
x1[0] = 3.14159 # this will be silently truncated to 3!
x1

array([3, 0, 3, 3, 7, 9])

## Array Slicing: Accessing Subarrays

#### One-dimensional subarrays

In [55]:
# one-dimensional array of the integers 0 through 9
x = np.arange(10)
x

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [56]:
x[:5] # first five elements (indices 0 through 4)

array([0, 1, 2, 3, 4])

In [57]:
x[5:] # elements from index 5 onward (index 5 itself is included)

array([5, 6, 7, 8, 9])

In [58]:
x[4:7] # middle subarray: indices 4, 5 and 6 (the stop index 7 is excluded)

array([4, 5, 6])

In [59]:
x[::2] # every other element, starting at index 0

array([0, 2, 4, 6, 8])

In [60]:
x[1::2] # every second element, beginning with index 1

array([1, 3, 5, 7, 9])

In [61]:
x[::-1] # all elements, in reverse order (negative step)

array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])

In [65]:
x[5::-2] # every other element, walking backwards from index 5

array([5, 3, 1])

#### Multidimensional subarrays

In [66]:
# current contents of x2 (element [0,0] was set to 12 earlier)
x2

array([[12,  5,  2,  4],
       [ 7,  6,  8,  8],
       [ 1,  6,  7,  7]])

In [67]:
x2[:2,:3] # first two rows, first three columns

array([[12,  5,  2],
       [ 7,  6,  8]])

In [68]:
x2[:3, ::2] # all three rows, every other column

array([[12,  2],
       [ 7,  8],
       [ 1,  7]])

In [69]:
x2[::-1,::-1] # reverse both the row order and the column order

array([[ 7,  7,  6,  1],
       [ 8,  8,  6,  7],
       [ 4,  2,  5, 12]])

#### Accessing array rows and columns

In [70]:
# a bare ':' selects everything along that axis
# (print() call form: valid on both Python 2 and Python 3)
print(x2[:,0]) # first column of x2

[12  7  1]


In [71]:
# (print() call form: valid on both Python 2 and Python 3)
print(x2[0,:]) # first row of x2

[12  5  2  4]


In [72]:
# (print() call form: valid on both Python 2 and Python 3)
print(x2[0]) # equivalent to x2[0,:]: a single index selects a whole row

[12  5  2  4]


#### Subarrays as no-copy views

In [73]:
# show x2 before a subarray is taken from it
# (print() call form: valid on both Python 2 and Python 3)
print(x2)

[[12  5  2  4]
 [ 7  6  8  8]
 [ 1  6  7  7]]


In [75]:
# take the upper-left 2x2 corner of x2; no .copy() is made here
x2_sub = x2[:2, :2]
# (print() call form: valid on both Python 2 and Python 3)
print(x2_sub)

[[12  5]
 [ 7  6]]


In [76]:
# If we modify the subarray, the original array will also change
x2_sub[0,0] = 99
# (print() call form: valid on both Python 2 and Python 3)
print(x2_sub)

[[99  5]
 [ 7  6]]


In [77]:
# x2 itself now shows the value written through x2_sub
# (print() call form: valid on both Python 2 and Python 3)
print(x2)

[[99  5  2  4]
 [ 7  6  8  8]
 [ 1  6  7  7]]


#### Creating copies of arrays

In [79]:
# .copy() produces an independent array instead of a slice of x2
x2_sub_copy = x2[:2,:2].copy()
# (print() call form: valid on both Python 2 and Python 3)
print(x2_sub_copy)

[[99  5]
 [ 7  6]]


In [82]:
# If we modify the copy, the original array will not change
x2_sub_copy[0,0] = 42
# (print() call form: valid on both Python 2 and Python 3)
print(x2_sub_copy)

[[42  5]
 [ 7  6]]


In [83]:
# x2 is unchanged by the write to x2_sub_copy
# (print() call form: valid on both Python 2 and Python 3)
print(x2)

[[99  5  2  4]
 [ 7  6  8  8]
 [ 1  6  7  7]]


## Reshaping of Arrays

#### reshape()

In [84]:
# lay the integers 1 through 9 out as a 3x3 grid
grid = np.arange(1,10).reshape((3,3))
# (print() call form: valid on both Python 2 and Python 3)
print(grid)

[[1 2 3]
 [4 5 6]
 [7 8 9]]


In [85]:
x = np.array([1,2,3])
x.reshape((1,3)) # row vector: one row w/ 3 columns

array([[1, 2, 3]])

#### np.newaxis

In [86]:
# row vector via newaxis: gives the same result as x.reshape((1,3))
x[np.newaxis,:]

array([[1, 2, 3]])

In [87]:
x.reshape((3,1)) # column vector via reshape: three rows, one column

array([[1],
       [2],
       [3]])

In [88]:
x[:, np.newaxis] # column vector via newaxis: same result as x.reshape((3,1))

array([[1],
       [2],
       [3]])

## Array Concatenation and Splitting

#### np.concatenate

In [89]:
x = np.array([1,2,3])
y = np.array([3,2,1])
# join the two one-dimensional arrays end to end
np.concatenate([x,y])

array([1, 2, 3, 3, 2, 1])

In [90]:
# Can also concatenate more than 2 arrays at once
# (z is a plain Python list here; it is accepted alongside the arrays)
z = [99,99,99]
# (print() call form: valid on both Python 2 and Python 3)
print(np.concatenate([x,y,z]))

[ 1  2  3  3  2  1 99 99 99]


In [92]:
# Can also use for 2D arrays: with no axis given, the arrays are
# joined along the first axis (rows are appended)
grid = np.array([[1,2,3],[4,5,6]])
np.concatenate([grid,grid])

array([[1, 2, 3],
       [4, 5, 6],
       [1, 2, 3],
       [4, 5, 6]])

In [93]:
# axis=1 joins along the second axis (columns are appended)
np.concatenate([grid,grid], axis=1)

array([[1, 2, 3, 1, 2, 3],
       [4, 5, 6, 4, 5, 6]])

#### np.vstack

In [94]:
# Working with arrays of mixed dimensions: x is 1-D, grid is 2-D
x = np.array([1,2,3])
grid = np.array([[9,8,7],[6,5,4]])

# vertically stack the arrays: x becomes the first row above grid's rows
np.vstack([x,grid])

array([[1, 2, 3],
       [9, 8, 7],
       [6, 5, 4]])

#### np.hstack

In [95]:
# horizontally stack arrays: y (two rows, one column) is appended
# as an extra column on the right of grid
y = np.array([[99],
              [99]])
np.hstack([grid,y])

array([[ 9,  8,  7, 99],
       [ 6,  5,  4, 99]])

#### np.split

In [96]:
x = [1,2,3,99,99,3,2,1]
# split points [3,5] cut before index 3 and before index 5, giving three pieces.
# NOTE: this rebinds x1, x2 and x3, replacing the arrays of the same names
# created earlier in the notebook.
x1, x2, x3 = np.split(x,[3,5])
# single-string print() call: emits the same text on Python 2 and Python 3
print("{} {} {}".format(x1, x2, x3))

[1 2 3] [99 99] [3 2 1]


#### np.vsplit

In [97]:
# 4x4 grid holding the integers 0 through 15, filled row by row
grid = np.arange(16).reshape((4,4))
grid

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [98]:
# vertical split: cut before row index 2, giving the top and bottom halves
upper, lower = np.vsplit(grid, [2])
# (print() call form: valid on both Python 2 and Python 3)
print(upper)
print(lower)

[[0 1 2 3]
 [4 5 6 7]]
[[ 8  9 10 11]
 [12 13 14 15]]


#### np.hsplit

In [99]:
# horizontal split: cut before column index 2, giving the left and right halves
left, right = np.hsplit(grid, [2])
# (print() call form: valid on both Python 2 and Python 3)
print(left)
print(right)

[[ 0  1]
 [ 4  5]
 [ 8  9]
 [12 13]]
[[ 2  3]
 [ 6  7]
 [10 11]
 [14 15]]
