## Task
Explore reshaping arrays in NumPy

## Notebook Summary
* `reshape`, `newaxis`
* `.T`, `transpose`
* `flatten`, `ravel`
* Stack - `concatenate`, `hstack`, `vstack`, `dstack`
* Split - `split`, `array_split`, `hsplit`, `vsplit`, `dsplit`

## References
* *Python for Data Analysis*, Wes McKinney, O'Reilly, 2012
* *Numerical Python*, Robert Johansson, APress, 2015
* *Python Data Science Handbook*, Jake VanderPlas, O'Reilly, 2016


In [2]:
# Notebook setup.
# Make IPython echo the value of every expression statement in a cell (not
# just the last one), the way the plain Python shell does. The cells below
# rely on this to display several arrays per cell.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Report the interpreter and library versions this notebook was run with.
import platform
print 'python.version = ', platform.python_version()
import IPython
print 'ipython.version =', IPython.version_info

import numpy as np
print 'numpy.version = ', np.__version__


python.version =  2.7.10
ipython.version = (5, 1, 0, '')
numpy.version =  1.11.3


In [6]:
# reshape - returns a view when possible; may create a copy when the original
# array is not contiguous. Every target shape below holds the same 12 elements.

np.arange(12) # 1-d array of 12 elements
np.arange(12).reshape(4,3) # 4 rows x 3 columns
np.arange(12).reshape(4,3).reshape(6,2) # reshapes can be chained: (4,3) -> (6,2)

np.arange(12).reshape(6,2).reshape(6,2,1) # add a trailing axis of length 1 -> 3-d


array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

array([[ 0,  1],
       [ 2,  3],
       [ 4,  5],
       [ 6,  7],
       [ 8,  9],
       [10, 11]])

array([[[ 0],
        [ 1]],

       [[ 2],
        [ 3]],

       [[ 4],
        [ 5]],

       [[ 6],
        [ 7]],

       [[ 8],
        [ 9]],

       [[10],
        [11]]])

In [35]:
# Turn a 1-d array into a 2-d column or row vector by adding an axis, either
# by indexing with np.newaxis or by calling reshape; cannot use transpose() here.

arr = np.arange(5) # 1-d array of shape (5,), used as the starting "row vector"
arr
print 'original array: shape =', arr.shape

print 'col vector using newaxis'
arr[:,np.newaxis] # new trailing axis -> shape (5, 1)
print 'shape =', arr[:,np.newaxis].shape

# NOTE(review): the label printed below says 'newaxis', but this variant
# builds the same (5, 1) column vector with reshape.
print 'col vector using newaxis'
arr.reshape(5,1)
print 'shape = ', arr.reshape(5,1).shape


print '\n----- row vector using newaxis'
arr[np.newaxis,:] # new leading axis -> shape (1, 5)
print 'shape =', arr[np.newaxis,:].shape

arr.reshape(1,5) # same (1, 5) result via reshape
print 'shape =', arr.reshape(1,5).shape


array([0, 1, 2, 3, 4])

original array: shape = (5,)
col vector using newaxis


array([[0],
       [1],
       [2],
       [3],
       [4]])

shape = (5, 1)
col vector using newaxis


array([[0],
       [1],
       [2],
       [3],
       [4]])

shape =  (5, 1)

----- row vector using newaxis


array([[0, 1, 2, 3, 4]])

shape = (1, 5)


array([[0, 1, 2, 3, 4]])

shape = (1, 5)


In [28]:
# Transposing arrays with .T, transpose() and swapaxes()
# all transposing returns a view of the array and not a copy

print '----- Original array'
myarr = np.arange(15).reshape(5,3)
myarr

print '\n----- Transposed array'
myarr.transpose() # method form; with no arguments it reverses the axes
myarr.T # attribute shorthand, same result as transpose()

myarr.shape # (5, 3)
myarr.T.shape # (3, 5)

np.dot(myarr.T, myarr) # X'X: (3, 5) dot (5, 3) -> 3 x 3 matrix

print '\n----- swapaxes'

# swapaxes - both statements below are equivalent: for a 2-d array the order
# in which the two axes are named does not matter
print 'swap axes of 2d array - both arrays below are identical to the ones above from .T and transpose()'
myarr.swapaxes(0,1)
myarr.swapaxes(1,0)

print '\n----- swap axes of 3d array'
myarr = np.arange(24).reshape(2,3,4)
myarr

# Exchanging the first and last axes leaves the middle axis in place, so it
# matches transpose with the explicit axis order (2,1,0): (2,3,4) -> (4,3,2).
myarr.swapaxes(0,2)
myarr.transpose(2,1,0)


----- Original array


array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])


----- Transposed array


array([[ 0,  3,  6,  9, 12],
       [ 1,  4,  7, 10, 13],
       [ 2,  5,  8, 11, 14]])

array([[ 0,  3,  6,  9, 12],
       [ 1,  4,  7, 10, 13],
       [ 2,  5,  8, 11, 14]])

(5, 3)

(3, 5)

array([[270, 300, 330],
       [300, 335, 370],
       [330, 370, 410]])


----- swapaxes
swap axes of 2d array - both arrays below are identical to the ones above from .T and transpose()


array([[ 0,  3,  6,  9, 12],
       [ 1,  4,  7, 10, 13],
       [ 2,  5,  8, 11, 14]])

array([[ 0,  3,  6,  9, 12],
       [ 1,  4,  7, 10, 13],
       [ 2,  5,  8, 11, 14]])


----- swap axes of 3d array


array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]],

       [[12, 13, 14, 15],
        [16, 17, 18, 19],
        [20, 21, 22, 23]]])

array([[[ 0, 12],
        [ 4, 16],
        [ 8, 20]],

       [[ 1, 13],
        [ 5, 17],
        [ 9, 21]],

       [[ 2, 14],
        [ 6, 18],
        [10, 22]],

       [[ 3, 15],
        [ 7, 19],
        [11, 23]]])

array([[[ 0, 12],
        [ 4, 16],
        [ 8, 20]],

       [[ 1, 13],
        [ 5, 17],
        [ 9, 21]],

       [[ 2, 14],
        [ 6, 18],
        [10, 22]],

       [[ 3, 15],
        [ 7, 19],
        [11, 23]]])

In [21]:
# ravel, flatten - collapse an array to 1-d
# ravel - returns a view when possible; is a library-level and array-level function
# flatten - always returns a copy; is an array-level function only

print '\n----- Original array'
# 12 random values rounded to 2 decimals, arranged as 4 rows x 3 columns
myarr = np.round(np.random.rand(12),2).reshape(4,3)
myarr

# flatten - always returns a copy of the input array
f = myarr.flatten() # use order param to change order of flattening
print '\n----- Flattened array'
f

print '\n----- Flattened array with some values modified'
f[[0,2,4]] = -88 # does not modify original array
f

print '\n----- Original array - not affected by flatten'
myarr


print '\n----- ravel - returns a view when possible'
np.ravel(myarr) # library-level form
r = myarr.ravel() # array-level form, same values
r
print 'shape of ravel-ed array = ', r.shape

# Writing through r changes myarr as well, as the output of the final
# expression in this cell shows.
r[[1,3,5,7]] = -99 # modified original array since r is a view
r

print '\n----- Original array is modified'
myarr



----- Original array


array([[ 0.28,  0.36,  0.21],
       [ 0.78,  0.27,  0.98],
       [ 0.35,  0.1 ,  0.73],
       [ 0.78,  0.69,  0.4 ]])


----- Flattened array


array([ 0.28,  0.36,  0.21,  0.78,  0.27,  0.98,  0.35,  0.1 ,  0.73,
        0.78,  0.69,  0.4 ])


----- Flattened array with some values modified


array([-88.  ,   0.36, -88.  ,   0.78, -88.  ,   0.98,   0.35,   0.1 ,
         0.73,   0.78,   0.69,   0.4 ])


----- Original array - not affected by flatten


array([[ 0.28,  0.36,  0.21],
       [ 0.78,  0.27,  0.98],
       [ 0.35,  0.1 ,  0.73],
       [ 0.78,  0.69,  0.4 ]])


----- ravel - returns a view when possible


array([ 0.28,  0.36,  0.21,  0.78,  0.27,  0.98,  0.35,  0.1 ,  0.73,
        0.78,  0.69,  0.4 ])

array([ 0.28,  0.36,  0.21,  0.78,  0.27,  0.98,  0.35,  0.1 ,  0.73,
        0.78,  0.69,  0.4 ])

shape of ravel-ed array =  (12,)


array([  0.28, -99.  ,   0.21, -99.  ,   0.27, -99.  ,   0.35, -99.  ,
         0.73,   0.78,   0.69,   0.4 ])


----- Original array is modified


array([[  0.28, -99.  ,   0.21],
       [-99.  ,   0.27, -99.  ],
       [  0.35, -99.  ,   0.73],
       [  0.78,   0.69,   0.4 ]])

In [36]:
# concatenating & stacking
# concatenate - default axis = 0
# On the 3-d array below, vstack / hstack / dstack give the same result as
# concatenate along axis 0 / 1 / 2 respectively.

print '----- Row vector'
myarr = np.arange(5)
myarr # 1-d array of shape (5,)

print '----- vstack - as rows in a matrix'
np.vstack([myarr, myarr, myarr]) # each 1-d array becomes one row -> shape (3, 5)

# NOTE(review): despite the label printed below, hstack on 1-d arrays does not
# build columns; it joins them end to end into one longer 1-d array.
print '----- hstack - as columns in a matrix'
np.hstack([myarr, myarr, myarr])

print 'transpose will not work'
np.hstack([myarr.T, myarr.T]) # .T leaves a 1-d array unchanged, so the result is still 1-d

print 'reshape will work'
np.hstack([myarr.reshape(5,1), myarr.reshape(5,1)]) # two (5, 1) columns side by side -> (5, 2)

print 'np.newaxis will work'
np.hstack([myarr[:,np.newaxis], myarr[:,np.newaxis]]) # same (5, 2) result as the reshape version above


print '\n-----'

myarr = np.arange(24).reshape(2,3,4)
myarr
myarr.shape

print '\n----- hstack on 3d array'
np.hstack([myarr, myarr]) # joins along axis 1: (2,3,4) + (2,3,4) -> (2,6,4)
(np.hstack([myarr, myarr])).shape

print 'Same as using concatenate with axis=1'
np.concatenate([myarr, myarr], axis=1) # same as hstack
(np.concatenate([myarr, myarr], axis=1)).shape


print '\n----- vstack on 3d array'
np.vstack([myarr, myarr]) # joins along axis 0: (2,3,4) + (2,3,4) -> (4,3,4)
(np.vstack([myarr, myarr])).shape

print 'Same as using concatenate with axis=0'
np.concatenate([myarr, myarr], axis=0) # same as vstack
(np.concatenate([myarr, myarr], axis=0)).shape


print '\n----- dstack on 3d array'
np.dstack([myarr, myarr]) # joins along axis 2: (2,3,4) + (2,3,4) -> (2,3,8)
(np.dstack([myarr, myarr])).shape

print 'Same as using concatenate with axis=2'

np.concatenate([myarr, myarr], axis=2) # same as dstack
(np.concatenate([myarr, myarr], axis=2)).shape


----- Row vector


array([0, 1, 2, 3, 4])

----- vstack - as rows in a matrix


array([[0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4]])

----- hstack - as columns in a matrix


array([0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4])

transpose will not work


array([0, 1, 2, 3, 4, 0, 1, 2, 3, 4])

reshape will work


array([[0, 0],
       [1, 1],
       [2, 2],
       [3, 3],
       [4, 4]])

np.newaxis will work


array([[0, 0],
       [1, 1],
       [2, 2],
       [3, 3],
       [4, 4]])


-----


array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]],

       [[12, 13, 14, 15],
        [16, 17, 18, 19],
        [20, 21, 22, 23]]])

(2, 3, 4)


----- hstack on 3d array


array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11],
        [ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]],

       [[12, 13, 14, 15],
        [16, 17, 18, 19],
        [20, 21, 22, 23],
        [12, 13, 14, 15],
        [16, 17, 18, 19],
        [20, 21, 22, 23]]])

(2, 6, 4)

Same as using concatenate with axis=1


array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11],
        [ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]],

       [[12, 13, 14, 15],
        [16, 17, 18, 19],
        [20, 21, 22, 23],
        [12, 13, 14, 15],
        [16, 17, 18, 19],
        [20, 21, 22, 23]]])

(2, 6, 4)


----- vstack on 3d array


array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]],

       [[12, 13, 14, 15],
        [16, 17, 18, 19],
        [20, 21, 22, 23]],

       [[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]],

       [[12, 13, 14, 15],
        [16, 17, 18, 19],
        [20, 21, 22, 23]]])

(4, 3, 4)

Same as using concatenate with axis=0


array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]],

       [[12, 13, 14, 15],
        [16, 17, 18, 19],
        [20, 21, 22, 23]],

       [[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]],

       [[12, 13, 14, 15],
        [16, 17, 18, 19],
        [20, 21, 22, 23]]])

(4, 3, 4)


----- dstack on 3d array


array([[[ 0,  1,  2,  3,  0,  1,  2,  3],
        [ 4,  5,  6,  7,  4,  5,  6,  7],
        [ 8,  9, 10, 11,  8,  9, 10, 11]],

       [[12, 13, 14, 15, 12, 13, 14, 15],
        [16, 17, 18, 19, 16, 17, 18, 19],
        [20, 21, 22, 23, 20, 21, 22, 23]]])

(2, 3, 8)

Same as using concatenate with axis=2


array([[[ 0,  1,  2,  3,  0,  1,  2,  3],
        [ 4,  5,  6,  7,  4,  5,  6,  7],
        [ 8,  9, 10, 11,  8,  9, 10, 11]],

       [[12, 13, 14, 15, 12, 13, 14, 15],
        [16, 17, 18, 19, 16, 17, 18, 19],
        [20, 21, 22, 23, 20, 21, 22, 23]]])

(2, 3, 8)

In [52]:
# split
# np.split raises error if array cannot be split into equal sized sub-arrays, array_split does not
# An integer second argument is the number of pieces; a list gives the indices
# at which to cut.

arr = np.arange(24).reshape(6,4)
arr

f, s, t = np.split(arr, 3) # split into 3 subarrays with same number of rows (2 each)
f
s
t

print '-----'

f,s = np.split(arr, [3]) # split after 3rd row: rows 0-2 and rows 3-5
f
s

print '-----'

f,s = np.split(arr,[2], axis=1) # split after 2nd column: columns 0-1 and columns 2-3
f
s

print '-----'

# 4 columns cannot be divided evenly into 3 pieces:
# f,s,t = np.split(arr, 3, axis=1) - will raise ValueError
f,s,t = np.array_split(arr, 3, axis=1) # will not raise exception; pieces are 2, 1 and 1 columns wide
f
s
t


array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23]])

array([[0, 1, 2, 3],
       [4, 5, 6, 7]])

array([[ 8,  9, 10, 11],
       [12, 13, 14, 15]])

array([[16, 17, 18, 19],
       [20, 21, 22, 23]])

-----


array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

array([[12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23]])

-----


array([[ 0,  1],
       [ 4,  5],
       [ 8,  9],
       [12, 13],
       [16, 17],
       [20, 21]])

array([[ 2,  3],
       [ 6,  7],
       [10, 11],
       [14, 15],
       [18, 19],
       [22, 23]])

-----


array([[ 0,  1],
       [ 4,  5],
       [ 8,  9],
       [12, 13],
       [16, 17],
       [20, 21]])

array([[ 2],
       [ 6],
       [10],
       [14],
       [18],
       [22]])

array([[ 3],
       [ 7],
       [11],
       [15],
       [19],
       [23]])

In [69]:
# hsplit, vsplit, dsplit - shorthands for split along axis 1, 0 and 2

arr = np.arange(48).reshape(4,3,4)
arr

print '\n----- hsplit'

f,s,t = np.hsplit(arr, 3) # same as split with axis=1; each piece has shape (4, 1, 4)
f
s
t

print '\n----- vsplit'

f,s = np.vsplit(arr, 2) # same as split with axis=0; each piece has shape (2, 3, 4)
f
s

print '\n----- dsplit'

f,s = np.dsplit(arr, 2) # same as split with axis=2; each piece has shape (4, 3, 2)
f
s


array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]],

       [[12, 13, 14, 15],
        [16, 17, 18, 19],
        [20, 21, 22, 23]],

       [[24, 25, 26, 27],
        [28, 29, 30, 31],
        [32, 33, 34, 35]],

       [[36, 37, 38, 39],
        [40, 41, 42, 43],
        [44, 45, 46, 47]]])


----- hsplit


array([[[ 0,  1,  2,  3]],

       [[12, 13, 14, 15]],

       [[24, 25, 26, 27]],

       [[36, 37, 38, 39]]])

array([[[ 4,  5,  6,  7]],

       [[16, 17, 18, 19]],

       [[28, 29, 30, 31]],

       [[40, 41, 42, 43]]])

array([[[ 8,  9, 10, 11]],

       [[20, 21, 22, 23]],

       [[32, 33, 34, 35]],

       [[44, 45, 46, 47]]])


----- vsplit


array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]],

       [[12, 13, 14, 15],
        [16, 17, 18, 19],
        [20, 21, 22, 23]]])

array([[[24, 25, 26, 27],
        [28, 29, 30, 31],
        [32, 33, 34, 35]],

       [[36, 37, 38, 39],
        [40, 41, 42, 43],
        [44, 45, 46, 47]]])


----- dsplit


array([[[ 0,  1],
        [ 4,  5],
        [ 8,  9]],

       [[12, 13],
        [16, 17],
        [20, 21]],

       [[24, 25],
        [28, 29],
        [32, 33]],

       [[36, 37],
        [40, 41],
        [44, 45]]])

array([[[ 2,  3],
        [ 6,  7],
        [10, 11]],

       [[14, 15],
        [18, 19],
        [22, 23]],

       [[26, 27],
        [30, 31],
        [34, 35]],

       [[38, 39],
        [42, 43],
        [46, 47]]])