### Reshaping Arrays 

In [3]:
import numpy as np
import pandas as pd

arr = np.arange(8)

arr

array([0, 1, 2, 3, 4, 5, 6, 7])

In [4]:
arr.reshape(2,4)

array([[0, 1, 2, 3],
       [4, 5, 6, 7]])

In [5]:
# Multidimensional array can also be reshaped

arr.reshape((4,2)).reshape((2,4))

array([[0, 1, 2, 3],
       [4, 5, 6, 7]])

In [6]:
# Put in -1 for shape dimensions in which case the value will be inferred from the data

arr =np.arange(15)

arr.reshape(3,-1)  # system inferred 5

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [7]:
other_arr = np.ones((3,5))
other_arr.shape #attribute

(3L, 5L)

In [8]:
arr.reshape(other_arr.shape)

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [9]:
# flattening: from higher dimension to one dimension returns a copy of the data

arr = np.arange(15).reshape((5,3))

arr

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [10]:
arr.flatten()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [11]:
arr.ravel() # does not produce a copy of the data if doesn't have to

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [12]:
arr

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

### Concatenating and Splitting Arrays

`numpy.concatenate` takes a sequence of arrays and joins them together along an axis

In [16]:
arr1 = np.array([[1,2,3], [4,5,6]])
arr2=np.array([[7,8,9], [10,11,12]])

print np.concatenate([arr1, arr2], axis=0) #changes rows
print np.vstack((arr1,arr2))


print np.concatenate([arr1,arr2], axis=1) # changes columns
print np.hstack((arr1,arr2))

[[ 1  2  3]
 [ 4  5  6]
 [ 7  8  9]
 [10 11 12]]
[[ 1  2  3]
 [ 4  5  6]
 [ 7  8  9]
 [10 11 12]]
[[ 1  2  3  7  8  9]
 [ 4  5  6 10 11 12]]
[[ 1  2  3  7  8  9]
 [ 4  5  6 10 11 12]]


** split(arr, indices)**

indices : inicates where along axis the array is split. If it's an integar (N), then the array will be split into N equal parts 

In [17]:
# split slices apart an array into multiple arrays along an axis

from numpy.random import randn

arr = randn(5,2)

arr

array([[-0.25799264, -0.22810004],
       [-0.86273416,  0.6731616 ],
       [ 1.20783482,  1.34786143],
       [-0.48672972, -0.15379991],
       [-1.85281013,  1.42949362]])

In [23]:
first, second, third = np.split(arr, [1,3])

print first # arr[:1]
print''
print second # arr[1:2]
print ''
print third # arr[2:3]

[[-0.25799264 -0.22810004]]

[[-0.86273416  0.6731616 ]
 [ 1.20783482  1.34786143]]

[[-0.48672972 -0.15379991]
 [-1.85281013  1.42949362]]


### Fancy Indexing Equivalents: take and put

One way to get and set subsets of arrays is by fancy indexing using integer arrays

In [26]:
arr = np.arange(10) * 100

inds = [7,1,2,6]

arr[inds]

array([700, 100, 200, 600])

In [27]:
# An alternate method is to use take

arr.take(inds)

array([700, 100, 200, 600])

In [30]:
# to replace values at an index with another value, we can use put

arr.put(inds, 424242)

arr

array([     0, 424242, 424242,    300,    400,    500, 424242, 424242,
          800,    900])

In [32]:
arr.put(inds, [40,41,42,43]) # Per index, input number in array
arr

array([  0,  41,  42, 300, 400, 500,  43,  40, 800, 900])

In [33]:
# To use take along other axes, we can pass the axis keyword

inds=[2,0,2,1]

arr=randn(2,4)

arr

array([[ 0.26658552,  0.25641679,  0.31177091, -1.07536502],
       [-0.71682836,  0.0089269 , -1.55081869, -0.0032271 ]])

In [35]:
arr.take(inds,axis=1) # taking column 2, 0, 2, 1

array([[ 0.31177091,  0.26658552,  0.31177091,  0.25641679],
       [-1.55081869, -0.71682836, -1.55081869,  0.0089269 ]])

### Broadcasting

Describes how arithmetic works between arrays of different shapes. It can be easily misunderstood however.

**Follow the rule: 2 Arrays are compatible for broadcasting if for each trailing dimension (starting from the end), the axis lengths match or if either of the lengths = 1. Broadcasting is then performed over the missing and / or length 1 dimensions**

In [40]:
arr = np.arange(5)

arr * 4 # arithmetic on every elmeent of the array or broadcasted to all elements

array([ 0,  4,  8, 12, 16])

In [41]:
arr = randn(4,3)
arr

array([[-1.20637123, -0.21027611, -1.06106911],
       [ 0.09104132,  1.55749126, -1.29406101],
       [-1.26023739, -0.57713345,  0.06221236],
       [ 0.35295214,  0.33398626,  0.15438978]])

In [42]:
arr.mean(0) # provide a mean along axis 0 

array([-0.50565379,  0.27601699, -0.534632  ])

We wish to subtract the mean value from each row. Since arr.mean(0) has length 3, it is compatible for broadcasting across axis 0 since the trailing dimension in arr is 3. 

In [44]:
# demean each column by subtracting hte column means

demeaned = arr - arr.mean(0)
demeaned

array([[-0.70071744, -0.4862931 , -0.52643712],
       [ 0.59669511,  1.28147427, -0.75942902],
       [-0.7545836 , -0.85315044,  0.59684435],
       [ 0.85860593,  0.05796927,  0.68902178]])

To subtract over axis 1 (subtracting the row mean from each row), the smaller array must have shape (4,1)

In [45]:
arr

array([[-1.20637123, -0.21027611, -1.06106911],
       [ 0.09104132,  1.55749126, -1.29406101],
       [-1.26023739, -0.57713345,  0.06221236],
       [ 0.35295214,  0.33398626,  0.15438978]])

In [48]:
row_means = arr.mean(1)
row_means # 4x1 array

array([-0.82590549,  0.11815719, -0.5917195 ,  0.28044273])

In [54]:
#  row_means's last trailing dimension must be 1

print arr # 4x3 array
print row_means.reshape(4,1)

[[-1.20637123 -0.21027611 -1.06106911]
 [ 0.09104132  1.55749126 -1.29406101]
 [-1.26023739 -0.57713345  0.06221236]
 [ 0.35295214  0.33398626  0.15438978]]
[[-0.82590549]
 [ 0.11815719]
 [-0.5917195 ]
 [ 0.28044273]]


In [57]:
deamaned = arr - row_means.reshape((4,1))
demeaned.mean(1)

array([-0.57114922,  0.37291346, -0.33696323,  0.53519899])

### Sorting

The ndarray `sort` instance method is an in-place sort with replacement

In [59]:
arr = randn(6)

arr.sort()

arr

array([-0.15208162,  0.20742022,  0.31048611,  0.39781354,  0.46541068,
        0.69374075])

In [60]:
arr = randn(3,5)

arr

array([[-0.91145304, -0.55908415, -2.10700118,  1.68774727, -0.78536204],
       [-2.82665686,  1.11442093, -1.13903283,  0.19387754, -0.63080674],
       [ 0.10714136,  0.95794439, -2.41085742, -0.38911444,  1.1774424 ]])

In [62]:
arr[:,0].sort() # sort all rows by column 0

arr

array([[-2.82665686, -0.55908415, -2.10700118,  1.68774727, -0.78536204],
       [-0.91145304,  1.11442093, -1.13903283,  0.19387754, -0.63080674],
       [ 0.10714136,  0.95794439, -2.41085742, -0.38911444,  1.1774424 ]])

To not sort with replacement, use `numpy.sort`. It creates a new, sorted copy of an array. 

In [65]:
arr = randn(5)

print arr

print np.sort(arr)

print arr

[-0.02550387  0.47530212  0.53139121  0.15919149 -0.71404591]
[-0.71404591 -0.02550387  0.15919149  0.47530212  0.53139121]
[-0.02550387  0.47530212  0.53139121  0.15919149 -0.71404591]


In [75]:
# Take an axis argument

arr = randn(5,4)

arr

array([[-1.06598915,  1.16609753, -0.3268122 , -0.10638084],
       [ 0.61028227, -0.90689118,  0.93218303, -1.19323377],
       [-1.15519559,  1.66067732, -0.11922245,  0.1748295 ],
       [ 0.05996118, -0.36144986, -0.05469024, -0.82517769],
       [-0.84162825,  0.61835402,  0.66283417,  0.07750631]])

In [76]:
arr.sort(axis=1) # sorts vertically rows 

arr

array([[-1.06598915, -0.3268122 , -0.10638084,  1.16609753],
       [-1.19323377, -0.90689118,  0.61028227,  0.93218303],
       [-1.15519559, -0.11922245,  0.1748295 ,  1.66067732],
       [-0.82517769, -0.36144986, -0.05469024,  0.05996118],
       [-0.84162825,  0.07750631,  0.61835402,  0.66283417]])

In [78]:

arr=randn(5,4)
arr.sort(axis=0) # sorts horizontally  columns
arr

array([[ 0.48008632, -0.93257359, -0.23354868, -0.83755944],
       [ 0.60300127,  0.15971159, -0.22731609, -0.4696589 ],
       [ 0.63523396,  0.28745373,  0.05169063, -0.36007198],
       [ 0.9667719 ,  1.00804425,  0.88736646,  1.22817975],
       [ 2.08476925,  1.44589048,  1.39066994,  1.65925196]])