## Array Manipulation in Numpy

- Let's define the following 2D array, where each row is a subject (person), the first column holds the subjects' weights, and the second column holds their heights.

In [6]:
import numpy as np

# Ten subjects, one per row. Per the notebook's description, column 0 is the
# weight and column 1 is the height of each subject.
X = np.array(
    [
        [73.847017017515, 241.893563180437],
        [68.7819040458903, 162.3104725213],
        [74.1101053917849, 212.7408555565],
        [71.7309784033377, 220.042470303077],
        [69.8817958611153, 206.349800623871],
        [67.2530156878065, 152.212155757083],
        [68.7850812516616, 183.927888604031],
        [68.3485155115879, 167.971110489509],
        [67.018949662883, 175.92944039571],
        [63.4564939783664, 156.399676387112],
    ],
    dtype=float,
)


In [7]:
# .shape is the (number of rows, number of columns) tuple of the array
X.shape

(10, 2)

## The above value means we have 10 subjects (10 samples), each with two features (weight and height)
## In other words, X has 10 rows and 2 columns

## Activity: What is the mean of weights

In [9]:
# ":" selects every row, 0 selects the first column (the weights)
print(X[:, 0])

[73.84701702 68.78190405 74.11010539 71.7309784  69.88179586 67.25301569
 68.78508125 68.34851551 67.01894966 63.45649398]


In [10]:
# Average of the first column (weights) over all subjects
X[:, 0].mean()

69.32138568119487

## Activity: What is the mean of heights

In [11]:
# ":" selects every row, 1 selects the second column (the heights)
print(X[:, 1])

[241.89356318 162.31047252 212.74085556 220.0424703  206.34980062
 152.21215576 183.9278886  167.97111049 175.9294404  156.39967639]


In [12]:
# Average of the second column (heights) over all subjects
X[:, 1].mean()

187.97774338186304

## Activity: What is the mean height of the first 5 people?

In [13]:
# Rows 0..4 (the slice end is exclusive) of the second column
print(X[:5, 1])

[241.89356318 162.31047252 212.74085556 220.0424703  206.34980062]


In [14]:
# Average height over the first five subjects only
X[:5, 1].mean()

208.667432437037

## Activity: what is the weight of the 9th subject? 

In [32]:
# Rows are 0-indexed, so the 9th subject is row 8; column 0 is the weight column
print(X[8, 0])

67.018949662883


## Activity: Subtract each column's mean from the corresponding column

In [26]:
# axis=0 averages down the rows, giving one mean per column
X.mean(axis=0)

array([ 69.32138568, 187.97774338])

In [27]:
# The column means have shape (2,), which broadcasts across all 10 rows of X
X - X.mean(axis=0)

array([[  4.52563134,  53.9158198 ],
       [ -0.53948164, -25.66727086],
       [  4.78871971,  24.76311217],
       [  2.40959272,  32.06472692],
       [  0.56041018,  18.37205724],
       [ -2.06836999, -35.76558762],
       [ -0.53630443,  -4.04985478],
       [ -0.97287017, -20.00663289],
       [ -2.30243602, -12.04830299],
       [ -5.8648917 , -31.57806699]])

In [28]:
# The row means have shape (10,), which cannot be broadcast against X's shape
# (10, 2): broadcasting aligns trailing axes, and 2 != 10. The failure is the
# point of this cell, so catch it and show the message instead of letting the
# exception stop a "Restart & Run All".
try:
    X - np.mean(X, axis=1)
except ValueError as err:
    print("ValueError:", err)

ValueError: operands could not be broadcast together with shapes (10,2) (10,) 

In [29]:
# axis=1 averages across the columns of each row, i.e. it mixes one subject's
# weight with their height -- the result is meaningless for our dataset
X.mean(axis=1)

array([157.8702901 , 115.54618828, 143.42548047, 145.88672435,
       138.11579824, 109.73258572, 126.35648493, 118.159813  ,
       121.47419503, 109.92808518])

In [23]:
# Mean of the first row on its own; matches the first entry of the axis=1 means
X[0].mean()

157.870290098976

In [24]:
# Mean of the second row on its own; matches the second entry of the axis=1 means
X[1].mean()

115.54618828359514

## Column-wise addition, row-wise addition

In [36]:
import numpy as np

A = np.array([[1, 2],
              [3, 4]])
print(A)
print(A.sum())        # total of every element -> 10
print(A.sum(axis=0))  # collapse the rows: one sum per column -> [4 6]
print(A.sum(axis=1))  # collapse the columns: one sum per row -> [3 7]

[[1 2]
 [3 4]]
10
[4 6]
[3 7]


## Broadcasting in Numpy

In [85]:
# A is 4x3 and v has 3 entries, one per column of A
A = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
    [10, 11, 12],
])
v = np.array([1, 0, 1])

# Broadcasting adds v to every row of A, so there is no need to tile v first
# with A + np.ones((4, 1))*v
A + v

array([[ 2,  2,  4],
       [ 5,  5,  7],
       [ 8,  8, 10],
       [11, 11, 13]])

In [91]:
# Lay the same 12 elements out as 2 rows of 6, keeping their row-major order
np.reshape(A, (2, 6))

array([[ 1,  2,  3,  4,  5,  6],
       [ 7,  8,  9, 10, 11, 12]])

## Resources

- http://cs231n.github.io/python-numpy-tutorial/