In [1]:
import numpy as np

In [2]:
def ln_break():
    print "\n", "*" * 10, "\n"

## Shaping, Reshaping, and Combining Arrays

In [13]:
arr1 = np.arange(0,15).reshape(3,5)
print arr1

[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]]


In [14]:
arr1[0][2] = -2
print arr1

[[ 0  1 -2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]]


In [15]:
arr1, arr2 = arr1[:,:-1], arr1[:,-1] #functionally equivalent to .pop() with lists or DataFrames
print arr1
ln_break()
print arr2

[[ 0  1 -2  3]
 [ 5  6  7  8]
 [10 11 12 13]]

********** 

[ 4  9 14]


In [16]:
#np.concatenate((arr1,arr2), axis=1) #throws error because arr2 is 1-D array
print arr1.shape
ln_break
print arr2.shape

(3, 4)
(3,)


In [17]:
np.concatenate((arr1, arr2[:, np.newaxis]), axis=1) #add an axis

array([[ 0,  1, -2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [18]:
print type(np.newaxis)  #the newaxis is a Nonetype

<type 'NoneType'>


In [19]:
np.hstack((arr1, arr2.reshape(len(arr2), 1))) #another way

array([[ 0,  1, -2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [20]:
arr1 = np.concatenate((arr1, arr2[:, np.newaxis]), axis=1) #let's go ahead and recreate
arr1

array([[ 0,  1, -2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [21]:
arr3 = np.linspace(15,20,5) #gives us five floating point numbers evenly spaced between start and stop (inclusive!)
print arr3

[ 15.    16.25  17.5   18.75  20.  ]


In [22]:
print arr3.dtype #yup, definitely floats
ln_break()
print arr3.astype(int) #casting array as integers (always rounds down)
ln_break()
print arr3 #but, oh wait, jk! That didn't work, just created a copy.
arr3 = arr3.astype(int)
arr3[4] = 19
arr3

float64

********** 

[15 16 17 18 20]

********** 

[ 15.    16.25  17.5   18.75  20.  ]


array([15, 16, 17, 18, 19])

In [23]:
print arr3.shape #still only 1-D
print arr3.size #there's also size
print arr3
ln_break()

print arr3.reshape(arr3.shape[0], 1) #don't forget to add a dimension
print arr3.shape  #but, oh wait, jk! that didnt work. just like astype() above, reshape() only makes a copy

(5,)
5
[15 16 17 18 19]

********** 

[[15]
 [16]
 [17]
 [18]
 [19]]
(5,)


In [24]:
print arr1
print arr1.shape
ln_break()


[[ 0  1 -2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]]
(3, 5)

********** 



In [25]:
print arr3
print arr3.shape
arr
ln_break()

print arr3.T #makes a copy
print arr3
ln_break()

print arr3.transpose() #makes a copy
print arr3
ln_break()

print np.transpose(arr3) #makes a copy
print arr3


[15 16 17 18 19]
(5,)

********** 

[15 16 17 18 19]
[15 16 17 18 19]

********** 

[15 16 17 18 19]
[15 16 17 18 19]

********** 

[15 16 17 18 19]
[15 16 17 18 19]


In [27]:
print arr1
print arr1.shape

ln_break()

print arr3
print arr3.shape
arr3 = arr3.T
print arr3

arr4 = np.vstack((arr1, arr3))
arr4

# np.vstack((arr1,arr3)) #throws an error. Their dimensions don't match



[[ 0  1 -2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]]
(3, 5)

********** 

[15 16 17 18 19]
(5,)
[15 16 17 18 19]


array([[ 0,  1, -2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19]])

In [49]:
arr5 = np.random.randint(-5,5,5)

In [50]:
arr5

array([ 0,  3, -3,  1, -2])

In [51]:
arr5 < 0

array([False, False,  True, False,  True], dtype=bool)

In [52]:
arr4[:,arr5<0]

array([[-2,  4],
       [ 7,  9],
       [12, 14],
       [17, 19]])

In [53]:
arr4[:,arr5] #What is happening here? Whoa, you are actually passing it indices. Super weird.

array([[ 0,  3, -2,  1,  3],
       [ 5,  8,  7,  6,  8],
       [10, 13, 12, 11, 13],
       [15, 18, 17, 16, 18]])

In [34]:
arr4

array([[ 0,  1, -2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19]])

## Boolean indexing

### Logical operators

In [55]:
simulations = 100
roll_1 = np.random.randint(1,6, simulations)
roll_2 = np.random.randint(1,6, simulations)
combined_rolls = roll_1 + roll_2
combined_rolls

array([ 3,  5,  6,  4,  8,  6,  6,  8,  6,  5,  3,  6,  6,  8,  6,  9, 10,
        7,  6,  5,  6,  8,  7,  5,  5,  6,  6,  5,  5,  7,  6,  6,  8,  4,
        7,  8,  6,  6,  5,  7,  9,  6,  4,  8,  9,  5,  4,  4,  6,  5,  5,
        9,  6,  8, 10,  9,  8,  2,  3,  8,  7,  2,  4,  6,  3, 10,  8,  6,
        4,  7,  5,  5,  6,  6,  6,  8,  6,  4,  3,  6,  7, 10,  3,  8,  4,
        9,  3,  7,  3,  6,  3,  7,  6, 10,  6,  8,  4,  8,  9,  5])

In [56]:
combined_rolls[combined_rolls > 7]

array([ 8,  8,  8,  9, 10,  8,  8,  8,  9,  8,  9,  9,  8, 10,  9,  8,  8,
       10,  8,  8, 10,  8,  9, 10,  8,  8,  9])

In [58]:
combined_rolls[combined_rolls % 2 == 0]

array([ 6,  4,  8,  6,  6,  8,  6,  6,  6,  8,  6, 10,  6,  6,  8,  6,  6,
        6,  6,  8,  4,  8,  6,  6,  6,  4,  8,  4,  4,  6,  6,  8, 10,  8,
        2,  8,  2,  4,  6, 10,  8,  6,  4,  6,  6,  6,  8,  6,  4,  6, 10,
        8,  4,  6,  6, 10,  6,  8,  4,  8])

In [59]:
combined_rolls[(combined_rolls > 7) | (combined_rolls % 2 == 0)]

array([ 6,  4,  8,  6,  6,  8,  6,  6,  6,  8,  6,  9, 10,  6,  6,  8,  6,
        6,  6,  6,  8,  4,  8,  6,  6,  9,  6,  4,  8,  9,  4,  4,  6,  9,
        6,  8, 10,  9,  8,  2,  8,  2,  4,  6, 10,  8,  6,  4,  6,  6,  6,
        8,  6,  4,  6, 10,  8,  4,  9,  6,  6, 10,  6,  8,  4,  8,  9])

In [62]:
successes = combined_rolls[(combined_rolls > 7) | (combined_rolls % 2 == 0)]

In [63]:
successes.size

67

In [60]:
combined_rolls > 7

array([False, False, False, False,  True, False, False,  True, False,
       False, False, False, False,  True, False,  True,  True, False,
       False, False, False,  True, False, False, False, False, False,
       False, False, False, False, False,  True, False, False,  True,
       False, False, False, False,  True, False, False,  True,  True,
       False, False, False, False, False, False,  True, False,  True,
        True,  True,  True, False, False,  True, False, False, False,
       False, False,  True,  True, False, False, False, False, False,
       False, False, False,  True, False, False, False, False, False,
        True, False,  True, False,  True, False, False, False, False,
       False, False, False,  True, False,  True, False,  True,  True, False], dtype=bool)

In [61]:
combined_rolls % 2 == 0

array([False, False,  True,  True,  True,  True,  True,  True,  True,
       False, False,  True,  True,  True,  True, False,  True, False,
        True, False,  True,  True, False, False, False,  True,  True,
       False, False, False,  True,  True,  True,  True, False,  True,
        True,  True, False, False, False,  True,  True,  True, False,
       False,  True,  True,  True, False, False, False,  True,  True,
        True, False,  True,  True, False,  True, False,  True,  True,
        True, False,  True,  True,  True,  True, False, False, False,
        True,  True,  True,  True,  True,  True, False,  True, False,
        True, False,  True,  True, False, False, False, False,  True,
       False, False,  True,  True,  True,  True,  True,  True, False, False], dtype=bool)

## Filtering rows based on values in the rows

In [3]:
'''
INPUT: 2 DIMENSIONAL NUMPY ARRAY
OUTPUT: 2 DIMENSIONAL NUMPY ARRAY

Return a numpy array containing only the rows from arr where all the values
are positive.

E.g.  [[1, -1, 2], [3, 4, 2], [-8, 4, -4]]  ->  [[3, 4, 2]]
'''

'\nINPUT: 2 DIMENSIONAL NUMPY ARRAY\nOUTPUT: 2 DIMENSIONAL NUMPY ARRAY\n\nReturn a numpy array containing only the rows from arr where all the values\nare positive.\n\nE.g.  [[1, -1, 2], [3, 4, 2], [-8, 4, -4]]  ->  [[3, 4, 2]]\n'

In [4]:
arr = np.array([[1, 2, 3], [4, -5, -6], [-7, 8, 9], [10, 11, 12]])
print "example input: \n{}".format(arr)
ln_break()
print "example output: [[1, 2, 3], [10, 11, 12]]"

example input: 
[[ 1  2  3]
 [ 4 -5 -6]
 [-7  8  9]
 [10 11 12]]

********** 

example output: [[1, 2, 3], [10, 11, 12]]


In [5]:
print arr[np.min(arr, axis=1) > 0]  #Given solution

[[ 1  2  3]
 [10 11 12]]


In [6]:
print arr[arr > 0]  #flattens array..... Why??

[ 1  2  3  4  8  9 10 11 12]


In [7]:
print arr>0 #2-D Booleans

[[ True  True  True]
 [ True False False]
 [False  True  True]
 [ True  True  True]]


In [8]:
print np.where(arr>0) #tuple of arrays of indices where condition is True

(array([0, 0, 0, 1, 2, 2, 3, 3, 3]), array([0, 1, 2, 0, 1, 2, 0, 1, 2]))


In [9]:
print np.min(arr, 1) #reminder of how .min() works
ln_break()
print np.min(arr, 0) #reminder of how .min() works

[ 1 -6 -7 10]

********** 

[-7 -5 -6]


In [10]:
print (arr>0).all() #gives False for entire array (bc array contains at least one instance where conditional evaluates to false)
ln_break()
print (arr>0).all(axis=1) #gives True or False for each row of the array

False

********** 

[ True False False  True]


In [11]:
print arr[(arr>0).all(axis=1)] #Alternative solution!!!!

[[ 1  2  3]
 [10 11 12]]


In [12]:
print np.all(arr>0, axis=1) #Can also call .all() as a function

[ True False False  True]


### argmax
The argmax function in numpy returns the index of the maximum value in an array. This will be very useful when implementing the prediction step of Naive Bayes, since we will want to map the maximum posterior to our set of possible classes.

In [3]:
arr = np.array([0.5, 0.99, 0.09])
np.argmax(arr)

1