# Learning Resources

Reference for all of numpy:

http://docs.scipy.org/doc/numpy/reference/

Supplemental materials in case you're interested in more than just the video lectures:

http://cs231n.github.io/python-numpy-tutorial/ 

# Creating Arrays

In [1]:
# Creating Numpy arrays
import numpy as np

# Converting from a list
#Let's start with a list

my_list1 = [1,2,3,4]

my_array1 = np.array(my_list1)


In [2]:
#Print out array

my_array1

array([1, 2, 3, 4])

In [3]:
# Make another list
my_list2 = [11,22,33,44]

#Make a list of lists
my_lists = [my_list1,my_list2]

#Make multi-dimensional array
my_array2 = np.array(my_lists)

#Show array
my_array2

array([[ 1,  2,  3,  4],
       [11, 22, 33, 44]])

In [4]:
#Let's get the shape (rows, columns) of the array
my_array2.shape

(2, 4)

In [5]:
#Find out the data type of the array
my_array2.dtype

dtype('int32')

In [6]:
#Making special case arrays

#Zeros
np.zeros(5)

array([ 0.,  0.,  0.,  0.,  0.])

In [7]:
#Ones
np.ones((5,5))

array([[ 1.,  1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.,  1.]])

In [11]:
# An empty (uninitialized) array: the values are whatever happened to be in memory, not guaranteed zeros

np.empty(5)
np.empty((3,4))

array([[ 0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.]])

In [12]:
#Identity array
np.eye(5)

array([[ 1.,  0.,  0.,  0.,  0.],
       [ 0.,  1.,  0.,  0.,  0.],
       [ 0.,  0.,  1.,  0.,  0.],
       [ 0.,  0.,  0.,  1.,  0.],
       [ 0.,  0.,  0.,  0.,  1.]])

In [13]:
# Using a range

np.arange(5)

array([0, 1, 2, 3, 4])

In [15]:
np.arange(5, 50, 2)

array([ 5,  7,  9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37,
       39, 41, 43, 45, 47, 49])

# Using arrays and scalars

In [16]:
# Create array
arr1 = np.array([[1,2,3],[8,9,10]])

#Show
arr1

array([[ 1,  2,  3],
       [ 8,  9, 10]])

In [17]:
#Multiplying Arrays
arr1*arr1

array([[  1,   4,   9],
       [ 64,  81, 100]])

In [18]:
#Subtraction
arr1-arr1

array([[0, 0, 0],
       [0, 0, 0]])

In [19]:
#Arithmetic operations with scalars on array
1 / arr1

array([[ 1.        ,  0.5       ,  0.33333333],
       [ 0.125     ,  0.11111111,  0.1       ]])

In [20]:
#Exponentiation (raise each element to a power)
arr1 ** 3

array([[   1,    8,   27],
       [ 512,  729, 1000]], dtype=int32)

# Indexing Arrays

In [24]:
#Creating sample array
arr = np.arange(0,11)
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [25]:
#Get a value at an index
arr[8]

8

In [26]:
#Get values in a range
arr[1:5]

array([1, 2, 3, 4])

In [27]:
#Get values in a range
arr[0:5]

array([0, 1, 2, 3, 4])

In [28]:
#Setting a value with index range (Broadcasting)
arr[0:5]=100

#Show
arr

array([100, 100, 100, 100, 100,   5,   6,   7,   8,   9,  10])

In [29]:
# Reset array, we'll see why I had to reset in a moment
arr = np.arange(0,11)

#Show
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [30]:
#Important notes on Slices
slice_of_arr = arr[0:6]

#Show slice
slice_of_arr

array([0, 1, 2, 3, 4, 5])

In [31]:
#Change Slice
slice_of_arr[:]=99

#Show Slice again
slice_of_arr

array([99, 99, 99, 99, 99, 99])

In [32]:
# Now note the changes also occur in our original array!
arr

# Data is not copied, it's a view of the original array! This avoids memory problems!


array([99, 99, 99, 99, 99, 99,  6,  7,  8,  9, 10])

In [33]:
#To get a copy, need to be explicit
arr_copy = arr.copy()

arr_copy

array([99, 99, 99, 99, 99, 99,  6,  7,  8,  9, 10])

In [34]:
# Indexing a 2D array

arr_2d = np.array(([5,10,15],[20,25,30],[35,40,45]))

#Show
arr_2d

array([[ 5, 10, 15],
       [20, 25, 30],
       [35, 40, 45]])

In [35]:
#Indexing row
arr_2d[1]

array([20, 25, 30])

In [36]:
# Format is arr_2d[row][col] or arr_2d[row,col]

# Getting individual element value
arr_2d[1][0]


20

In [37]:
# Getting individual element value
arr_2d[1,0]

20

In [38]:
# 2D array slicing

#Shape (2,2) from top right corner
arr_2d[:2,1:]


array([[10, 15],
       [25, 30]])

In [39]:
#Shape bottom row
arr_2d[2]

array([35, 40, 45])

In [40]:
#Shape bottom row
arr_2d[2,:]

array([35, 40, 45])

In [41]:
# Fancy Indexing

#Set up matrix
arr2d = np.zeros((10,10))

In [42]:
#Number of rows in the array (axis 0); the fill loop assigns one row per index,
#so the row count is the right bound even if the matrix is not square
arr_length = arr2d.shape[0]

In [43]:
#Set up array

for i in range(arr_length):
    arr2d[i] = i
    
arr2d

array([[ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.],
       [ 2.,  2.,  2.,  2.,  2.,  2.,  2.,  2.,  2.,  2.],
       [ 3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.],
       [ 4.,  4.,  4.,  4.,  4.,  4.,  4.,  4.,  4.,  4.],
       [ 5.,  5.,  5.,  5.,  5.,  5.,  5.,  5.,  5.,  5.],
       [ 6.,  6.,  6.,  6.,  6.,  6.,  6.,  6.,  6.,  6.],
       [ 7.,  7.,  7.,  7.,  7.,  7.,  7.,  7.,  7.,  7.],
       [ 8.,  8.,  8.,  8.,  8.,  8.,  8.,  8.,  8.,  8.],
       [ 9.,  9.,  9.,  9.,  9.,  9.,  9.,  9.,  9.,  9.]])

In [44]:
#Fancy indexing allows the following
arr2d[[2,4,6,8]]

array([[ 2.,  2.,  2.,  2.,  2.,  2.,  2.,  2.,  2.,  2.],
       [ 4.,  4.,  4.,  4.,  4.,  4.,  4.,  4.,  4.,  4.],
       [ 6.,  6.,  6.,  6.,  6.,  6.,  6.,  6.,  6.,  6.],
       [ 8.,  8.,  8.,  8.,  8.,  8.,  8.,  8.,  8.,  8.]])

In [45]:
#Allows in any order
arr2d[[6,4,2,7]]

array([[ 6.,  6.,  6.,  6.,  6.,  6.,  6.,  6.,  6.,  6.],
       [ 4.,  4.,  4.,  4.,  4.,  4.,  4.,  4.,  4.,  4.],
       [ 2.,  2.,  2.,  2.,  2.,  2.,  2.,  2.,  2.,  2.],
       [ 7.,  7.,  7.,  7.,  7.,  7.,  7.,  7.,  7.,  7.]])

# Array Transposition

In [46]:
arr2d

array([[ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.],
       [ 2.,  2.,  2.,  2.,  2.,  2.,  2.,  2.,  2.,  2.],
       [ 3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.,  3.],
       [ 4.,  4.,  4.,  4.,  4.,  4.,  4.,  4.,  4.,  4.],
       [ 5.,  5.,  5.,  5.,  5.,  5.,  5.,  5.,  5.,  5.],
       [ 6.,  6.,  6.,  6.,  6.,  6.,  6.,  6.,  6.,  6.],
       [ 7.,  7.,  7.,  7.,  7.,  7.,  7.,  7.,  7.,  7.],
       [ 8.,  8.,  8.,  8.,  8.,  8.,  8.,  8.,  8.,  8.],
       [ 9.,  9.,  9.,  9.,  9.,  9.,  9.,  9.,  9.,  9.]])

In [47]:
#Let's transpose
arr2d.T

array([[ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.],
       [ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.],
       [ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.],
       [ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.],
       [ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.],
       [ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.],
       [ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.],
       [ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.],
       [ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.],
       [ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.]])

In [48]:
# For 3D matrix
arr3d = np.arange(50).reshape((5,5,2))

#Show
arr3d

array([[[ 0,  1],
        [ 2,  3],
        [ 4,  5],
        [ 6,  7],
        [ 8,  9]],

       [[10, 11],
        [12, 13],
        [14, 15],
        [16, 17],
        [18, 19]],

       [[20, 21],
        [22, 23],
        [24, 25],
        [26, 27],
        [28, 29]],

       [[30, 31],
        [32, 33],
        [34, 35],
        [36, 37],
        [38, 39]],

       [[40, 41],
        [42, 43],
        [44, 45],
        [46, 47],
        [48, 49]]])

In [49]:
#We can also transpose a 3d matrix

arr3d.transpose((1,0,2))

array([[[ 0,  1],
        [10, 11],
        [20, 21],
        [30, 31],
        [40, 41]],

       [[ 2,  3],
        [12, 13],
        [22, 23],
        [32, 33],
        [42, 43]],

       [[ 4,  5],
        [14, 15],
        [24, 25],
        [34, 35],
        [44, 45]],

       [[ 6,  7],
        [16, 17],
        [26, 27],
        [36, 37],
        [46, 47]],

       [[ 8,  9],
        [18, 19],
        [28, 29],
        [38, 39],
        [48, 49]]])

# Universal Array Function

In [50]:
arr = np.arange(11)

arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [51]:
#Taking Square Roots
np.sqrt(arr)

array([ 0.        ,  1.        ,  1.41421356,  1.73205081,  2.        ,
        2.23606798,  2.44948974,  2.64575131,  2.82842712,  3.        ,
        3.16227766])

In [52]:
#Calculating exponential (e^)
np.exp(arr)

array([  1.00000000e+00,   2.71828183e+00,   7.38905610e+00,
         2.00855369e+01,   5.45981500e+01,   1.48413159e+02,
         4.03428793e+02,   1.09663316e+03,   2.98095799e+03,
         8.10308393e+03,   2.20264658e+04])

In [53]:
#Random array (normal dist)
A = np.random.randn(10)

A

array([-1.63828776,  0.50816901,  0.53857225, -0.64842349,  0.49642553,
       -0.44302233, -0.77528598,  0.48686183, -0.94718265, -1.58643576])

In [54]:
#Random array (normal dist)
B = np.random.randn(10)
B

array([-0.3289612 , -0.25996022, -1.10675234,  0.14342906,  0.89483324,
        0.15382248,  2.40247391, -0.72779432,  1.15657347,  1.04421525])

In [55]:
#Addition
np.add(A,B)

array([-1.96724897,  0.24820879, -0.56818009, -0.50499443,  1.39125877,
       -0.28919985,  1.62718793, -0.24093249,  0.20939082, -0.54222051])

In [56]:
#Finding the element-wise maximum of two arrays (np.minimum gives the element-wise minimum)
np.maximum(A,B)

array([-0.3289612 ,  0.50816901,  0.53857225,  0.14342906,  0.89483324,
        0.15382248,  2.40247391,  0.48686183,  1.15657347,  1.04421525])

In [57]:
#For full and extensive list of all universal functions
website = "http://docs.scipy.org/doc/numpy/reference/ufuncs.html#available-ufuncs"
import webbrowser
webbrowser.open(website)

True

# Array Processing

In [58]:
#Let's learn how to use numpy.where, starting with the slow pure-Python way

#Two value arrays to choose between
A = np.array([1, 2, 3, 4])
B = np.array([100, 200, 300, 400])

#Boolean mask: True takes the element from A, False takes it from B
condition = np.array([True, True, False, False])

#Pick element by element with a list comprehension
answer = [
    (from_a if take_a else from_b)
    for take_a, from_a, from_b in zip(condition, A, B)
]

#Show the answer
answer

#Drawbacks: slow on large arrays and awkward for multi-dimensional arrays

[1, 2, 300, 400]

In [59]:
#Now using numpy.where

answer2 = np.where(condition,A,B)

#Show
answer2

array([  1,   2, 300, 400])

In [60]:
#Can use np.where on 2D arrays for manipulation

from numpy.random import randn

arr = randn(5,5)

#Show arr
arr

array([[ 0.7336922 , -0.34576064,  0.43882224, -0.3842146 , -1.08035843],
       [ 0.810475  ,  0.6746925 ,  0.6094487 ,  0.23411853,  0.63124195],
       [-1.73350404,  1.2932275 , -0.44563842, -0.23522468, -1.30532913],
       [-1.09009518, -0.0136684 , -1.36656192,  1.58576583,  1.95452915],
       [ 1.07553463,  0.92819263,  1.45407601, -0.67767013, -0.30262136]])

In [61]:
# Where array is less than zero, make that value zero, otherwise leave it as the array value
np.where(arr < 0,0,arr)

array([[ 0.7336922 ,  0.        ,  0.43882224,  0.        ,  0.        ],
       [ 0.810475  ,  0.6746925 ,  0.6094487 ,  0.23411853,  0.63124195],
       [ 0.        ,  1.2932275 ,  0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        ,  1.58576583,  1.95452915],
       [ 1.07553463,  0.92819263,  1.45407601,  0.        ,  0.        ]])

In [62]:
#Other Statistical Processing
arr = np.array([[1,2,3],[4,5,6],[7,8,9]])

arr


array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [63]:
#SUM
arr.sum()

45

In [64]:
#Can also sum along an axis (we should expect a difference of 3 between the column sums)
arr.sum(0)

array([12, 15, 18])

In [65]:
#Mean
arr.mean()

5.0

In [66]:
#Standard Deviation
arr.std()

2.5819888974716112

In [67]:
#Variance
arr.var()

6.666666666666667

In [68]:
#Also any and all for processing boolean arrays

bool_arr = np.array([True,False,True])

#For any True
bool_arr.any()

True

In [69]:
# For all True
bool_arr.all()

False

In [70]:
# Finally sort array

#Create a random array
arr = randn(5)
#show
arr

array([-0.42240964,  0.99904915, -0.2969781 ,  0.24405714, -0.84526357])

In [71]:
#Sort it
arr.sort()
#show
arr

array([-0.84526357, -0.42240964, -0.2969781 ,  0.24405714,  0.99904915])

In [72]:
#Let's learn about unique
countries = np.array(['France', 'Germany', 'USA', 'Russia','USA','Mexico','USA','USA','Germany'])

np.unique(countries)

array(['France', 'Germany', 'Mexico', 'Russia', 'USA'],
      dtype='<U7')