## Task
Explore NumPy basics

## Notebook Summary
* Header
* Creating arrays
* Array data types
* Basic indexing
* Boolean indexing
* Fancy indexing
* Transposing
* Meshgrid, where
* Random walks

## Reference
* *Python for Data Analysis*, Wes McKinney

In [2]:
# display output from all cmds just like Python shell
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

import numpy as np
print 'numpy.version = ', np.__version__
import pandas as pd
print 'pandas.version = ', pd.__version__

% matplotlib inline
import matplotlib
print 'matplotlib.version = ', matplotlib.__version__
import matplotlib.pyplot as plt

import seaborn as sns
print 'seaborn.version = ', sns.__version__


numpy.version =  1.11.2
pandas.version =  0.19.1




matplotlib.version =  1.5.3
seaborn.version =  0.7.1


In [3]:
# Creating arrays using array & asarray

mylist = [1,2,3,4]
mylist
myarr = np.array(mylist)
myarr
myarr.dtype

myarr = np.array([5.1,6.2,7.3,8.4])
myarr
myarr.dtype

myarr2 = np.array(myarr)
myarr2

mylist = [[1,2,3],[4,5,6],[7,8,9]]
mylist
myarr = np.array(mylist)
myarr
myarr.ndim
myarr.shape
myarr.dtype
myarr.astype(float)
myarr.astype(str)
myarr

mylist = [1, 2, 3,4]
myarr = np.asarray(mylist)
myarr


[1, 2, 3, 4]

array([1, 2, 3, 4])

dtype('int64')

array([ 5.1,  6.2,  7.3,  8.4])

dtype('float64')

array([ 5.1,  6.2,  7.3,  8.4])

[[1, 2, 3], [4, 5, 6], [7, 8, 9]]

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

2

(3, 3)

dtype('int64')

array([[ 1.,  2.,  3.],
       [ 4.,  5.,  6.],
       [ 7.,  8.,  9.]])

array([['1', '2', '3'],
       ['4', '5', '6'],
       ['7', '8', '9']], 
      dtype='|S21')

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

array([1, 2, 3, 4])

In [4]:
# Creating arrays using zeros, ones, empty, identity, diag

np.zeros(1)
np.zeros([2,3,2])
np.zeros_like(myarr)

print '---'

np.ones(2)
np.ones([1,2,3], dtype='int')
np.ones_like(myarr)

print '---'

np.empty(3)
np.empty([2,3], dtype=int)
np.empty_like(myarr)

print '---'

np.eye(5)
np.identity(4)


np.diag([1,2,3])


array([ 0.])

array([[[ 0.,  0.],
        [ 0.,  0.],
        [ 0.,  0.]],

       [[ 0.,  0.],
        [ 0.,  0.],
        [ 0.,  0.]]])

array([0, 0, 0, 0])

---


array([ 1.,  1.])

array([[[1, 1, 1],
        [1, 1, 1]]])

array([1, 1, 1, 1])

---


array([ 0.,  1.,  2.])

array([[0, 0, 0],
       [0, 0, 0]])

array([0, 0, 0, 0])

---


array([[ 1.,  0.,  0.,  0.,  0.],
       [ 0.,  1.,  0.,  0.,  0.],
       [ 0.,  0.,  1.,  0.,  0.],
       [ 0.,  0.,  0.,  1.,  0.],
       [ 0.,  0.,  0.,  0.,  1.]])

array([[ 1.,  0.,  0.,  0.],
       [ 0.,  1.,  0.,  0.],
       [ 0.,  0.,  1.,  0.],
       [ 0.,  0.,  0.,  1.]])

array([[1, 0, 0],
       [0, 2, 0],
       [0, 0, 3]])

In [5]:
# Array data types

np.ones([2,3])
np.ones([2,3]).astype(int)
np.ones([2,3]).astype(str)

np.array([3.14, 4.25, 5.36])
np.array([3.14, 4.25, 5.36]).astype(int)
np.array([3.14, 4.25, 5.36]).astype(str)

np.array(['1.1', '2.2', '3', '4'])
np.array(['1.1', '2.2', '3', '4']).astype(str)
np.array(['1.1', '2.2', '3', '4']).astype(float)
np.array(['1.1', '2.2', '3', '4']).astype(myarr.dtype)
np.array(['1.1', '2.2', '3', '4']).astype(int)


array([[ 1.,  1.,  1.],
       [ 1.,  1.,  1.]])

array([[1, 1, 1],
       [1, 1, 1]])

array([['1.0', '1.0', '1.0'],
       ['1.0', '1.0', '1.0']], 
      dtype='|S32')

array([ 3.14,  4.25,  5.36])

array([3, 4, 5])

array(['3.14', '4.25', '5.36'], 
      dtype='|S32')

array(['1.1', '2.2', '3', '4'], 
      dtype='|S3')

array(['1.1', '2.2', '3', '4'], 
      dtype='|S3')

array([ 1.1,  2.2,  3. ,  4. ])

ValueError: invalid literal for int() with base 10: '1.1'

In [6]:
# Basic indexing & slicing

myarr = np.arange(10)
myarr
myarr[1]
myslice = myarr[4:6]
myslice

myslice[:] = -999
myarr

myslice = myarr[4:6].copy()
myslice[:] = -1
myslice
myarr

print '---'

myarr = np.arange(24).reshape(6,4)
myarr

myarr[5], myarr[5].shape
myarr[5:,:], myarr[5:,:].shape

myarr[0,1]

myarr[:,1]
myarr[:,[2,3]]
myarr[:,:3], myarr[:,:3].shape

myarr2 = myarr.copy()
myarr2[:,[2,3]] = -99
myarr2

myarr[1,:]
myarr[[2,3],:]
myarr[:2, 2:], myarr[:2, 2:].shape

myarr[2,2]
myarr[2][2]

# Need to mix integer and slices to get lower dimensional slices
# See Fig 4.2 on pg 90 in Python for Data Analysis


array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

1

array([4, 5])

array([   0,    1,    2,    3, -999, -999,    6,    7,    8,    9])

array([-1, -1])

array([   0,    1,    2,    3, -999, -999,    6,    7,    8,    9])

---


array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23]])

(array([20, 21, 22, 23]), (4,))

(array([[20, 21, 22, 23]]), (1, 4))

1

array([ 1,  5,  9, 13, 17, 21])

array([[ 2,  3],
       [ 6,  7],
       [10, 11],
       [14, 15],
       [18, 19],
       [22, 23]])

(array([[ 0,  1,  2],
        [ 4,  5,  6],
        [ 8,  9, 10],
        [12, 13, 14],
        [16, 17, 18],
        [20, 21, 22]]), (6, 3))

array([[  0,   1, -99, -99],
       [  4,   5, -99, -99],
       [  8,   9, -99, -99],
       [ 12,  13, -99, -99],
       [ 16,  17, -99, -99],
       [ 20,  21, -99, -99]])

array([4, 5, 6, 7])

array([[ 8,  9, 10, 11],
       [12, 13, 14, 15]])

(array([[2, 3],
        [6, 7]]), (2, 2))

10

10

In [7]:
# Boolean indexing

myarr = np.arange(20)
myarr
idx = myarr % 4 == 0
idx
myarr[idx]
myarr[myarr % 4 == 0]

print '---'

myarr = np.arange(32).reshape(8,4)
myarr

# statement below will not return 1st & 3rd cols because True, False are interpreted as 1, 0 resulting in myarr[:,[1,0,1,0]]
myarr[:,[True, False, True, False]] 
# instead, do this
myarr[:,np.array([True, False, True, False])]

myarr[[0,1,2],2:]

print '---'

myarr = np.random.randn(32).reshape(8,4)
myarr[myarr<0] = 0
myarr = myarr.round(2)
myarr[1] = 7
myarr


array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19])

array([ True, False, False, False,  True, False, False, False,  True,
       False, False, False,  True, False, False, False,  True, False,
       False, False], dtype=bool)

array([ 0,  4,  8, 12, 16])

array([ 0,  4,  8, 12, 16])

---


array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23],
       [24, 25, 26, 27],
       [28, 29, 30, 31]])



array([[ 1,  0,  1,  0],
       [ 5,  4,  5,  4],
       [ 9,  8,  9,  8],
       [13, 12, 13, 12],
       [17, 16, 17, 16],
       [21, 20, 21, 20],
       [25, 24, 25, 24],
       [29, 28, 29, 28]])

array([[ 0,  2],
       [ 4,  6],
       [ 8, 10],
       [12, 14],
       [16, 18],
       [20, 22],
       [24, 26],
       [28, 30]])

array([[ 2,  3],
       [ 6,  7],
       [10, 11]])

---


array([[ 0.12,  0.  ,  0.  ,  0.  ],
       [ 7.  ,  7.  ,  7.  ,  7.  ],
       [ 0.54,  1.77,  0.  ,  0.  ],
       [ 0.  ,  0.  ,  0.  ,  0.  ],
       [ 0.  ,  0.68,  0.33,  0.  ],
       [ 0.  ,  0.  ,  0.62,  1.45],
       [ 0.53,  0.  ,  0.  ,  0.  ],
       [ 0.  ,  1.66,  0.  ,  1.24]])

In [9]:
# Fancy indexing - indexing using integer arrays

myarr = np.arange(32).reshape(8,4)
myarr

myarr[[0,1],:]
myarr[[0,1]]
myarr[np.array([0,1])]
myarr[[1,0,1],:]

print '---'

myarr[:,[0,1]]
myarr[:,[-1, -2]]

print '---'

myarr[[0,1],[-3,1]] # values returned are those @ [0,-3] & [1,1]
# compare to
myarr[[0,1]][:,[-3,1]] 
# and see also
myarr[np.ix_([0,1],[-3,1])]


array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23],
       [24, 25, 26, 27],
       [28, 29, 30, 31]])

array([[0, 1, 2, 3],
       [4, 5, 6, 7]])

array([[0, 1, 2, 3],
       [4, 5, 6, 7]])

array([[0, 1, 2, 3],
       [4, 5, 6, 7]])

array([[4, 5, 6, 7],
       [0, 1, 2, 3],
       [4, 5, 6, 7]])

---


array([[ 0,  1],
       [ 4,  5],
       [ 8,  9],
       [12, 13],
       [16, 17],
       [20, 21],
       [24, 25],
       [28, 29]])

array([[ 3,  2],
       [ 7,  6],
       [11, 10],
       [15, 14],
       [19, 18],
       [23, 22],
       [27, 26],
       [31, 30]])

---


array([1, 5])

array([[1, 1],
       [5, 5]])

array([[1, 1],
       [5, 5]])

In [10]:
# Transposing arrays
# all transposing return a view on the array and not a copy

myarr = np.arange(15).reshape(5,3)
myarr

myarr.transpose()
myarr.T

myarr.shape
myarr.T.shape

np.dot(myarr.T, myarr) # X'X

# swapaxes - both statements below are equivalent
myarr.swapaxes(0,1)
myarr.swapaxes(1,0)

myarr = np.arange(24).reshape(2,3,4)
myarr
myarr.swapaxes(0,2)

myarr.transpose(4,2,3)


array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

array([[ 0,  3,  6,  9, 12],
       [ 1,  4,  7, 10, 13],
       [ 2,  5,  8, 11, 14]])

array([[ 0,  3,  6,  9, 12],
       [ 1,  4,  7, 10, 13],
       [ 2,  5,  8, 11, 14]])

(5, 3)

(3, 5)

array([[270, 300, 330],
       [300, 335, 370],
       [330, 370, 410]])

array([[ 0,  3,  6,  9, 12],
       [ 1,  4,  7, 10, 13],
       [ 2,  5,  8, 11, 14]])

array([[ 0,  3,  6,  9, 12],
       [ 1,  4,  7, 10, 13],
       [ 2,  5,  8, 11, 14]])

array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]],

       [[12, 13, 14, 15],
        [16, 17, 18, 19],
        [20, 21, 22, 23]]])

array([[[ 0, 12],
        [ 4, 16],
        [ 8, 20]],

       [[ 1, 13],
        [ 5, 17],
        [ 9, 21]],

       [[ 2, 14],
        [ 6, 18],
        [10, 22]],

       [[ 3, 15],
        [ 7, 19],
        [11, 23]]])

ValueError: invalid axis for this array

In [11]:
# Meshgrid

x = np.array([1,2,3])
y = np.array([4,5,6])

x2, y2 = np.meshgrid(x,y)
x2
y2

x2 + y2


array([[1, 2, 3],
       [1, 2, 3],
       [1, 2, 3]])

array([[4, 4, 4],
       [5, 5, 5],
       [6, 6, 6]])

array([[5, 6, 7],
       [6, 7, 8],
       [7, 8, 9]])

In [12]:
# where

myarr = np.random.randn(4,4)
myarr = np.round(myarr,2)
myarr

np.where(myarr < 0, 0, 99)
np.where(myarr < 0, 0, myarr)


array([[-0.24,  0.81,  2.15,  0.17],
       [ 0.44,  1.22,  1.24,  0.74],
       [ 0.4 ,  0.79, -0.3 ,  0.76],
       [ 1.17, -1.62,  0.39, -0.22]])

array([[ 0, 99, 99, 99],
       [99, 99, 99, 99],
       [99, 99,  0, 99],
       [99,  0, 99,  0]])

array([[ 0.  ,  0.81,  2.15,  0.17],
       [ 0.44,  1.22,  1.24,  0.74],
       [ 0.4 ,  0.79,  0.  ,  0.76],
       [ 1.17,  0.  ,  0.39,  0.  ]])

In [13]:
# Random walks

print 'Single random walk'
walk = np.random.randint(0,2,1000)
walk = np.where(walk>0, 1, -1)
walk = walk.cumsum()

# plt.plot(walk);

walk.min()
walk.max()

# time for walk to exceed 10 from origin
(abs(walk)>10).argmax()

print '---'
print 'Multiple random walks'

steps = np.random.randn(100, 1000)
steps.shape
walk = steps.cumsum(axis=1)
walk.shape

walk
(walk > 10).any(1) # get all walks that cross +10
(walk > 10).any(1).sum() # how many walks cross +10?

crossing_times = walk[(walk > 10).any(1)].argmax(axis=1) # when does each walk that cross +10 cross +10?
crossing_times
crossing_times.mean()


Single random walk


-7

34

178

---
Multiple random walks


(100, 1000)

(100, 1000)

array([[  0.31802541,  -1.33957098,  -0.21431708, ..., -51.3492799 ,
        -50.85109417, -51.28332017],
       [  0.77011331,   0.60729373,  -0.80297991, ..., -20.64113584,
        -21.89226311, -21.94830822],
       [ -1.09356511,  -1.72763304,  -2.08986624, ...,  39.43550392,
         38.70066617,  38.51549964],
       ..., 
       [ -0.67689754,   1.28182846,   0.76756105, ...,  46.18079709,
         46.16942317,  46.26582822],
       [ -1.79755416,  -2.65035402,  -2.10296836, ...,  -8.74279536,
         -7.36441623,  -8.05095313],
       [  0.22783953,  -2.26614744,  -4.4429862 , ...,   4.36115223,
          4.03182597,   3.89883578]])

array([False, False,  True,  True, False, False,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True, False,  True,  True,  True,  True,  True,  True,
        True,  True, False,  True,  True, False,  True,  True,  True,
       False, False,  True,  True,  True, False, False, False,  True,
        True,  True,  True,  True,  True,  True,  True,  True, False,
        True,  True,  True,  True,  True,  True, False,  True, False,
        True,  True,  True,  True,  True, False, False,  True,  True,
        True,  True, False,  True,  True,  True,  True, False,  True,
        True,  True, False,  True,  True, False,  True, False,  True,
        True,  True,  True,  True,  True, False, False,  True, False,  True], dtype=bool)

75

array([786, 949, 822, 909, 993, 862, 956, 171, 499, 691, 557,  50, 923,
       601, 771, 227, 300, 957, 408, 998, 918, 677, 994, 261, 905, 619,
       880, 351, 983, 679, 877, 891, 914, 940,  88,  69, 614, 840, 985,
       155, 169, 760, 558, 632, 618, 960, 510, 242, 997, 700, 999, 809,
       463, 252, 989, 995, 654, 961, 932, 304, 790, 404, 696, 578, 639,
       608, 965, 662, 195, 970, 652, 939, 207, 985, 356])

669.60000000000002