In [1]:
# http://people.duke.edu/~ccc14/pcfb/numpympl/NumpyBasics.html
# https://docs.scipy.org/doc/numpy-dev/user/quickstart.html
# http://www.engr.ucsb.edu/~shell/che210d/numpy.pdf
# http://www.labri.fr/perso/nrougier/teaching/numpy/numpy.html
# http://www.labri.fr/perso/nrougier/teaching/numpy.100/

# NUMPY TO MATLAB - https://docs.scipy.org/doc/numpy-dev/user/numpy-for-matlab-users.html

In [2]:
import numpy as np

## NDARRAY
An ndarray is an n-dimensional array in which all items are of the same type (unlike a Python data structure) and consequently use the same amount of space. There are 21 different built-in types of objects (also called dtypes) that can be stored in an ndarray; the list below has 23 names because `intp` and `uintp` are pointer-sized aliases of other integer types. They are
* bool_ 
* byte
* short
* intc
* int_
* longlong
* intp
* ubyte
* ushort
* uintc
* uint
* ulonglong
* uintp
* single
* float_
* longfloat
* csingle
* complex_
* clongfloat
* object_
* str_
* unicode_
* void

For some of the dtypes, a trailing _ is added to differentiate that dtype from the corresponding Python type. Such types are also called 'enhanced scalars'. They have the same precision as the Python type.

All the types except the str_, unicode_ and void are of fixed size. 

In [6]:
# Creating a simple ndarray
a = np.arange(10) # integers 0..9, similar to range(10) but stored in an ndarray
print(a, type(a))


[0 1 2 3 4 5 6 7 8 9] <class 'numpy.ndarray'>


In [7]:
# Indexing
print(a[3], type(a[3])) # a single element is a NumPy scalar whose type is the array's element type
print(a[1:5], type(a[1:5])) # slicing results in an ndarray; the type shown is for the same slice that is printed

3 <class 'numpy.int64'>
[1 2 3 4] <class 'numpy.ndarray'>


In [8]:
# Universal functions or ufunc
# They perform an element-by-element operation on an ndarray.
b = np.sin(a) # sine of every element of a
print(b)

c = np.add(a, b) # element-wise sum of a and b
print(c)

# For a full list of ufunc, visit
# http://docs.scipy.org/doc/numpy/reference/ufuncs.html

[ 0.          0.84147098  0.90929743  0.14112001 -0.7568025  -0.95892427
 -0.2794155   0.6569866   0.98935825  0.41211849]
[ 0.          1.84147098  2.90929743  3.14112001  3.2431975   4.04107573
  5.7205845   7.6569866   8.98935825  9.41211849]


In [9]:
# In the case of the add function above, a and b were vectors of the same size.
# What happens if the operands are of different sizes, as in the example below?
d = np.add(a, 3)
print(d)

# Adding the scalar 3 to the vector a adds 3 to each element of a.
# In other words, the value 3 was 'broadcast' to each element of a and added.

[ 3  4  5  6  7  8  9 10 11 12]


In [10]:
# NDARRAY attributes

print("Shape is: ", b.shape) # Length of the array along each dimension
print("Number of dimensions are: ", b.ndim) # Number of dimensions
print("Data type: ", b.dtype) # Data type of each element
print("Itemsize: ", b.itemsize) # Memory occupied by each element, in bytes
print("type: ", type(b))  # Type of b itself (the container, not its elements)
print("dir: ", dir(b.dtype)) # Names of the attributes available on the dtype object

Shape is:  (10,)
Number of dimensions are:  1
Data type:  float64
Itemsize:  8
type:  <class 'numpy.ndarray'>
dir:  ['__bool__', '__class__', '__delattr__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getitem__', '__gt__', '__hash__', '__init__', '__le__', '__len__', '__lt__', '__mul__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__rmul__', '__setattr__', '__setstate__', '__sizeof__', '__str__', '__subclasshook__', 'alignment', 'base', 'byteorder', 'char', 'descr', 'fields', 'flags', 'hasobject', 'isalignedstruct', 'isbuiltin', 'isnative', 'itemsize', 'kind', 'metadata', 'name', 'names', 'ndim', 'newbyteorder', 'num', 'shape', 'str', 'subdtype', 'type']


### ARRAY CONVERSION

In [19]:
print(b.tolist()) # convert ndarray b to a plain Python list

[0.0, 0.8414709848078965, 0.9092974268256817, 0.1411200080598672, -0.7568024953079282, -0.9589242746631385, -0.27941549819892586, 0.6569865987187891, 0.9893582466233818, 0.4121184852417566]


In [20]:
# Write the vector b to a text file as comma-separated values,
# each formatted with 3 digits after the decimal point
b.tofile(file="data.csv", sep=",", format="%0.3f")

In [21]:
b.dump('data.pickle') # Write b to the file 'data.pickle' as a pickle

In [11]:
# np.save stores a single array, so the list [a, b] is first converted to one
# two-row array; the integer row a comes back as floats (see the output below).
np.save('b.npy', [a, b])
# Read more about npy format at https://docs.scipy.org/doc/numpy/neps/npy-format.html
c1, d1 = np.load('b.npy') # unpacking the loaded array yields its two rows
print(c1, d1)

[ 0.  1.  2.  3.  4.  5.  6.  7.  8.  9.] [ 0.          0.84147098  0.90929743  0.14112001 -0.7568025  -0.95892427
 -0.2794155   0.6569866   0.98935825  0.41211849]


### ARRAY SHAPE 

In [18]:
a = np.arange(8)
# order='C' fills the 2x4 result row by row (the last index varies fastest)
b = np.reshape(a, (2, 4), order='C')
# order='F' fills it column by column; this second assignment replaces the
# C-ordered result, so only the Fortran-ordered array is printed
b = np.reshape(a, (2, 4), order='F')

print(b)

[[0 2 4 6]
 [1 3 5 7]]


In [27]:
c = a.resize((4, 2)) # resize changes the shape of a itself and returns None
print(c) # None as resize modifies inplace while reshape creates a new array
print(a) # a now has shape (4, 2)

None
[[0 1]
 [2 3]
 [4 5]
 [6 7]]


In [28]:
# flatten() returns the data as a new 1D array
a = np.reshape(np.arange(8), (2, 4), order='C')
flat_a = a.flatten()
print(flat_a) # the 2x4 array as a 1D vector
flat_a[0] = 6
print(flat_a)
print(a)
# a is unchanged: flat_a is a separate copy, so writing to it does not affect a

[0 1 2 3 4 5 6 7]
[6 1 2 3 4 5 6 7]
[[0 1 2 3]
 [4 5 6 7]]


In [19]:
# ravel() also gives a 1D vector, but here it shares its data with a
a = np.reshape(np.arange(8), (2, 4), order='C')
ravel_a = np.ravel(a)
print(ravel_a)
ravel_a[0] = 6
print(ravel_a)
print(a)
# a[0, 0] is now 6 as well: ravel_a is NOT a copy, so changes to it affect a

[0 1 2 3 4 5 6 7]
[6 1 2 3 4 5 6 7]
[[6 1 2 3]
 [4 5 6 7]]


In [30]:
# Assignment (no copy at all): d is just a second name for the same ndarray as a
d = a
a[0][0] = 4
print(a)
print(d) # Note d and a will have same values even though we modified only a

[[4 1 2 3]
 [4 5 6 7]]
[[4 1 2 3]
 [4 5 6 7]]


In [31]:
# Deep copy
d = a.copy() # d gets its own independent copy of the data in a
a[0][0] = 3 # modifies a only
print(a)
print(d) # Note d and a will have different values

[[3 1 2 3]
 [4 5 6 7]]
[[4 1 2 3]
 [4 5 6 7]]


In [76]:
# INCLASS ACTIVITY
'''
A power ball needs a list of 6 numbers. The first 5 numbers have value between 1 and 59.
The last number also called power ball number will be between 1 and 35. 
Write a Python program to create this list with 6 numbers. Modify the code
so that it is seeded by the current date. 
'''

# randint's upper bound is exclusive, so 60 and 36 give the inclusive ranges
# 1..59 and 1..35 asked for above. (Scaling rand() by 58 and truncating gave
# 0..57 instead, and the np.int alias it relied on has been removed from NumPy.)
mainno = np.random.randint(1, 60, size=5)

powerballno = np.random.randint(1, 36, size=1)
print(mainno,powerballno)
# Join the five main numbers and the power ball number into one 6-element array
allno = np.concatenate([mainno,powerballno])
print(allno)

[12 46 20 10 26] [22]
[12 46 20 10 26 22]


In [64]:
'''
A power ball needs a list of 6 numbers. The first 5 numbers have value between 1 and 59.
The last number also called power ball number will be between 1 and 35. 
Write a Python program to create this list with 6 numbers. Modify the code
so that it is seeded by the current date. 
'''
import random
from datetime import datetime

# Seed from the full calendar date (not just the day of the month) so that each
# date gets its own reproducible draw. The numbers below are drawn from NumPy's
# generator, so np.random must be seeded; seeding only the stdlib `random`
# module would leave the draw unseeded.
date_seed = datetime.today().toordinal()
random.seed(date_seed)
np.random.seed(date_seed)

# randint's upper bound is exclusive: 1..59 for the main numbers, 1..35 for the power ball
mainno = np.random.randint(1, 60, size=5)

powerball = np.random.randint(1, 36, size=1)
allno = np.concatenate([mainno, powerball])
print(allno)

[ 9 25  1 34 38  3]


## ARRAY MANIPULATION

In [79]:
import numpy as np
a = np.random.rand(2, 4)
print(a)
a.sort() # ndarray.sort(axis=-1, ...) sorts in place along the last axis, so each row is sorted
print(a)

[[ 0.54276925  0.34794731  0.66112652  0.78535076]
 [ 0.01673111  0.6742457   0.30099448  0.5363158 ]]
[[ 0.34794731  0.54276925  0.66112652  0.78535076]
 [ 0.01673111  0.30099448  0.5363158   0.6742457 ]]


In [96]:
# Random 2x4 array of 0s and 1s: scale [0, 1) up to [0, 2) and truncate to integers
a = np.random.rand(2, 4)*2
a = a.astype(int) # builtin int; the np.int alias has been removed from NumPy
print(a, a.dtype)
# nonzero() returns one index array per dimension: row indices, then column indices
print(a.nonzero())

[[0 1 0 0]
 [0 1 1 1]] int64
(array([0, 1, 1, 1]), array([1, 1, 2, 3]))


### ARRAY CALCULATIONS

In [97]:
import numpy as np
# 2x2 array of random integers in 0..4 (floats in [0, 5) truncated to int)
a = 5 * np.random.rand(2, 2)
b = a.astype('int')
print(b)
# Whole-array reductions, reported one per line
for template, value in (
    ('Any element is {0}', b.any()),
    ('Sum of all elements is {0}', b.sum()),
    ('The product of all element is {0}', b.prod()),
    ('The max of all element is {0}', b.max()),
):
    print(template.format(value))

[[2 0]
 [4 0]]
Any element is True
Sum of all elements is 6
The product of all element is 0
The max of all element is 4


### ARRAY INDEXING

In [98]:
# Basic slicing
import numpy as np
a = np.random.rand(10, 10)*5
b = a.astype('int') # 10x10 array of integers in 0..4
print(b)
print('The rows=1 and cols=2 element is {0}'.format(b[1,2])) # single element at row index 1, column index 2
print('The first col is {0}'.format(b[:,0])) # rows, cols. all rows for cols=0
print('The third row is {0}'.format(b[2, :])) # all cols for rows = 2

[[3 0 0 3 3 4 4 3 4 0]
 [2 0 4 2 0 3 0 3 2 0]
 [3 1 2 1 2 0 2 3 3 3]
 [2 4 2 3 0 2 2 3 0 1]
 [2 3 1 3 3 0 4 3 2 0]
 [0 1 2 3 0 1 4 3 2 2]
 [1 1 0 3 3 2 4 4 3 0]
 [2 1 3 4 3 4 2 3 2 0]
 [1 4 0 3 2 2 4 1 0 3]
 [2 0 0 4 1 4 3 4 4 0]]
The rows=1 and cols=2 element is 4
The first col is [3 2 3 2 2 0 1 2 1 2]
The third row is [3 1 2 1 2 0 2 3 3 3]


### ROUTINES

In [105]:
# array(object=, dtype=None, copy=True, order=None, subok=False, ndmin=0)
# Convert any object to a ndarray. If copy is set to True, then a new copy is made.
# Convert a Python list or tuple to numpy array
import numpy as np

c = np.array((4, 5, 6), dtype=np.float32) # Change this to int and see the output
print(c, type(c), c.dtype) # the integer inputs are stored as float32 values

# There is another method called 'asarray' which is same as 'array' except
# that the copy defaults to False.

[ 4.  5.  6.] <class 'numpy.ndarray'> float32


In [106]:
# Will create a linear list of values starting from 'start' and going up to,
# but not including, 'stop' in steps of 'step'
d = np.arange(start=10, stop=20, step=2, dtype=np.float32)
print(d)

[ 10.  12.  14.  16.  18.]


In [108]:
d = np.zeros(shape=(3, 4), dtype=np.int64) # 3x4 array filled with 0s
print(d)
print(d.itemsize, d.dtype) # each int64 element occupies 8 bytes

[[0 0 0 0]
 [0 0 0 0]
 [0 0 0 0]]
8 int64


In [109]:
e = np.ones(shape=(3, 3), dtype=np.int32) # 3x3 array filled with 1s
print(e)

[[1 1 1]
 [1 1 1]
 [1 1 1]]


In [110]:
f = np.identity(n=3, dtype=np.int32) # 3x3 identity matrix: 1s on the diagonal, 0s elsewhere
print(f)

[[1 0 0]
 [0 1 0]
 [0 0 1]]


In [134]:
g = np.random.rand(3, 3)*5 # 3x3 array of random floats in [0, 5)
print(g)
print(np.where(g>2)) # Returns a tuple (row indices, column indices) of the elements greater than 2

[[ 1.94444417  1.75173797  4.51219478]
 [ 3.63695913  0.64913783  0.93919153]
 [ 4.3463223   1.30952849  3.15481494]]
(array([0, 1, 2, 2]), array([2, 0, 0, 2]))


In [80]:
# ogrid returns open index grids that broadcast against each other:
# x is a column of row indices, shape (3, 1); y is a row of column indices, shape (1, 4)
x, y = np.ogrid[:3, :4]
# Three-argument where: take x where x < y, otherwise 10 + y.
# Only a cell's last expression is displayed, so this is the one result shown.
np.where(x < y, x, 10 + y)

array([[10,  0,  0,  0],
       [10, 11,  1,  1],
       [10, 11, 12,  2]])

In [158]:
m = np.random.rand(100, 100)*100
m = m.astype('int') # 100x100 array of integers in 0..99
print(m)
print('The histogram is {0}'.format(np.histogram(m)))
# The first array holds the count in each bin and the second array holds the
# bin edges (one more edge than there are bins)

[[39 33 39 ..., 90 73  1]
 [63 23 92 ..., 30 81  6]
 [79 29 98 ..., 75 54 42]
 ..., 
 [ 9 66  5 ..., 63 82 72]
 [83 27 66 ..., 71 69 22]
 [56 21 70 ..., 64 78 48]]
The histogram is (array([ 952, 1012,  980, 1036, 1035,  982, 1011, 1054,  964,  974]), array([  0. ,   9.9,  19.8,  29.7,  39.6,  49.5,  59.4,  69.3,  79.2,
        89.1,  99. ]))


In [145]:
# Create 10 evenly spaced numbers from 1 to 20 (both end points included)
p = np.linspace(start = 1, stop = 20, num=10)
print('The linear space of value is {0}'.format(p))

# For logspace, start and stop are exponents of 10: the 10 values run from
# 10**0.1 to 10**0.2 (see the output below)
p = np.logspace(start = 0.1, stop = 0.2, num=10)
print('The log space of value is {0}'.format(p))



The linear space of value is [  1.           3.11111111   5.22222222   7.33333333   9.44444444
  11.55555556  13.66666667  15.77777778  17.88888889  20.        ]
The log space of value is [ 1.25892541  1.29154967  1.32501936  1.35935639  1.39458325  1.43072299
  1.46779927  1.50583635  1.54485915  1.58489319]


### OPERATIONS

In [146]:
import numpy as np

# c is float32 and d is float64; their difference f comes out as float64
c = np.array((4, 5, 6), dtype=np.float32)
d = np.linspace(10, 13, 3)
for vector in (c, d):
    print(vector)
f = np.subtract(d, c) # element-wise difference d - c
print(f, f.dtype)

[ 4.  5.  6.]
[ 10.   11.5  13. ]
[ 6.   6.5  7. ] float64


In [147]:
f = 10*c 
# Multiply an array by a scalar. c is of dtype=float32 and the result f keeps
# dtype=float32 (see the dtype printed below)
print(f, f.dtype)

[ 40.  50.  60.] float32


In [64]:
h = f > 50 # Compare every element with the value 50; the result is a boolean array
print(h)

[False False  True]


In [148]:
# Indexing with boolean arrays
k = f > 50
print(k)
print(f[k]) # Returns the values in f at the positions where k is True
print(f[f>50]) # Same as the previous line, except that the boolean array is not stored under a name

[False False  True]
[ 60.]
[ 60.]


#### Calculate value of pi using Gregory-Leibniz series
$$ 1\,-\,{\frac {1}{3}}\,+\,{\frac {1}{5}}\,-\,{\frac {1}{7}}\,+\,{\frac {1}{9}}\,-\,\cdots \;=\;{\frac {\pi }{4}}.\! $$

In [154]:
noofterms = 100000

# Numerators alternate +1, -1, +1, ...: start from all ones (a single-row
# 2D array), then flip every second entry to -1
numerator = np.ones((1, noofterms))
numerator[:, 1::2] = -1
print("numerator is: {0} ".format(numerator))

# Denominators are the first noofterms odd numbers: 1, 3, 5, 7 ...
last_odd = 2*noofterms - 1
denominator = np.linspace(1, last_odd, noofterms)
print('denominator is: ', denominator)

# The series converges to pi/4, so sum all terms and multiply by 4
terms = numerator/denominator
pival = 4.0*terms.sum()
print('The value of pi is {0}'.format(pival))

numerator is: [[ 1. -1.  1. ..., -1.  1. -1.]] 
denominator is:  [  1.00000000e+00   3.00000000e+00   5.00000000e+00 ...,   1.99995000e+05
   1.99997000e+05   1.99999000e+05]
The value of pi is 3.141582653589792


In [81]:
# generator
def squared(i):
    """Lazily yield the square of each item of the iterable i, in order."""
    yield from (value * value for value in i)


for v in squared([4, 5, 6]):
    print(v)

16
25
36
