In [None]:
# http://people.duke.edu/~ccc14/pcfb/numpympl/NumpyBasics.html
# https://docs.scipy.org/doc/numpy-dev/user/quickstart.html
# http://www.engr.ucsb.edu/~shell/che210d/numpy.pdf
# http://www.labri.fr/perso/nrougier/teaching/numpy/numpy.html
# http://www.labri.fr/perso/nrougier/teaching/numpy.100/

# NUMPY TO MATLAB - https://docs.scipy.org/doc/numpy-dev/user/numpy-for-matlab-users.html

In [1]:
import numpy as np

## NDARRAY
An ndarray is an n-dimensional array in which all items are of the same type (unlike a Python list) and consequently each item uses the same amount of space. The list below names 23 of the scalar types (also called dtypes) that can be stored in an ndarray:
* bool_ 
* byte
* short
* intc
* int_
* longlong
* intp
* ubyte
* ushort
* uintc
* uint
* ulonglong
* uintp
* single
* float_
* longfloat
* csingle
* complex_
* clongfloat
* object_
* str_
* unicode_
* void

For some of the dtypes, a trailing _ is appended to differentiate that dtype from the corresponding Python type. Such types are also called 'enhanced scalars'. They have the same precision as the Python type.

All the types except the str_, unicode_ and void are of fixed size. 

In [2]:
# Build a 1-D ndarray holding the integers 0..7 (the NumPy analogue of
# range(8)), then show its contents, its Python type and its element dtype.
a = np.arange(0, 8)
print(a, type(a), a.dtype)

[0 1 2 3 4 5 6 7] <class 'numpy.ndarray'> int32


In [4]:
# Indexing versus slicing
element = a[3]   # a single index selects one item, so its type is the element type
print(element, type(element))
window = a[2:5]  # a slice selects a range of items and yields another ndarray
print(window, type(window))

3 <class 'numpy.int64'>
[2 3 4] <class 'numpy.ndarray'>


In [5]:
# Universal functions (ufuncs) apply an operation to an ndarray
# element by element.
b = np.sin(a)  # sine of every element of a
print(b)

c = np.add(a, b)  # the ufunc spelling of a + b
print(c)

# The full list of ufuncs is available at
# http://docs.scipy.org/doc/numpy/reference/ufuncs.html

[ 0.          0.84147098  0.90929743  0.14112001 -0.7568025  -0.95892427
 -0.2794155   0.6569866 ]
[ 0.          1.84147098  2.90929743  3.14112001  3.2431975   4.04107573
  5.7205845   7.6569866 ]


In [8]:
# In the earlier addition both operands were vectors of the same length.
# Here one operand is a vector and the other is a plain scalar.
d = a + 3  # operator form of np.add(a, 3)
print(d)

# Adding the scalar 3 to the vector a is carried out by adding 3 to every
# element of a. In other words, the value 3 is 'broadcast' to each element
# of a before the addition.

[ 3  4  5  6  7  8  9 10]


In [12]:
# NDARRAY attributes
# Each entry pairs a printed label with the value(s) displayed after it.
attribute_report = [
    ("Shape is: ", (b.shape, b.size)),          # shape tuple and total element count
    ("Number of dimensions are: ", (b.ndim,)),  # number of axes
    ("Data type: ", (b.dtype,)),                # dtype shared by every element
    ("Itemsize: ", (b.itemsize,)),              # bytes occupied by one element
    ("type: ", (type(b),)),                     # Python type of the array object
    ("dir: ", (dir(b.dtype),)),                 # attribute names of the dtype object
]

for label, values in attribute_report:
    print(label, *values)

Shape is:  (8,) 8
Number of dimensions are:  1
Data type:  float64
Itemsize:  8
type:  <class 'numpy.ndarray'>
dir:  ['__bool__', '__class__', '__delattr__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getitem__', '__gt__', '__hash__', '__init__', '__le__', '__len__', '__lt__', '__mul__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__rmul__', '__setattr__', '__setstate__', '__sizeof__', '__str__', '__subclasshook__', 'alignment', 'base', 'byteorder', 'char', 'descr', 'fields', 'flags', 'hasobject', 'isalignedstruct', 'isbuiltin', 'isnative', 'itemsize', 'kind', 'metadata', 'name', 'names', 'ndim', 'newbyteorder', 'num', 'shape', 'str', 'subdtype', 'type']


### ARRAY CONVERSION

In [13]:
# tolist() converts the ndarray b into an ordinary Python list
b_as_list = b.tolist()
print(b_as_list)

[0.0, 0.8414709848078965, 0.9092974268256817, 0.1411200080598672, -0.7568024953079282, -0.9589242746631385, -0.27941549819892586, 0.6569865987187891]


In [15]:
# Write the vector b to "data.csv" as comma-separated text, formatting each
# value with three digits after the decimal point.
b.tofile("data.csv", sep=",", format="%0.3f")

In [11]:
# Store a and b together in a single .npy file and read them back.
# Read more about npy format at https://docs.scipy.org/doc/numpy/neps/npy-format.html
# Note: the list [a, b] is written as one 2-row array, so both rows share one
# dtype; the saved output shows c1 coming back as float64 although a is an
# integer array.
np.save('b.npy', [a, b])
stacked = np.load('b.npy')
c1, d1 = stacked[0], stacked[1]  # first row came from a, second row from b
print(a, b, a.dtype)
print(c1, d1, c1.dtype)

[0 1 2 3 4 5 6 7] [ 0.          0.84147098  0.90929743  0.14112001 -0.7568025  -0.95892427
 -0.2794155   0.6569866 ] int64
[ 0.  1.  2.  3.  4.  5.  6.  7.] [ 0.          0.84147098  0.90929743  0.14112001 -0.7568025  -0.95892427
 -0.2794155   0.6569866 ] float64


### ARRAY SHAPE 

In [20]:
a = np.arange(8)
# View the same eight values as a 2x4 matrix; row-major ('C') order is the
# default, so it does not need to be spelled out.
b = a.reshape(2, 4)
print(b, a, b.shape)

[[0 1 2 3]
 [4 5 6 7]] [0 1 2 3 4 5 6 7] (2, 4)


In [19]:
# resize() changes the shape of a in place and returns None, so c is None;
# reshape(), by contrast, returns a new array object.
c = a.resize(4, 2)
print(c)
print(a)  # a itself now has shape (4, 2)

None
[[0 1]
 [2 3]
 [4 5]
 [6 7]]


In [19]:
a = np.arange(8).reshape(2, 4)
# flatten() returns the data as a 1-D vector that is a fresh copy
flat_a = a.flatten()
print(flat_a)
flat_a[0] = 6
print(flat_a)
# a still starts with 0: flat_a is an independent copy, so changing flat_a
# does not affect a
print(a)

[0 1 2 3 4 5 6 7]
[6 1 2 3 4 5 6 7]
[[0 1 2 3]
 [4 5 6 7]]


In [21]:
a = np.arange(8).reshape(2, 4)
# ravel() also yields a 1-D vector, but here it shares memory with a
ravel_a = a.ravel(order='C')
print(ravel_a)
ravel_a[0] = 6
print(ravel_a)
# a now starts with 6 as well: ravel_a is NOT a copy, so changing ravel_a
# changes a
print(a)

[0 1 2 3 4 5 6 7]
[6 1 2 3 4 5 6 7]
[[6 1 2 3]
 [4 5 6 7]]


In [21]:
# No copy at all (aliasing)
# Plain assignment does not copy anything: d becomes a second name for the
# very same ndarray object as a. (In NumPy terminology a "shallow copy" is a
# view, created with a.view(), which is a new array object sharing a's data.)
d = a
a[0, 0] = 4  # single multi-dimensional index instead of chained a[0][0]
print(a)
print(d)  # d shows the change too, because d and a are the same object

[[4 1 2 3]
 [4 5 6 7]]
[[4 1 2 3]
 [4 5 6 7]]


In [22]:
# Deep copy
# copy() allocates a new array with its own data, so d is independent of a.
d = a.copy()
a[0, 0] = 3
print(a)
print(d)  # d keeps the old value: only a was modified

[[3 1 2 3]
 [4 5 6 7]]
[[4 1 2 3]
 [4 5 6 7]]


In [44]:
# Draw 100 random integers from 1 up to (but not including) 10
a = np.random.randint(low=1, high=10, size=100)
print(a)

[5 3 5 3 1 5 7 5 9 8 1 7 6 3 2 3 8 4 4 1 5 6 6 8 3 4 3 1 1 2 1 7 1 7 5 7 4
 1 8 2 5 4 3 5 4 5 7 9 6 1 4 3 7 3 7 7 1 4 8 8 4 7 7 4 6 1 7 9 9 2 6 3 2 4
 8 8 4 5 3 1 3 2 7 1 3 8 5 5 5 7 8 2 1 3 2 4 9 6 6 9]


## ARRAY MANIPULATION

In [46]:
import numpy as np

# A 2x4 matrix of uniform random values in [0, 1)
a = np.random.rand(2, 4)
print(a)
# In-place sort of every row. For a 2-D array the last axis (-1) is axis 1.
# Signature: ndarray.sort(axis=-1, kind=None, order=None)
a.sort(axis=-1)
print(a)

[[ 0.97113463  0.92906228  0.84403312  0.29306667]
 [ 0.05952695  0.1634882   0.20129477  0.24278193]]
[[ 0.29306667  0.84403312  0.92906228  0.97113463]
 [ 0.05952695  0.1634882   0.20129477  0.24278193]]


In [29]:
# Random floats in [0, 2); casting them to integers truncates each to 0 or 1.
a = np.random.rand(2, 4)*2
# astype() returns a copy cast to the requested dtype. The builtin int is used
# because the np.int alias was deprecated in NumPy 1.20 and removed in 1.24.
a = a.astype(int)
print(a, a.dtype)
# nonzero() returns one index array per dimension (rows, then columns)
# locating the non-zero elements.
print(a.nonzero())

[[0 1 1 0]
 [1 0 1 0]] int64
(array([0, 0, 1, 1]), array([1, 2, 0, 2]))


### ARRAY CALCULATIONS

In [27]:
import numpy as np

# 2x2 matrix of random floats in [0, 5), truncated to integers 0..4
a = np.random.rand(2, 2)*5
b = a.astype(int)
print(b)
# Reductions over every element of b
print(f'Any element is {b.any()}')                    # True if any element is non-zero
print(f'Sum of all elements is {b.sum()}')
print(f'The product of all element is {b.prod()}')
print(f'The max of all element is {b.max()}')

[[3 4]
 [3 3]]
Any element is True
Sum of all elements is 13
The product of all element is 108
The max of all element is 4


### ARRAY INDEXING

In [54]:
# Basic slicing
import numpy as np

# 10x10 matrix of random integers 0..4
a = np.random.rand(10, 10)*5
b = a.astype(int)
print(b)
# Indices are given as [row, column]; ':' selects everything along that axis.
print(f'The rows=1 and cols=2 element is {b[1,2]}')
print(f'The first col is {b[:,0]}')   # every row of column 0
print(f'The third row is {b[2, :]}')  # every column of row 2

[[0 4 3 3 2 1 4 2 3 0]
 [0 3 3 0 0 4 3 2 3 4]
 [3 4 2 0 2 0 3 3 3 3]
 [1 2 1 0 0 0 2 4 2 3]
 [1 0 4 2 4 1 0 0 1 4]
 [0 4 4 0 0 3 2 2 0 3]
 [1 3 3 0 0 0 0 1 0 4]
 [4 3 1 3 3 4 2 3 4 3]
 [2 2 1 0 2 0 2 3 4 1]
 [0 4 2 1 0 0 3 1 4 1]]
The rows=1 and cols=2 element is 3
The first col is [0 0 3 1 1 0 1 4 2 0]
The third row is [3 4 2 0 2 0 3 3 3 3]


### ROUTINES

In [28]:
# np.array(object, dtype=None, copy=True, order=None, subok=False, ndmin=0)
# turns an array-like object (for example a Python list or tuple) into an
# ndarray. With copy=True the result is always a new copy of the data.
import numpy as np

# Try another dtype (e.g. int) here and compare the output
c = np.array([4, 5, 6], dtype='float32')
print(c, type(c), c.dtype)

# The related function 'asarray' behaves like 'array' except that it does
# not copy by default.

[ 4.  5.  6.] <class 'numpy.ndarray'> float32


In [55]:
# arange produces evenly spaced values beginning at 'start' and advancing by
# 'step'; 'stop' itself is excluded (half-open interval), so the last value
# here is 18.
d = np.arange(10, 20, 2, dtype=np.float32)
print(d)

[ 10.  12.  14.  16.  18.]


In [30]:
# 3x4 matrix filled with 64-bit integer zeros
d = np.zeros((3, 4), dtype='int64')
print(d)
# each int64 element occupies 8 bytes
print(d.itemsize, d.dtype)

[[0 0 0 0]
 [0 0 0 0]
 [0 0 0 0]]
8 int64


In [31]:
# 3x3 matrix filled with 32-bit integer ones
e = np.ones((3, 3), dtype='int32')
print(e)

[[1 1 1]
 [1 1 1]
 [1 1 1]]


In [32]:
# 3x3 identity matrix: ones on the main diagonal, zeros elsewhere
f = np.identity(3, dtype='int32')
print(f)

[[1 0 0]
 [0 1 0]
 [0 0 1]]


In [56]:
# 3x3 matrix of random floats in [0, 5)
g = 5 * np.random.rand(3, 3)
print(g)
# With a single argument, np.where returns the (row, column) index arrays of
# the non-zero elements. In the saved run every entry is non-zero, so all
# nine positions are listed.
print(np.where(g))

[[ 0.28523417  2.23288863  4.39450885]
 [ 0.19131807  3.47917344  1.47106294]
 [ 2.60710066  1.92032452  0.94064779]]
(array([0, 0, 0, 1, 1, 1, 2, 2, 2]), array([0, 1, 2, 0, 1, 2, 0, 1, 2]))


In [33]:
# linspace: 10 evenly spaced numbers from 1 to 20, both end points included
p = np.linspace(1, 20, num=10)
print(f'The linear space of value is {p}')

# logspace: 10 numbers evenly spaced on a log scale, running from 10**0.1
# to 10**0.20 (start and stop are exponents of the default base 10)
p = np.logspace(0.1, 0.20, num=10)
print(f'The log space of value is {p}')

The linear space of value is [  1.           3.11111111   5.22222222   7.33333333   9.44444444
  11.55555556  13.66666667  15.77777778  17.88888889  20.        ]
The log space of value is [ 1.25892541  1.29154967  1.32501936  1.35935639  1.39458325  1.43072299
  1.46779927  1.50583635  1.54485915  1.58489319]


### OPERATIONS

In [2]:
import numpy as np

c = np.array([4, 5, 6], dtype='float32')
d = np.linspace(10, 12, num=3)  # [10., 11., 12.] as float64
print(c)
print(d)
# Element-wise division d / c; mixing float64 with float32 gives a float64
# result, as the printed dtype shows.
f = np.divide(d, c)
print(f, f.dtype)

[4. 5. 6.]
[10. 11. 12.]
[2.5 2.2 2. ] float64


In [10]:
# Scale every element of c by 10; the result keeps c's dtype
f = c * 10
print(f, f.dtype)

[40. 50. 60.] float32


In [4]:
# Compare every element of f with 50; the result is a boolean array
h = np.greater(f, 50)
print(h)

[False False  True]


In [6]:
# Indexing with boolean arrays
k = np.greater(f, 50)  # boolean mask: True where the element of f exceeds 50
print(k)
print(f[k])       # keeps only the values of f where the mask k is True
print(f[f > 50])  # same selection, without storing the mask in a variable

[False False  True]
[60.]
[60.]


In [None]:
# np.min() returns the smallest value, overall or along one axis

a = np.array([[5, 7, 9],
              [3, -6, 11],
              [2, -8, 13]])
print(a)
print(np.min(a))          # smallest of all nine values
print(np.min(a, axis=0))  # axis 0: one minimum per column
print(np.min(a, axis=1))  # axis 1: one minimum per row

In [None]:
# np.max() returns the largest value, overall or along one axis

a = np.array([[5, 7, 9],
              [3, 6, 11],
              [2, 8, 13]])
print(a)
print(np.max(a))          # largest of all nine values
print(np.max(a, axis=0))  # axis 0: one maximum per column
print(np.max(a, axis=1))  # axis 1: one maximum per row

In [None]:
# np.mean() returns the mean (the average)

a = np.array([[20, 10, 50],
              [30, 50, 70],
              [60, 40, 10]])
print(a)
print(np.mean(a))  # mean of all nine values
# axis 0 gives one mean per column, axis 1 gives one mean per row
for axis in (0, 1):
    print(np.mean(a, axis=axis))

In [None]:
# np.median() returns the median (the middle value once sorted)

a = np.array([[20, 10, 50],
              [30, 50, 70],
              [60, 40, 10]])
print(a)
print(np.median(a))  # median of all nine values
# axis 0 gives one median per column, axis 1 gives one median per row
for axis in (0, 1):
    print(np.median(a, axis=axis))


In [None]:
# The standard deviation is the square root of the average of the squared
# deviations from the mean.

a = np.arange(1, 6)  # the values 1, 2, 3, 4, 5
print(np.mean(a))
print(np.std(a))
# The mean is 3, the squared deviations are 4, 1, 0, 1, 4 (average 2),
# so the printed value is the square root of 2.

In [None]:
"""
In-class activity - create an ndarray with values [2, -4, 6], [7, 4, 8] and [5, 10, 4]. 
Determine the following:
1) The maximum value
2) The minimum value
3) The mean and median values 
4) The standard deviation. 
"""