In [1]:
import numpy as np
# A 1-D array built from a Python list, printed next to an arange result.
a = np.array([0, 1, 2, 3])
print(a, np.arange(10), sep="\n")

[0 1 2 3]
[0 1 2 3 4 5 6 7 8 9]


In [2]:
#pure-Python baseline: square 1000 integers with a list comprehension
#(L is a range object here, not an actual list)
L=range(1000)
%timeit [i**2 for i in L]   #%timeit runs the statement repeatedly and reports the mean time per loop

530 µs ± 17.5 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [3]:
#same computation on a NumPy array: a**2 squares all 1000 elements in one expression
a=np.arange(1000)
%timeit a**2

2.87 µs ± 129 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


## 1.creating arrays

## 1.1 manual construction of arrays

In [4]:
#1-D

a=np.array([0,1,2,3])
a

array([0, 1, 2, 3])

In [5]:
#print dimensions

a.ndim

1

In [6]:
#shape
a.shape

(4,)

In [7]:
len(a)

4

In [8]:
#2-D,3-D

b=np.array([[0,1,2],[3,4,5]])

b

array([[0, 1, 2],
       [3, 4, 5]])

In [9]:
b.ndim

2

In [10]:
b.shape

(2, 3)

In [11]:
len(b) #returns the size of first dimension

2

In [12]:
c=np.array([[[0,1],[2,3]],[[4,5],[6,7]]])

c

array([[[0, 1],
        [2, 3]],

       [[4, 5],
        [6, 7]]])

In [13]:
c.ndim

3

In [14]:
c.shape

(2, 2, 2)

## 1.2 functions for creating array

In [15]:
# using the arange function

# arange is the array-valued counterpart of Python's built-in range

a = np.arange(0, 10)  # 0 ... n-1 (the stop value is excluded)

a

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [16]:
b=np.arange(1,10,2)  #start,end(exclusive),step

b

array([1, 3, 5, 7, 9])

In [17]:
#using linspace

a=np.linspace(0,1,6)  #start,end,number of points

a

array([0. , 0.2, 0.4, 0.6, 0.8, 1. ])

In [18]:
#common arrays
a=np.ones((3,3))

a

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [19]:
b=np.zeros((3,3))

b

array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.]])

In [20]:
c=np.eye(3)  #return a 2-D array with ones on the diagonal and zero elsewhere.

c

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [21]:
# N is the number of rows, M the number of columns; the diagonal offset k
# is left at its default, so the ones sit on the main diagonal
d = np.eye(N=3, M=2)

d

array([[1., 0.],
       [0., 1.],
       [0., 0.]])

In [22]:
# create array using diagonal functions

a=np.diag([1,2,3,4])  #construct a diagonal array.

a

array([[1, 0, 0, 0],
       [0, 2, 0, 0],
       [0, 0, 3, 0],
       [0, 0, 0, 4]])

In [23]:
np.diag(a)  #extract diagonal

array([1, 2, 3, 4])

In [24]:
#create array using random

#create an array of the given shape and populate it with random samples

a=np.random.rand(4)

a

array([0.2463287 , 0.74835778, 0.50531311, 0.24137325])

## 2. basic datatypes

In [25]:
a=np.arange(10)

a.dtype

dtype('int32')

In [26]:
# you can explicitly specify which data-type you want:

a=np.arange(10,dtype='float64')

a

array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])

In [27]:
# zeros() and ones() create float64 arrays unless a dtype is given

a = np.zeros(shape=(3, 3))

print(a)

a.dtype

[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]


dtype('float64')

## other datatypes

In [28]:
d=np.array([1+2j, 2+4j])  #complex datatype

print(d.dtype)

complex128


In [29]:
b=np.array([True,False,True,False])  #boolean datatype

print(b.dtype)

bool


In [30]:
s=np.array(['ram','robert','rahim'])

s.dtype

dtype('<U6')

## 3.indexing and slicing

In [31]:
a=np.arange(10)

print(a[5])   #indices begin at 0, like other python sequences (and c/c++)

5


In [32]:
#for multidimensional arrays, indexes are tuples of integers:

a=np.diag([1,2,3])

print(a[2,2])

3


In [33]:
a[2,1]=5  #assigning value

a

array([[1, 0, 0],
       [0, 2, 0],
       [0, 5, 3]])

## 3.2 slicing

In [34]:
a=np.arange(10)

a

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [35]:
a[1:8:2]  #[startindex: endindex(exclusive) : step]

array([1, 3, 5, 7])

In [36]:
# assignment and slicing can be combined: the scalar fills the whole slice

a = np.arange(10)
a[5:10] = 10
a

array([ 0,  1,  2,  3,  4, 10, 10, 10, 10, 10])

In [37]:
b=np.arange(5)
a[5:]=b[::-1]   #assigning

a

array([0, 1, 2, 3, 4, 4, 3, 2, 1, 0])

## copies and views

In [38]:
a=np.arange(10)

a

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [39]:
b=a[::2]

b

array([0, 2, 4, 6, 8])

In [40]:
np.shares_memory(a,b)

True

In [41]:
b[0]=10

b

array([10,  2,  4,  6,  8])

In [42]:
a  #even though we modified b, 'a' changed too because both share the same memory

array([10,  1,  2,  3,  4,  5,  6,  7,  8,  9])

In [43]:
a=np.arange(10)
c=a[::2].copy()   #force a copy
c

array([0, 2, 4, 6, 8])

In [44]:
np.shares_memory(a,c)

False

In [45]:
c[0]=10

a

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

## 5.fancy indexing

- NumPy arrays can be indexed with slices, but also with boolean or integer arrays (masks). This method is called fancy indexing. It creates copies, not views.

In [46]:
#using boolean mask

a=np.random.randint(0,20,15)

a

array([ 7,  2,  5, 14,  5,  3, 11, 17, 16, 11,  7,  9,  5,  7, 17])

In [47]:
mask=(a%2==0)

In [48]:
extract_from_a=a[mask]

extract_from_a

array([ 2, 14, 16])

In [49]:
a[mask]=-1
a

array([ 7, -1,  5, -1,  5,  3, 11, 17, -1, 11,  7,  9,  5,  7, 17])

## indexing with an array of integers

In [50]:
a=np.arange(0,100,10)

a

array([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])

In [51]:
a[[2,3,2,4,2]]

array([20, 30, 20, 40, 20])

In [52]:
#new values can be assigned

a[[9,7]]=-200

a

array([   0,   10,   20,   30,   40,   50,   60, -200,   80, -200])

## elementwise operations

## 1.basic operations

## with scalars

In [53]:
a=np.array([1,2,3,4]) #create an array

a+1

array([2, 3, 4, 5])

In [54]:
a**2

array([ 1,  4,  9, 16], dtype=int32)

## all arithmetic operates elementwise

In [57]:
b=np.ones(4)+1
print(b)
a-b

[2. 2. 2. 2.]


array([-1.,  0.,  1.,  2.])

In [58]:
a*b

array([2., 4., 6., 8.])

In [59]:
# elementwise product vs. matrix product

c = np.diag([1, 2, 3, 4])

print(c * c)  # '*' multiplies element by element; it is not a matrix product
print("***************")
print(np.dot(c, c))  # the matrix product; it equals c*c here only because c is diagonal

[[ 1  0  0  0]
 [ 0  4  0  0]
 [ 0  0  9  0]
 [ 0  0  0 16]]
***************
[[ 1  0  0  0]
 [ 0  4  0  0]
 [ 0  0  9  0]
 [ 0  0  0 16]]


## comparisons

In [60]:
a=np.array([1,2,3,4])
b=np.array([5,2,2,4])
a==b

array([False,  True, False,  True])

In [61]:
a>b

array([False, False,  True, False])

In [62]:
# array-wise comparison: array_equal returns a single bool for the two arrays
a = np.array([1, 2, 3, 4])
b = np.array([5, 2, 2, 4])
c = np.array([1, 2, 3, 4])

np.array_equal(a, b)

False

In [63]:
np.array_equal(a,c)

True

## logical operations

In [64]:
# two boolean arrays covering every True/False combination
a = np.array([True, True, False, False])
b = np.array([True, False, True, False])

np.logical_or(a, b)

array([ True,  True,  True, False])

In [65]:
np.logical_and(a,b)

array([ True, False, False, False])

## transcendental functions:

In [66]:
a=np.arange(5)

np.sin(a)

array([ 0.        ,  0.84147098,  0.90929743,  0.14112001, -0.7568025 ])

In [67]:
np.log(a)  #a[0] is 0, so the first result is -inf and NumPy emits a RuntimeWarning (divide by zero)

  np.log(a)


array([      -inf, 0.        , 0.69314718, 1.09861229, 1.38629436])

In [68]:
np.exp(a)  #evaluates e^x for each element in a given input

array([ 1.        ,  2.71828183,  7.3890561 , 20.08553692, 54.59815003])

## shape mismatch

In [69]:
#shape mismatch: a (4,) array cannot be broadcast against a (2,) array.
#The failure is the point of this cell, so the error is caught and printed
#instead of letting the traceback stop a "Restart & Run All".
a=np.arange(4)
try:
    a+np.array([1,2])
except ValueError as err:
    print("ValueError:", err)

ValueError: operands could not be broadcast together with shapes (4,) (2,) 

## basic reductions

## computing sums

In [71]:
x = np.array([1, 2, 3, 4])

x.sum()  # total of all elements

10

In [72]:
#sum by rows and by columns

x=np.array([[1,1],[2,2]])

x

array([[1, 1],
       [2, 2]])

In [73]:
x.sum(axis=0)  #sum along the first dimension (down the rows): one total per column

array([3, 3])

In [75]:
x.sum(axis=1)  #sum along the second dimension (across the columns): one total per row

array([2, 4])

## other reductions

In [76]:
x=np.array([1,3,2])

x.min()

1

In [77]:
x.max()

3

In [80]:
x.argmin()  #index of minimum element

0

In [81]:
x.argmax()  #index of maximum element

1

## logical operations

In [82]:
np.all([True,True,False])

False

In [83]:
np.any([True,False,False])

True

In [84]:
# note: any/all can be used for whole-array comparisons

a = np.zeros(shape=(50, 50))
np.any(a != 0)

False

In [85]:
np.all(a==a)

True

In [86]:
a = np.array([1, 2, 3, 2])
b = np.array([2, 2, 3, 2])
c = np.array([6, 4, 4, 5])

# True only when a <= b <= c holds at every position
np.all(np.logical_and(a <= b, b <= c))

True

## statistics

In [87]:
x = np.array([1, 2, 3, 1])
y = np.array([[1, 2, 3], [5, 6, 1]])
np.mean(x)  # arithmetic mean of all elements of x

1.75

In [88]:
np.median(x)

1.5

In [89]:
np.median(y,axis=-1)  #last axis

array([2., 5.])

In [90]:
x.std()    #full population standard dev.

0.82915619758885

In [91]:
#loading data into a numpy array object
#The absolute path below only exists on the original author's machine. It is kept
#as the default, but setting the POPULATIONS_DATA_DIR environment variable lets the
#notebook read populations.txt from another folder without editing this cell.
import os
DATA_DIR=os.environ.get('POPULATIONS_DATA_DIR','C:/Users/Suhit/Desktop/ML/ML course Applied/examples')
data=np.loadtxt(DATA_DIR+'/populations.txt')

In [92]:
data

array([[ 1900., 30000.,  4000., 48300.],
       [ 1901., 47200.,  6100., 48200.],
       [ 1902., 70200.,  9800., 41500.],
       [ 1903., 77400., 35200., 38200.],
       [ 1904., 36300., 59400., 40600.],
       [ 1905., 20600., 41700., 39800.],
       [ 1906., 18100., 19000., 38600.],
       [ 1907., 21400., 13000., 42300.],
       [ 1908., 22000.,  8300., 44500.],
       [ 1909., 25400.,  9100., 42100.],
       [ 1910., 27100.,  7400., 46000.],
       [ 1911., 40300.,  8000., 46800.],
       [ 1912., 57000., 12300., 43800.],
       [ 1913., 76600., 19500., 40900.],
       [ 1914., 52300., 45700., 39400.],
       [ 1915., 19500., 51100., 39000.],
       [ 1916., 11200., 29700., 36700.],
       [ 1917.,  7600., 15800., 41800.],
       [ 1918., 14600.,  9700., 43300.],
       [ 1919., 16200., 10100., 41300.],
       [ 1920., 24700.,  8600., 47300.]])

In [93]:
year,hares,lynxes,carrots=data.T  #columns to variables
print(year)

[1900. 1901. 1902. 1903. 1904. 1905. 1906. 1907. 1908. 1909. 1910. 1911.
 1912. 1913. 1914. 1915. 1916. 1917. 1918. 1919. 1920.]


In [95]:
#keep only the population columns (every column after the year); no mean is taken in this cell
populations=data[:,1:]
populations

array([[30000.,  4000., 48300.],
       [47200.,  6100., 48200.],
       [70200.,  9800., 41500.],
       [77400., 35200., 38200.],
       [36300., 59400., 40600.],
       [20600., 41700., 39800.],
       [18100., 19000., 38600.],
       [21400., 13000., 42300.],
       [22000.,  8300., 44500.],
       [25400.,  9100., 42100.],
       [27100.,  7400., 46000.],
       [40300.,  8000., 46800.],
       [57000., 12300., 43800.],
       [76600., 19500., 40900.],
       [52300., 45700., 39400.],
       [19500., 51100., 39000.],
       [11200., 29700., 36700.],
       [ 7600., 15800., 41800.],
       [14600.,  9700., 43300.],
       [16200., 10100., 41300.],
       [24700.,  8600., 47300.]])

In [96]:
# standard deviation of each column; std() defaults to ddof=0, i.e. the population
# (not the sample) standard deviation -- pass ddof=1 for the sample estimate
populations.std(axis=0)

array([20897.90645809, 16254.59153691,  3322.50622558])

In [97]:
#which species has the highest population each year?

np.argmax(populations,axis=1)

array([2, 2, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 0, 0, 0, 1, 2, 2, 2, 2, 2],
      dtype=int64)

## broadcasting

In [101]:
# Repeat the row [0, 10, 20, 30] three times, show it, then transpose to (4, 3).
a = np.tile(np.arange(0, 40, 10), (3, 1))
print(a)
print("\n", "*********************", "\n", sep="\n")
a = a.T
print(a)

[[ 0 10 20 30]
 [ 0 10 20 30]
 [ 0 10 20 30]]


*********************


[[ 0  0  0]
 [10 10 10]
 [20 20 20]
 [30 30 30]]


In [102]:
b=np.array([0,1,2])

b

array([0, 1, 2])

In [103]:
a+b

array([[ 0,  1,  2],
       [10, 11, 12],
       [20, 21, 22],
       [30, 31, 32]])

In [104]:
a=np.arange(0,40,10)

a.shape

(4,)

In [105]:
a=a[:,np.newaxis] #adds a new axis-> 2D array
a.shape

(4, 1)

In [106]:
a

array([[ 0],
       [10],
       [20],
       [30]])

## array shape manipulation

### flattening

In [114]:
a = np.array([[1, 2, 3], [4, 5, 6]])
np.ravel(a)  # return a contiguous flattened (1-D) array


array([1, 2, 3, 4, 5, 6])

In [112]:
#shape of the transpose (a.T); only the last expression of a cell is displayed,
#so a separate bare a.T line before it had no effect and is dropped
a.T.shape

(3, 2)

In [113]:
a.T.ravel()

array([1, 4, 2, 5, 3, 6])

### reshaping

- the inverse operation to flattening:

In [115]:
print(a.shape)
print(a)

(2, 3)
[[1 2 3]
 [4 5 6]]


In [116]:
b=a.ravel()
print(b)

[1 2 3 4 5 6]


In [117]:
b=b.reshape((2,3))
b

array([[1, 2, 3],
       [4, 5, 6]])

In [118]:
b[0,0] = 100
a

array([[100,   2,   3],
       [  4,   5,   6]])

- note and beware: reshape may also return a copy !

In [120]:
# Flattening the transpose cannot reuse a's memory layout, so reshape returns
# a new array here; writing to b therefore leaves a untouched.
a = np.zeros(shape=(3, 2))
b = a.T.reshape(a.size)
b[0] = 50
a

array([[0., 0.],
       [0., 0.],
       [0., 0.]])

## adding a dimension

- indexing with the np.newaxis object allows us to add an axis to an array

- newaxis increases the number of dimensions of an existing array by one each time it is used.

- thus,

1D array will become 2D array

2D array will become 3D array

3D array will become 4D array and so on

In [121]:
z=np.array([1,2,3])

z

array([1, 2, 3])

In [122]:
z[:,np.newaxis]

array([[1],
       [2],
       [3]])

### dimension shuffling

In [123]:
a = np.arange(24).reshape((4, 3, 2))  # 4*3*2 = 24 elements

a.shape

(4, 3, 2)

In [124]:
a

array([[[ 0,  1],
        [ 2,  3],
        [ 4,  5]],

       [[ 6,  7],
        [ 8,  9],
        [10, 11]],

       [[12, 13],
        [14, 15],
        [16, 17]],

       [[18, 19],
        [20, 21],
        [22, 23]]])

In [125]:
a[0,2,1]

5

### resizing

In [126]:
a=np.arange(4)
a.resize((8,))
a

array([0, 1, 2, 3, 0, 0, 0, 0])

- however, the array must not be referred to from anywhere else:

In [127]:
#resize() changes the array in place, and NumPy refuses to do that while a second
#name (b) refers to the same array. The ValueError is the point of this cell, so it
#is caught and printed instead of stopping a "Restart & Run All".
b=a
try:
    a.resize((4,))
except ValueError as err:
    print("ValueError:", err)

ValueError: cannot resize an array that references or is referenced
by another array in this way.
Use the np.resize function or refcheck=False

## sorting data

In [128]:
# sort each row independently (axis=1), leaving the original array unchanged

a = np.array([[5, 4, 6], [2, 3, 2]])
b = a.copy()
b.sort(axis=1)
b

array([[4, 5, 6],
       [2, 2, 3]])

In [129]:
#in-place sort
a.sort(axis=1)
a

array([[4, 5, 6],
       [2, 2, 3]])

In [131]:
# sorting with fancy indexing: argsort gives the indices that order the array
a = np.array([4, 3, 1, 2])
j = a.argsort()
j

array([2, 3, 1, 0], dtype=int64)

In [132]:
a[j]

array([1, 2, 3, 4])