## Machine Learning - Session 1 - NumPy

In [1]:
#Import the numpy library
import numpy as np

#### A. Defining a 1D numpy array

In [2]:
x = np.array([12,34,24,45,7,18])
x

array([12, 34, 24, 45,  7, 18])

#### B. Creating Regular Sequence

In [3]:
#Create the integer sequence 1, 2, ..., 10 (the stop value 11 is excluded; step defaults to 1)

integers = np.arange(1, 11)
integers

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [4]:
#Create a regular sequence of integers starting from 10 to 1.

integers = np.arange(10,0,-1)
integers

array([10,  9,  8,  7,  6,  5,  4,  3,  2,  1])

In [6]:
#Create a regular sequence of even integers between 10 to 20

even = np.arange(10,21,2)
even

array([10, 12, 14, 16, 18, 20])

In [8]:
#Create a regular sequence of the following type: [10.0,10.5,11.0,11.5, ... , 19.5, 20.0]
#linspace takes the number of points (21) instead of a fractional step, so the end value
#20.0 is always included exactly; arange with a non-integer step can gain or lose the
#last element because of floating-point rounding.

frac = np.linspace(10, 20, num=21)
frac

array([ 10. ,  10.5,  11. ,  11.5,  12. ,  12.5,  13. ,  13.5,  14. ,
        14.5,  15. ,  15.5,  16. ,  16.5,  17. ,  17.5,  18. ,  18.5,
        19. ,  19.5,  20. ])

In [10]:
#Create a regular sequence of length 20 ranging from 0 to 5
#endpoint=True makes `stop` the last sample, so the sequence ends exactly at 5.

np.linspace(start=0,stop=5,num=20,endpoint=True)

array([ 0.        ,  0.26315789,  0.52631579,  0.78947368,  1.05263158,
        1.31578947,  1.57894737,  1.84210526,  2.10526316,  2.36842105,
        2.63157895,  2.89473684,  3.15789474,  3.42105263,  3.68421053,
        3.94736842,  4.21052632,  4.47368421,  4.73684211,  5.        ])

In [11]:
#Help for np.linspace

help(np.linspace)

Help on function linspace in module numpy.core.function_base:

linspace(start, stop, num=50, endpoint=True, retstep=False, dtype=None)
    Return evenly spaced numbers over a specified interval.
    
    Returns `num` evenly spaced samples, calculated over the
    interval [`start`, `stop`].
    
    The endpoint of the interval can optionally be excluded.
    
    Parameters
    ----------
    start : scalar
        The starting value of the sequence.
    stop : scalar
        The end value of the sequence, unless `endpoint` is set to False.
        In that case, the sequence consists of all but the last of ``num + 1``
        evenly spaced samples, so that `stop` is excluded.  Note that the step
        size changes when `endpoint` is False.
    num : int, optional
        Number of samples to generate. Default is 50. Must be non-negative.
    endpoint : bool, optional
        If True, `stop` is the last sample. Otherwise, it is not included.
        Default is True.
    retstep : bo

In [12]:
#Studying the arange function

help(np.arange)

Help on built-in function arange in module numpy.core.multiarray:

arange(...)
    arange([start,] stop[, step,], dtype=None)
    
    Return evenly spaced values within a given interval.
    
    Values are generated within the half-open interval ``[start, stop)``
    (in other words, the interval including `start` but excluding `stop`).
    For integer arguments the function is equivalent to the Python built-in
    `range <http://docs.python.org/lib/built-in-funcs.html>`_ function,
    but returns an ndarray rather than a list.
    
    When using a non-integer step, such as 0.1, the results will often not
    be consistent.  It is better to use ``linspace`` for these cases.
    
    Parameters
    ----------
    start : number, optional
        Start of interval.  The interval includes this value.  The default
        start value is 0.
    stop : number
        End of interval.  The interval does not include this value, except
        in some cases where `step` is not an integer and

#### C. Creating a Random Sequence

In [14]:
#Generate 20 random integers from 1 up to (but not including) 100
#NOTE: randint excludes the upper bound, so 100 itself is never drawn; pass 101 to include it.
np.random.seed(0)     #fixing the seed gives the same set of random values to everyone who uses the same seed value
np.random.randint(1,100,20)

array([45, 48, 65, 68, 68, 10, 84, 22, 37, 88, 71, 89, 89, 13, 59, 66, 40,
       88, 47, 89])

In [18]:
#Shuffle the vector x in place (np.random.shuffle reorders x itself; its return value is not used here)
x = np.array([12,34,24,45,7,18])
np.random.shuffle(x)
x

array([ 7, 34, 18, 45, 24, 12])

In [20]:
#Permute x and return it
#np.random.permutation returns the reordered values as the cell result; x is not reassigned here

x = np.array([12,34,24,45,7,18])
np.random.permutation(x)

array([18, 24, 45,  7, 34, 12])

In [28]:
#Choice: draw 3 elements of x at random; replace=False means no element is picked twice

np.random.choice(x,3,replace=False)

array([18, 24, 45])

In [29]:
#help(np.random.choice)

To learn more about NumPy random sampling, see the following link: 
https://docs.scipy.org/doc/numpy-1.12.0/reference/routines.random.html 

#### D. Some basic numpy methods

In [30]:
x = np.array([12,34,24,45,7,18])  #functions attached to a specific data type (here the numpy array) are called methods

In [31]:
#Size of the array
x.size

6

In [33]:
#data type
x.dtype

dtype('int32')

In [34]:
#Type of the object
type(x)

numpy.ndarray

In [35]:
#Type conversion
x.astype(str)

array(['12', '34', '24', '45', '7', '18'],
      dtype='<U11')

In [36]:
x.astype(float)

array([ 12.,  34.,  24.,  45.,   7.,  18.])

#### E. Some basic (1D) numpy operations

In [37]:
#Consider that we have two arrays

x = np.array([12,34,24,45,7,18])
y = np.array([3,6,8,5,1,10])

In [38]:
#Addition
x+y

array([15, 40, 32, 50,  8, 28])

In [39]:
#Subtraction
x-y

array([ 9, 28, 16, 40,  6,  8])

In [41]:
#Multiplication by scalar
y*10

array([ 30,  60,  80,  50,  10, 100])

In [45]:
#Division
y/10

array([ 0.3,  0.6,  0.8,  0.5,  0.1,  1. ])

In [44]:
#Element wise multiplication (this is NOT the dot product: the result is an array of pairwise products)
x*y

array([ 36, 204, 192, 225,   7, 180])

In [47]:
#Dot product using the dot method: the sum of the element-wise products, a single number
x.dot(y)

844

In [46]:
#Squaring each elements in an array
x**2

array([ 144, 1156,  576, 2025,   49,  324], dtype=int32)

#### F. Some Useful Numpy Functions

In [48]:
y = np.array([3,6,8,5,1,10])

In [49]:
#Sum
np.sum(y)

33

In [50]:
#Mean
np.mean(y)

5.5

In [51]:
#Standard Deviation
np.std(y)

2.9860788111948193

In [52]:
#Minimum
np.min(y)

1

In [53]:
#Maximum
np.max(y)

10

In [54]:
#Percentiles: q is given on a 0-100 scale, so the 1st quartile is q=25 (0.25 would be the 0.25th percentile)
np.percentile(y,25)   #25 is 1st quartile

3.5

In [55]:
np.percentile(y,[25,75])  #q is on a 0-100 scale: 25 is 1st quartile, 75 is 3rd quartile

array([ 3.5,  7.5])

In [56]:
#Sorting
np.sort(y)

array([ 1,  3,  5,  6,  8, 10])

#### G. Subsetting a 1D numpy array

In [57]:
x = np.array([12,34,24,45,7,18])
y = np.array([3,6,8,5,1,10])

In [58]:
#Subsetting and slicing is same as list
x[0]

12

In [59]:
x[3:5]

array([45,  7])

#### H. Conditional Subsetting

**The Comparison Operators**

      > is greater than
      < is less than
      == is equal to
      <= is less than or equal to
      >= is greater than or equal to

In [60]:
x = np.array([12,34,24,45,7,18])
y = np.array([3,6,8,5,1,10])

In [61]:
x > 20

array([False,  True,  True,  True, False, False], dtype=bool)

In [62]:
y <= 5

array([ True, False, False,  True,  True, False], dtype=bool)

In [63]:
y == 1

array([False, False, False, False,  True, False], dtype=bool)

**The Logical Operators**

    AND: &
    OR : |

In [64]:
(x>30) | (x<10)

array([False,  True, False,  True,  True, False], dtype=bool)

In [65]:
(x>10) & (x<30)

array([ True, False,  True, False, False,  True], dtype=bool)

**Problems**

In [67]:
#Consider the following two vectors

x = np.array([33,45,23,67,54,48])
y = np.array([108,151,164,119,135,122])

In [68]:
#a. Values of x that are less than 35

x[x<35]

array([33, 23])

In [75]:
#b. The number of observations in y that are more than 150

np.size(y[y>150])

2

In [77]:
#OR
np.count_nonzero(y>150)   # counts the True entries of the mask, i.e. how many values satisfy the condition

2

In [79]:
#c. The number of observations in y that are between 120 and 165
#(both bounds are exclusive; the mask is counted inside NumPy rather than with Python's built-in sum)

np.count_nonzero((y>120) & (y< 165))

4

In [72]:
#d. The values in x that are less than 30 or greater than 50
x[(x< 30) | (x > 50)]

array([23, 67, 54])

In [73]:
#e. The values in x for which the values in y is less than or equal to 120
x[y<= 120]

array([33, 67])

In [74]:
#f. The values in y for which the values in x is equal to 45
y[x== 45]

array([151])

#### I. Defining a 2D numpy array

In [80]:
#Converting a 1D array into 2D array
x = np.array([33,45,23,67,54,48])

matrix = x.reshape(2,3)
matrix

array([[33, 45, 23],
       [67, 54, 48]])

In [81]:
matrix = x.reshape(3,2)
matrix

array([[33, 45],
       [23, 67],
       [54, 48]])

In [84]:
#Create a 3x3 matrix containing elements from 1 to 9
mat = np.arange(1,10,1)
matr = mat.reshape(3,3)
matr

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [86]:
np.arange(1,10).reshape(3,3)

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [88]:
#Joining two 1D arrays to form a matrix
x = np.array([33,45,23,67,54,48])
y = np.array([108,151,164,119,135,122])

matrix = np.array([x,y])
matrix

array([[ 33,  45,  23,  67,  54,  48],
       [108, 151, 164, 119, 135, 122]])

#### J. Some basic methods for 2D numpy array

In [89]:
matrix = np.array([[38,33,47],
                    [29,48,35],
                    [21,34,47],
                    [38,44,21],
                    [28,26,43]])
matrix

array([[38, 33, 47],
       [29, 48, 35],
       [21, 34, 47],
       [38, 44, 21],
       [28, 26, 43]])

In [90]:
#Checking the shape of the matrix
matrix.shape

(5, 3)

In [91]:
#Dimension of the array
matrix.ndim

2

In [92]:
#Data type of the elements in the array
matrix.dtype

dtype('int32')

In [93]:
matrix.dtype.name

'int32'

In [94]:
#Number of elements present in the matrix
matrix.size

15

In [95]:
#Size of ONE element in bytes (4 for int32); the whole matrix occupies matrix.nbytes bytes
matrix.itemsize

4

#### K. Subsetting 2D numpy array

In [None]:
#matrix
matrix = np.array([[38,33,47],
                    [29,48,35],
                    [21,34,47],
                    [38,44,21],
                    [28,26,43]])
matrix

In [96]:
#a) Print the first element of the matrix.

matrix[0,0]

38

In [98]:
#b) Print the last element of the matrix.
matrix[matrix.shape[0]-1,matrix.shape[1]-1]

43

In [97]:
#Or
matrix[-1,-1]

43

In [99]:
#c) Print the values of the first row.
matrix[0,:]

array([38, 33, 47])

In [100]:
#d) Print the values of the fourth row.
matrix[3,:]

array([38, 44, 21])

In [101]:
#e) Print the values first column.
matrix[:,0]

array([38, 29, 21, 38, 28])

In [102]:
#f) Calculate the total of the third row.
#The ndarray .sum() method adds the values inside NumPy instead of looping with Python's built-in sum.
matrix[2,:].sum()

102

In [103]:
#g) Calculate the total of the fifth row.
#The ndarray .sum() method adds the values inside NumPy instead of looping with Python's built-in sum.
matrix[4,:].sum()

97

In [104]:
#h) Calculate the total of the second column.
#The ndarray .sum() method adds the values inside NumPy instead of looping with Python's built-in sum.
matrix[:,1].sum()

185

In [105]:
#i) Calculate the average of the values of the first row.
#.mean() divides the row total by the number of elements in the row, replacing the manual sum/count.
matrix[0,:].mean()

39.333333333333336

In [106]:
#j) Replace the first value of the matrix by 50.

matrix[0,0] = 50
matrix


array([[50, 33, 47],
       [29, 48, 35],
       [21, 34, 47],
       [38, 44, 21],
       [28, 26, 43]])

In [108]:
#k) Replace the last value of the matrix by 30.

matrix[-1,-1] = 30
matrix

array([[50, 33, 47],
       [29, 48, 35],
       [21, 34, 47],
       [38, 44, 21],
       [28, 26, 30]])

In [109]:
#l) Replace the second column of the matrix by the vector (20,30,20,30,20)


matrix[:,1] = [20,30,20,30,20]
matrix

array([[50, 20, 47],
       [29, 30, 35],
       [21, 20, 47],
       [38, 30, 21],
       [28, 20, 30]])

In [119]:
#m) Replace the first row of the matrix by its average.
#Row 0 is [50, 20, 47] at this point, so its mean is exactly 39.0 (the 39.33 computed in (i)
#was for the original row [38, 33, 47]). The single value is broadcast to every element of the row.
#NOTE: matrix has an integer dtype, so a fractional mean would be truncated on assignment.

matrix[0,] = np.mean(matrix[0,])
matrix

array([[39, 39, 39],
       [29, 30, 35],
       [21, 20, 47],
       [38, 30, 21],
       [28, 20, 30]])

In [111]:
#q) Calculate the sum of all the elements of the matrix
#With no axis argument, .sum() adds every element; this replaces the nested built-in sum(sum(...)),
#which only works for exactly two dimensions and loops in Python.

matrix.sum()

466

In [121]:
#r) Calculate the sum of all the rows on the matrix

matrix.sum(0)  #axis=0 adds the rows together (collapses the row axis), giving one total per column

array([155, 139, 172])

In [120]:
#s) Calculate the sum of all the columns of the matrix

matrix.sum(1) #axis=1 adds the columns together (collapses the column axis), giving one total per row

array([117,  94,  88,  89,  78])

In [114]:
help(matrix.sum)

Help on built-in function sum:

sum(...) method of numpy.ndarray instance
    a.sum(axis=None, dtype=None, out=None, keepdims=False)
    
    Return the sum of the array elements over the given axis.
    
    Refer to `numpy.sum` for full documentation.
    
    See Also
    --------
    numpy.sum : equivalent function



#### L. Matrix Operations

In [122]:
#Defining a matrix 'm'
m = np.array([[2,3],[4,5]])
m

array([[2, 3],
       [4, 5]])

In [123]:
#Defining a matrix 'n'
n = np.array([[1,0],[3,6]])
n

array([[1, 0],
       [3, 6]])

In [124]:
#Multiplication by scalar
2*n

array([[ 2,  0],
       [ 6, 12]])

In [125]:
#Matrix addition
m+n

array([[ 3,  3],
       [ 7, 11]])

In [126]:
#Matrix Multiplication (m times n)
np.dot(m,n)

array([[11, 18],
       [19, 30]])

In [127]:
#Preferred - for Matrix Multiplication
np.matmul(m,n)

array([[11, 18],
       [19, 30]])

In [128]:
#Preferred - for Matrix Multiplication (the @ operator)
#Note the operands are swapped here: this is n times m, which gives a different result from
#m times n because matrix multiplication is not commutative.
n @ m

array([[ 2,  3],
       [30, 39]])

In [129]:
#Element-wise array multiplication
np.multiply(n,m)

array([[ 2,  0],
       [12, 30]])

#### Copying data

In [131]:
p = np.array([2,5,8,6])

In [132]:
q = p   #this does not create new object. Simply creates another reference q for the array

In [133]:
q

array([2, 5, 8, 6])

In [134]:
q[2] = 10

In [135]:
p   #Note: changing value in q changes value in p as well. So q and p are pointing to the same array.

array([ 2,  5, 10,  6])

In [136]:
q =  p.copy()  # Copy method is used to copy and create new array q.

In [137]:
q[2] = 100

In [138]:
p  # Note: after changing q this time, p does not change, so q is an independent copy of p.

array([ 2,  5, 10,  6])

In [2]:
help(np.append)

Help on function append in module numpy.lib.function_base:

append(arr, values, axis=None)
    Append values to the end of an array.
    
    Parameters
    ----------
    arr : array_like
        Values are appended to a copy of this array.
    values : array_like
        These values are appended to a copy of `arr`.  It must be of the
        correct shape (the same shape as `arr`, excluding `axis`).  If
        `axis` is not specified, `values` can be any shape and will be
        flattened before use.
    axis : int, optional
        The axis along which `values` are appended.  If `axis` is not
        given, both `arr` and `values` are flattened before use.
    
    Returns
    -------
    append : ndarray
        A copy of `arr` with `values` appended to `axis`.  Note that
        `append` does not occur in-place: a new array is allocated and
        filled.  If `axis` is None, `out` is a flattened array.
    
    See Also
    --------
    insert : Insert elements into an array.


In [5]:
a1 = np.array([1,2,3])
b1 = np.array([3,4,5])
c = np.append(a1,b1)   #append returns a NEW array with b1's values placed after a1's
print(c)
print(a1+b1)           #by contrast, + adds the two arrays element by element

[1 2 3 3 4 5]
[4 6 8]
