## NumPy hands-on

In [46]:
import numpy as np

In [2]:
#check version
# Show the version string of the NumPy package imported as `np`.
np.__version__

'1.14.3'

In [4]:
#Create a 1D array of numbers from 0 to 9
# np.arange(10) yields the integers 0..9; the stop value 10 itself is excluded.
np.arange(10)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [12]:
# 3x3 boolean array filled with True
# (not displayed: a cell only renders its last expression)
np.full((3, 3), True)

# 3x3 boolean array filled with False
np.full((3, 3), False)

array([[False, False, False],
       [False, False, False],
       [False, False, False]])

In [13]:
# Pick out the odd entries of arr with a boolean mask
arr = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

arr[arr % 2 == 1]

array([1, 3, 5, 7, 9])

In [15]:
# Overwrite every odd entry of arr with -1 (arr itself is modified)
arr = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

arr[arr % 2 == 1] = -1
arr

array([ 0, -1,  2, -1,  4, -1,  6, -1,  8, -1])

In [23]:
# Build a version of arr with odd entries set to -1, leaving arr untouched
arr = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

# np.where returns a new array: -1 where the entry is odd, the original value elsewhere
ar2 = np.where(arr % 2 == 1, -1, arr)
print(arr)
ar2


[0 1 2 3 4 5 6 7 8 9]


array([ 0, -1,  2, -1,  4, -1,  6, -1,  8, -1])

In [27]:
# Turn a 1D array into a 2D array with 2 rows
a = np.arange(10)
# -1 lets NumPy infer the column count (5 here), so this equals a.reshape(2, 5)
b = np.reshape(a, (2, -1))
b

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

In [52]:
# Stack a on top of b (row-wise)
a = np.arange(10).reshape(2, -1)
b = np.ones((2, 5), dtype=int)

np.vstack([a, b])
#np.concatenate([a,b],axis=0)

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9],
       [1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1]])

In [55]:
# Place a and b side by side (column-wise)
a = np.arange(10).reshape(2, -1)
b = np.ones((2, 5), dtype=int)

np.hstack([a, b])
#np.concatenate([a,b],axis=1)

array([[0, 1, 2, 3, 4, 1, 1, 1, 1, 1],
       [5, 6, 7, 8, 9, 1, 1, 1, 1, 1]])

In [87]:
# Build the pattern 1,1,1,2,2,2,3,3,3,1,2,3,1,2,3,1,2,3 from a alone, nothing hardcoded:
# each element repeated a.size times, followed by the whole array tiled a.size times.
a = np.array([1,2,3])

np.r_[np.repeat(a, a.size), np.tile(a, a.size)]
#np.concatenate([np.repeat(a,len(a)),np.tile(a,len(a))])

array([1, 1, 1, 2, 2, 2, 3, 3, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3])

In [97]:
#get common items
a = np.array([1,2,3,2,3,4,3,4,5,6])
b = np.array([7,2,10,2,7,4,9,4,9,8])

# "Common items" are the values that occur in both arrays, wherever they sit.
# Comparing with a==b would only catch values that coincide at the same index
# (and needs equal-length inputs). np.intersect1d performs the set intersection
# and returns the sorted unique shared values.
common = np.intersect1d(a, b)
common

array([2, 4])

In [107]:
#From array a remove all items present in array b
a = np.array([1,2,3,4,5])
b = np.array([5,6,7,8,9])

# Keep every element of a that does not occur anywhere in b.
# np.isin gives one membership flag per element of a, so this works for any
# number of shared values; comparing a against np.intersect1d(a, b) only works
# by broadcasting when exactly one value is shared.
remaining = a[~np.isin(a, b)]
remaining
#np.setdiff1d(a,b)  # alternative that returns the sorted unique values

array([1, 2, 3, 4])

In [109]:
# Indices at which a and b hold the same value
a = np.array([1,2,3,2,3,4,3,4,5,6])
b = np.array([7,2,10,2,7,4,9,4,9,8])

# returns a 1-tuple of index arrays, exactly like single-argument np.where
np.nonzero(a == b)

(array([1, 3, 5, 7], dtype=int64),)

In [131]:
# Select the entries of a that lie in the closed interval [5, 10]
a = np.array([2, 6, 1, 9, 10, 3, 27])

a[np.logical_and(a >= 5, a <= 10)]

array([ 6,  9, 10])

In [172]:
#find pairwise max
a = np.array([5, 7, 9, 8, 6, 4, 5])
b = np.array([6, 3, 4, 8, 9, 7, 1])

def maxx(x, y):
    """Return the element-wise maximum of x and y.

    Accepts scalars or arrays (broadcast against each other) and delegates to
    the np.maximum ufunc, which does the comparison in compiled code instead of
    calling a Python function once per element.
    """
    return np.maximum(x, y)

# np.maximum is already vectorized, so no np.vectorize wrapper is needed;
# the name is kept so existing calls to maxxx(...) keep working.
maxxx = maxx

maxxx(a,b)

array([6, 7, 9, 8, 9, 7, 5])

In [184]:
# Swap rows/columns 1 and 2 of arr; only the final expression is displayed
arr = np.arange(9).reshape(3, 3)
arr[:, [1, 0, 2]]   # first two columns swapped
arr[[1, 0, 2]]      # first two rows swapped
np.flipud(arr)      # rows in reverse order
np.fliplr(arr)      # columns in reverse order

array([[2, 1, 0],
       [5, 4, 3],
       [8, 7, 6]])

In [214]:
# Create a 5x3 array of random floats drawn uniformly between 5 and 10.
# No seed is set in this cell, so the values differ from run to run.
#np.array(np.random.random([15])).reshape(5,3)
np.random.uniform(low=5,high=10,size=(5,3))

array([[9.44436504, 9.20747779, 8.64406211],
       [6.26617713, 5.31384295, 9.38667293],
       [5.22569961, 5.59657866, 7.1348897 ],
       [9.52578523, 7.50682804, 7.84712179],
       [9.71875241, 7.93553334, 9.34956832]])

In [351]:
# Load columns 0-3 of the UCI iris data file as a float array
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris = np.genfromtxt(
    url,
    dtype=float,
    delimiter=',',
    usecols=(0, 1, 2, 3),
)

In [307]:
# Display the loaded array (a cell renders its last expression as output).
iris
# Left disabled: would take the first column as floats and bind it to `a`.
#a=iris[:,0].astype('float')

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2],
       [5.4, 3.9, 1.7, 0.4],
       [4.6, 3.4, 1.4, 0.3],
       [5. , 3.4, 1.5, 0.2],
       [4.4, 2.9, 1.4, 0.2],
       [4.9, 3.1, 1.5, 0.1],
       [5.4, 3.7, 1.5, 0.2],
       [4.8, 3.4, 1.6, 0.2],
       [4.8, 3. , 1.4, 0.1],
       [4.3, 3. , 1.1, 0.1],
       [5.8, 4. , 1.2, 0.2],
       [5.7, 4.4, 1.5, 0.4],
       [5.4, 3.9, 1.3, 0.4],
       [5.1, 3.5, 1.4, 0.3],
       [5.7, 3.8, 1.7, 0.3],
       [5.1, 3.8, 1.5, 0.3],
       [5.4, 3.4, 1.7, 0.2],
       [5.1, 3.7, 1.5, 0.4],
       [4.6, 3.6, 1. , 0.2],
       [5.1, 3.3, 1.7, 0.5],
       [4.8, 3.4, 1.9, 0.2],
       [5. , 3. , 1.6, 0.2],
       [5. , 3.4, 1.6, 0.4],
       [5.2, 3.5, 1.5, 0.2],
       [5.2, 3.4, 1.4, 0.2],
       [4.7, 3.2, 1.6, 0.2],
       [4.8, 3.1, 1.6, 0.2],
       [5.4, 3.4, 1.5, 0.4],
       [5.2, 4.1, 1.5, 0.1],
       [5.5, 4.2, 1.4, 0.2],
       [4.9, 3

In [308]:
# Summary statistics of the first iris column (sepal length).
# `a` is bound here so the cell does not depend on whatever an earlier cell
# last left in `a`; on a fresh top-to-bottom run that would be an unrelated array.
a = iris[:, 0]

#mean
a.mean()

#median
np.median(a)

#std
np.std(a)

#range, i.e. a.max()-a.min(); as the last expression this is what the cell displays
np.ptp(a)

3.6000000000000005

In [309]:
#5th, 95th percentile
# NOTE(review): `a` is not assigned in this cell; it must already exist in the kernel.
# NOTE(review): this is not the cell's last expression, so its result is not displayed.
np.percentile(a,(5,95))
# Draw 10 random column indices in [0, iris.shape[1]); this is the value the cell displays.
np.random.randint(iris.shape[1],size=10)

array([2, 0, 2, 2, 3, 3, 1, 1, 1, 2])

In [310]:
# Overwrite 10 randomly chosen (row, column) cells of iris with NaN, in place.
# Row indices are drawn first, then column indices.
nan_rows = np.random.randint(iris.shape[0], size=10)
nan_cols = np.random.randint(iris.shape[1], size=10)
iris[nan_rows, nan_cols] = np.nan

In [315]:
# Row indices whose first column is NaN (not displayed: not the last expression)
np.nonzero(np.isnan(iris[:, 0]))

# (row indices, column indices) of every NaN in the whole array
np.nonzero(np.isnan(iris))

(array([  6,   9,  36,  43,  53, 108, 111, 115, 142, 146], dtype=int64),
 array([2, 0, 1, 3, 2, 0, 0, 1, 0, 3], dtype=int64))

In [317]:
# Rows with sepallength (1st column) < 5.0 and petallength (3rd column) > 1.5

iris[np.logical_and(iris[:, 0] < 5, iris[:, 2] > 1.5)]

  This is separate from the ipykernel package so we can avoid doing imports until
  This is separate from the ipykernel package so we can avoid doing imports until


array([[4.8, 3.4, 1.6, 0.2],
       [4.8, 3.4, 1.9, 0.2],
       [4.7, 3.2, 1.6, 0.2],
       [4.8, 3.1, 1.6, 0.2],
       [4.9, 2.4, 3.3, 1. ],
       [4.9, 2.5, 4.5, 1.7]])

In [347]:
# Keep only the rows in which no column is NaN
iris[~np.isnan(iris).any(axis=1)]

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2],
       [5.4, 3.9, 1.7, 0.4],
       [5. , 3.4, 1.5, 0.2],
       [4.4, 2.9, 1.4, 0.2],
       [5.4, 3.7, 1.5, 0.2],
       [4.8, 3.4, 1.6, 0.2],
       [4.8, 3. , 1.4, 0.1],
       [4.3, 3. , 1.1, 0.1],
       [5.8, 4. , 1.2, 0.2],
       [5.7, 4.4, 1.5, 0.4],
       [5.4, 3.9, 1.3, 0.4],
       [5.1, 3.5, 1.4, 0.3],
       [5.7, 3.8, 1.7, 0.3],
       [5.1, 3.8, 1.5, 0.3],
       [5.4, 3.4, 1.7, 0.2],
       [5.1, 3.7, 1.5, 0.4],
       [4.6, 3.6, 1. , 0.2],
       [5.1, 3.3, 1.7, 0.5],
       [4.8, 3.4, 1.9, 0.2],
       [5. , 3. , 1.6, 0.2],
       [5. , 3.4, 1.6, 0.4],
       [5.2, 3.5, 1.5, 0.2],
       [5.2, 3.4, 1.4, 0.2],
       [4.7, 3.2, 1.6, 0.2],
       [4.8, 3.1, 1.6, 0.2],
       [5.4, 3.4, 1.5, 0.4],
       [5.2, 4.1, 1.5, 0.1],
       [5.5, 4.2, 1.4, 0.2],
       [4.9, 3.1, 1.5, 0.1],
       [5. , 3.2, 1.2, 0.2],
       [4.9, 3

In [355]:
#corr b/w 1st and 3rd col
# An earlier cell writes NaN into random cells of iris, and np.corrcoef yields
# nan as soon as either input contains one. Correlate only the rows in which
# both columns hold a value.
pair = iris[:, [0, 2]]
complete = ~np.isnan(pair).any(axis=1)
np.corrcoef(pair[complete, 0], pair[complete, 1])[0, 1]

0.8717541573048718

In [373]:
# Draw 10 random values in [0, 10) and assign each to a bin with edges 0, 3, 5, 10
bn = np.random.uniform(0.0, 10, 10)
# for increasing edges, a right-sided searchsorted gives the same indices as np.digitize
np.searchsorted([0, 3, 5, 10], bn, side='right')


array([1, 3, 2, 1, 1, 1, 3, 1, 3, 1], dtype=int64)

In [408]:
# Append a new column: the 3rd column scaled by a single random factor in [0, 1)
zz = iris[:, 2]
# reshape to a (rows, 1) column so it can be joined to the 2D array
z = (zz * np.random.random()).reshape(-1, 1)
np.hstack([iris, z])
#np.concatenate([iris,z],axis=1)

array([[5.1       , 3.5       , 1.4       , 0.2       , 1.04527705],
       [4.9       , 3.        , 1.4       , 0.2       , 1.04527705],
       [4.7       , 3.2       , 1.3       , 0.2       , 0.97061441],
       [4.6       , 3.1       , 1.5       , 0.2       , 1.1199397 ],
       [5.        , 3.6       , 1.4       , 0.2       , 1.04527705],
       [5.4       , 3.9       , 1.7       , 0.4       , 1.26926499],
       [4.6       , 3.4       , 1.4       , 0.3       , 1.04527705],
       [5.        , 3.4       , 1.5       , 0.2       , 1.1199397 ],
       [4.4       , 2.9       , 1.4       , 0.2       , 1.04527705],
       [4.9       , 3.1       , 1.5       , 0.1       , 1.1199397 ],
       [5.4       , 3.7       , 1.5       , 0.2       , 1.1199397 ],
       [4.8       , 3.4       , 1.6       , 0.2       , 1.19460235],
       [4.8       , 3.        , 1.4       , 0.1       , 1.04527705],
       [4.3       , 3.        , 1.1       , 0.1       , 0.82128911],
       [5.8       , 4.        , 1.

In [None]:
# to be continued