# Getting started with ndarray

**ndarrays** are time and space-efficient multidimensional arrays at the core of numpy used to create ndarrays. **ndarrays** means n dimensional arrays...where n = 1,2,3,...

## Creating Rank 1 numpy arrays

In [1]:
import numpy as np

an_array = np.array( [2, 4, 6] )
print( type(an_array) )

<class 'numpy.ndarray'>


In [2]:
# checking the shape of an array
print( an_array.shape )

(3,)


In [6]:
# since rank-1 array, need only one index to access each element
print( an_array[0], an_array[1], an_array[2])

2 4 6


In [7]:
# ndarray in numpy is mutable
an_array[0] = 20
an_array[0] = 'HY' # Error
print(an_array)

[20  4  6]


## Creating a Rank 2 numpy array
A rank 2 **ndarray** is one with dimensions. Notice the format is [ [row], [row] ].
2D arrays are great for representing matrices which are often useful in data science.

In [9]:
# creat 2d array
another = np.array([ [11, 12, 13],
                    [21, 22, 23] ])
# print array
print(another)

# shape of array...rows x coloumns
print('Shape:', another.shape)

# access elements
print('Retrieving 11, 12 and 21:', another[0,0], another[0,1], another[1,0])

[[11 12 13]
 [21 22 23]]
Shape: (2, 3)
Retrieving 11, 12 and 21: 11 12 21


## Other ways to create numpy arrays
Here we create a number of different size with different shapes and different pre-filled values numpy has a number of built-in methods which helps us quickly and easily create multidimensional arrays

In [11]:
import numpy as np

# create a 2x2 array of zeros
ex1 = np.zeros( (2,2) )
print(ex1)

[[0. 0.]
 [0. 0.]]


In [12]:
# create a 2x2 array filled with 5.0
ex2 = np.full( (2,2), 5.0 )
print(ex2)

[[5. 5.]
 [5. 5.]]


In [14]:
# create a 2x2 array with diagonal 1s and others 0s
ex3 = np.eye(2, 2)
print(ex3)

[[1. 0.]
 [0. 1.]]


In [17]:
# create array of ones
ex4 = np.ones( (1,2) )
print(ex4)

[[1. 1.]]


In [21]:
# notice that the ex4 is actually rank 2, it is a 1x2 array
print(ex4.shape)

# ...which means two indexes needed to access an element
print()
print(ex4[0, 1])

(1, 2)

1.0


In [None]:
# create array of random float between 0 and 1
import numpy as np
ex5 = np.random.random( (2, 3) )
print(ex5)

print()

.



# Array indexing 

## Slice indexing:

Similar to the se of slice indexing with lists and strings, slice indexing can be used
to pull out sub-regions of ndarrays.

In [16]:
import numpy as np

# rank 2 array of shape (3, 4)
an_array = np.array([[11, 12, 13, 14],
                     [21, 22, 23, 24],
                     [31, 32, 33, 34]
                    ])
print(an_array)

[[11 12 13 14]
 [21 22 23 24]
 [31 32 33 34]]


In [20]:
# use array slicing to get a subarray of 2x2 

a_slice = an_array[:2, 1:3]  
print(a_slice)

# np_slice = np.array(an_array[:2, 1:3]) ...this np-slice rather create copy
# modifying this slice, DO NOT modify the underlyiing array

[[500  13]
 [ 22  23]]


In [18]:
# when you modify a slice, you actually modify the underlyiing root array

print('Before:', an_array[0, 1])
a_slice[0, 0] = 500
print("After:", an_array[0, 1])

Before: 12
After: 500


## Using both integer & slice indexing

combination of integer and slice indexing to create different shaped matrices.

In [21]:
# create a Rank 2 array of shape (3, 4)

an_array = np.array([ [11,12,13,14], [21,22,23,24], [31,32,33,34] ])
print(an_array)

[[11 12 13 14]
 [21 22 23 24]
 [31 32 33 34]]


In [25]:
# using both integer indexing and slicing generates and array of lower rank, 1D-array

row_rank1 = an_array[1, :]  # rank 1 view
print(row_rank1, row_rank1.shape)  # notice, only [ ]

[21 22 23 24] (4,)


In [26]:
# using slicing alone generates an array of the same rank as the an_array

row_rank2 = an_array[1:2, :]  # rank 2 view
print(row_rank2, row_rank2.shape)  # notice the [[ ]] 

[[21 22 23 24]] (1, 4)


In [34]:
# same thing for columns of an array

col_rank1 = an_array[:, 1]
col_rank2 = an_array[:, 1:2]

print(col_rank1, col_rank1.shape)  # rank1
print()
print(col_rank2, col_rank2.shape)  # rank2

[12 22 32] (3,)

[[12]
 [22]
 [32]] (3, 1)


## Array Indexing for changing elements

sometimes it is useful to use an array of indexes to access or change elements

In [41]:
# create new array 

an_array = np.array([ [11,12,13], [21,22,23], [31,32,33], [41,42,43] ])
print("Original Array")
print(an_array)

Original Array
[[11 12 13]
 [21 22 23]
 [31 32 33]
 [41 42 43]]


In [42]:
# creating an array of indices

col_indices = np.array([0, 1, 2, 0])   # the above are values, these index
print("\nCol indices picked:", col_indices)

row_indices = np.arange(4)
print("\nRow indices picked:", row_indices)


Col indices picked: [0 1 2 0]

Row indices picked: [0 1 2 3]


In [43]:
# pairing of col_indices and row_indices

# for row,col in zip(row_indices, col_indices):
#     print(row, ",", col)

In [45]:
# select one element for each row
print("values in the array at those indices: ", an_array[row_indices, col_indices])

values in the array at those indices:  [11 22 33 41]


In [47]:
# passing list in the array is considered index not value...as an_array[2, 4]
print( "same as typing the indices as list", an_array[[0,1,2], [0,1,2]] )

tryyy [11 22 33]


In [None]:
# change one element from each row using the indices selected

an_array[row_indices, col_indices] += 10000000

print("\nChanged of Array: ")
print(an_array)

. 


## Boolean Indexing


### Array Indexing for changing elements

In [48]:
# create a 3x2 array
an_array = np.array([ [11,12], [21,22], [31,31] ])
print(an_array)

[[11 12]
 [21 22]
 [31 31]]


In [49]:
# create a filter which will be boolean values for whether each element 
# meets the criteria
filter = (an_array > 15)
filter

array([[False, False],
       [ True,  True],
       [ True,  True]])

In [52]:
# selecting just element meeting the criteria
print(an_array[filter])

# in short
an_array[(an_array > 2)]
an_array[(an_array % 2 == 0)]

[21 22 31 31]


array([11, 12, 21, 22, 31, 31])

In [53]:
# changing element using filter
an_array[( an_array % 2 == 0 )] += 100
print(an_array)

[[ 11 112]
 [ 21 122]
 [ 31  31]]


.




# Datatypes and Array Operations



## Datatypes:

In [4]:
import numpy as np

ex1 = np.array([11,12]) # take note, python assigns the data type
print(ex1.dtype)


int64


In [5]:
import numpy as np

ex2 = np.array([11.0,12.0]) # take note, python assigns the data type
print(ex2.dtype)


float64


In [9]:
# forcing float to integer 
ex3 = np.array([11.0,12.0], dtype=np.int64)
print(ex3.dtype)
print("\n", ex3)

int64

 [11 12]


In [10]:
# forcing int to float

ex5 = np.array([11,12], dtype=np.float64)
print(ex5.dtype)
print("\n", ex5)

float64

 [11. 12.]


## Arithmetic Array Operations:

In [13]:
x = np.array( [[11, 12], [21, 22]], dtype=np.int)
y = np.array( [[11.1, 12.1], [21.1, 22.1]], dtype=np.float64)

print(x)
print()
print(y)

[[11 12]
 [21 22]]

[[11.1 12.1]
 [21.1 22.1]]


In [14]:
# add

print(x + y) # option 1
print()
print(np.add(x, y)) # alt 2

[[22.1 24.1]
 [42.1 44.1]]

[[22.1 24.1]
 [42.1 44.1]]


In [15]:
# substract

print(x - y)
print()
print(np.subtract(x, y)) # alt 2

[[-0.1 -0.1]
 [-0.1 -0.1]]

[[-0.1 -0.1]
 [-0.1 -0.1]]


In [16]:
# multiply

print(x * y)
print()
print(np.multiply(x, y)) # alt 2

[[122.1 145.2]
 [443.1 486.2]]

[[122.1 145.2]
 [443.1 486.2]]


In [17]:
# divide

print(x / y)
print()
print(np.divide(x, y)) # alt 2

[[0.99099099 0.99173554]
 [0.99526066 0.99547511]]

[[0.99099099 0.99173554]
 [0.99526066 0.99547511]]


In [18]:
# square root

print(np.sqrt(x))

[[3.31662479 3.46410162]
 [4.58257569 4.69041576]]


In [19]:
# exponent (e ** x)
print(np.exp(x))

[[5.98741417e+04 1.62754791e+05]
 [1.31881573e+09 3.58491285e+09]]


.

# Statistical Methods, Sorting and Set Operations:

In [29]:
# setup a random 2x4 matrix

arr = np.random.randn(2, 5)
arr2 = np.random.random((2, 5))
print(arr2)
print()
print(arr)

[[0.72929847 0.90611521 0.96039891 0.56232129 0.16741792]
 [0.79381906 0.13130395 0.42898136 0.19317685 0.55886923]]

[[ 1.00603231 -0.2896228   0.66540286  0.53709587 -0.41279239]
 [ 1.03318714  0.6251113   0.02966124 -0.51711174 -0.14527607]]


In [30]:
# mean of all elements ... sum all element divided by the number 

print(arr.mean())

0.25316877235458535


In [31]:
# compute means by row
# for eact row...(sum elements)/(number of elements) 

print(arr.mean(axis=1))

[0.30122317 0.20511438]


In [None]:
# compute means by colomn

print(arr.means(axis=0))

In [41]:
# sum all elements

print(arr.sum())

2.5316877235458537


In [44]:
# computing medians of elements

print(np.median(arr))
print(np.median(arr, axis=1))

0.2833785533610662
[0.53709587 0.02966124]


## Sorting 

In [70]:
# create a rank1 10 element array of randoms

unsorted = np.random.randn(5)
print(unsorted)

[ 0.7952785   0.25786755 -1.01604451  1.42941117 -0.45387763]


In [71]:
# create a copy and sort

sorted = np.array(unsorted)  # NB: np.array added makes a COPY
sorted.sort()

print(sorted)
print()
print(unsorted)

[-1.01604451 -0.45387763  0.25786755  0.7952785   1.42941117]

[ 0.7952785   0.25786755 -1.01604451  1.42941117 -0.45387763]


In [72]:
# sorting the original....inplace sorting

unsorted.sort()
print(unsorted)

[-1.01604451 -0.45387763  0.25786755  0.7952785   1.42941117]


## Finding Unique Elements:

In [3]:
import numpy as np
arr = np.array([1,2,1,4,2,1,4,2])
print(np.unique(arr))

[1 2 4]


## Set operations with np.array data type 

In [5]:
set1 = np.array(["desk", "chair", "bulb"])
set2 = np.array(["lamp", "bulb", "chair"])
print(set1, set2)

['desk' 'chair' 'bulb'] ['lamp' 'bulb' 'chair']


In [7]:
print(np.intersect1d(set1, set2))    
# for any np.sth(you need to pass and array)
# 1d cus of the array

['bulb' 'chair']


In [8]:
print(np.union1d(set1, set2))

['bulb' 'chair' 'desk' 'lamp']


In [10]:
# elements that are in set1 buh not in set2
# USE set_different
print(np.setdiff1d(set1, set2))

['desk']


In [12]:
# which element of set1 is in set2
print(np.in1d(set1, set2)) 

[False  True  True]


.


# Broadcasting

...an advanced feature in numpy and 
_ makes array operations more convenientz


In [20]:
start = np.zeros((4, 3))
print(start)

[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]


In [40]:
add_rows = np.array([1, 0, 2])
print(add_rows.shape)

print(add_rows)

(3,)
[1 0 2]


In [22]:
# add to each row of 'start' using broadcasting

y = start + add_rows   
print(y)

[[1. 0. 2.]
 [1. 0. 2.]
 [1. 0. 2.]
 [1. 0. 2.]]


In [23]:
# create an 4x1 ndarray to broadcast across columns 

add_cols = np.array([ [0, 1, 2, 3] ])
add_cols = add_cols.T   # transpose

print(add_cols)

[[0]
 [1]
 [2]
 [3]]


In [25]:
# adding to each colunm of 'start' using broadcasting 
y = start + add_cols
print(y)

[[0. 0. 0.]
 [1. 1. 1.]
 [2. 2. 2.]
 [3. 3. 3.]]


In [26]:
# broadcasting in both directions ... adding a scalar throughout
add_scalar = np.array([1])
print(start + add_scalar)

[[1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]]


# Speedtest: ndarrays vs lists

In [36]:
from numpy import arange
from timeit import Timer

size = 1000000
timeits = 1000

nd_array = arange(size)
print(type(nd_array))
print(nd_array)

timer_numpy = Timer("nd_array.sum()", "from __main__ import nd_array")

print("\nTime taken by numpy ndarray: %f seconds" % (timer_numpy.timeit(timeits)/timeits))

<class 'numpy.ndarray'>
[     0      1      2 ... 999997 999998 999999]

Time taken by numpy ndarray: 0.001718 seconds


In [37]:
a_list = list(range(size))
print( type(a_list) )

timer_list = Timer("sum(a_list)", "from __main__ import a_list")
print("Time taken by list: %f seconds" % 
     (timer_list.timeit(timeits)/ timeits))

<class 'list'>
Time taken by list: 0.018139 seconds
