## Import Statements

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

## Creating NumPy Arrays

In [2]:
# List to 1D array
print(np.array([2, 3, 4]))

# List of tuples to 2D array
print(np.array([(2, 3, 4), (5, 6, 7)]))

# Empty array
print(np.empty(5))
print(np.empty((5,4)))
print(np.empty((5,4,3)))

# Array of 1s
print(np.ones((5, 4)))

# Specifying the datatype
print(np.ones((5, 4), dtype=np.int_))

[2 3 4]
[[2 3 4]
 [5 6 7]]
[0.   0.25 0.5  0.75 1.  ]
[[6.23042070e-307 4.67296746e-307 1.69121096e-306 1.69120688e-306]
 [8.34441742e-308 8.90104239e-307 1.33511290e-306 1.78018403e-306]
 [1.24610383e-306 1.69118108e-306 8.06632139e-308 1.20160711e-306]
 [1.69119330e-306 1.78019082e-306 1.78020984e-306 6.23053954e-307]
 [6.23056331e-307 1.42418987e-306 8.34451928e-308 8.34402698e-308]]
[[[1.18454878e-311 1.18454903e-311 1.18454978e-311]
  [1.18455235e-311 1.18454959e-311 1.18455235e-311]
  [1.18455221e-311 1.18455221e-311 1.18453282e-311]
  [1.18455235e-311 6.95192584e-310 1.18453323e-311]]

 [[1.18454949e-311 1.18453268e-311 1.18453302e-311]
  [1.18453267e-311 1.18453279e-311 1.18453279e-311]
  [1.18455235e-311 1.18453270e-311 1.18453323e-311]
  [1.18455235e-311 1.18455235e-311 1.18455235e-311]]

 [[1.18455211e-311 1.18455235e-311 1.18453279e-311]
  [1.18455235e-311 1.18455235e-311 1.18453308e-311]
  [1.18455235e-311 1.18455221e-311 1.18455235e-311]
  [1.18453278e-311 1.18453277e-311

## Generating Random Numbers

In [3]:
# Generate an array full of random numbers, uniformly samples from [0.0, 1.0)
print(np.random.random((5, 4))) # pass in a size tuple

# Generate an array full of random numbers, uniformly samples from [0.0, 1.0)
print(np.random.rand(5, 4)) # function arguments (not a tuple)

# Sample numbers from a Gaussian (normal) distribution
print(np.random.normal(size=(2, 3))) # "standard normal" (mean = 0, s.d. = 1)

# Sample numbers from a Gaussian (normal) distribution
print(np.random.normal(50, 10, size=(2, 3))) # change mean to 50 and s.d. to 10

# Random integers
print(np.random.randint(1)) # a single integer in [0, 10)
print(np.random.randint(0, 10)) # same as above, specifying [low, high) explicit
print(np.random.randint(0, 10, size=5)) # 5 random integers as a 1D array
print(np.random.randint(0, 10, size=(2, 3))) # 2x3 array of random integers

[[0.21654607 0.61554334 0.60654829 0.08717398]
 [0.78457883 0.38895334 0.34247256 0.9551279 ]
 [0.07776038 0.49591149 0.24510733 0.46375647]
 [0.07833133 0.68768963 0.40724305 0.0738695 ]
 [0.59350296 0.27896575 0.14043298 0.32256538]]
[[0.55911848 0.48809622 0.11426459 0.04113566]
 [0.21803103 0.28480249 0.92699435 0.33461868]
 [0.03653213 0.3813932  0.53383922 0.60309082]
 [0.3576854  0.87851714 0.8139706  0.71124933]
 [0.28174307 0.89125681 0.19729061 0.22519699]]
[[-0.83327277 -0.3119596   1.18436951]
 [-1.51978714  1.27072139 -0.03426804]]
[[47.43639064 55.97025255 43.00215285]
 [42.88362114 58.85867494 43.24195875]]
0
1
[7 9 2 4 1]
[[4 2 0]
 [0 9 0]]


## Array Attributes

In [4]:
a = np.random.random((5, 4))  # 5x4 array of random numbers
print(a)
print(a.shape)

print(a.shape[0])  # number of rows
print(a.shape[1])  # number of columns

print(len(a.shape))
print(a.size)
print(a.dtype)

[[0.61516588 0.71548606 0.61189044 0.23478731]
 [0.26094241 0.45786036 0.54976956 0.20931641]
 [0.45516464 0.77319945 0.28263589 0.59582256]
 [0.37000768 0.46567877 0.88890181 0.01994777]
 [0.76327151 0.73414172 0.7371544  0.84999079]]
(5, 4)
5
4
2
20
float64


## Operations on Arrays

In [5]:
np.random.seed(693)  # seed the random number generator
a = np.random.randint(0, 10, size=(5, 4)) # 5x4 random integers in [0, 10)
print("Array:\n", a)

# Sum of all elements
print("Sum of all elements:", a.sum())

# Iterate over rows, to compute sum of each columns
print("Sum of each column:\n", a.sum(axis=0))

# Iterate over columns to compute sume of each rows
print("Sum of each row:\n", a.sum(axis=1))

# Statistics: min, max, mean (across rows, cols, and overall)
print("Minimum of each colulmn:\n", a.min(axis=0))
print("Maximum of each row:\n", a.max(axis=1))
print("Mean of all elements:", a.mean()) # leave out axis arg.

Array:
 [[2 0 5 1]
 [1 3 4 4]
 [9 2 9 1]
 [9 3 7 5]
 [4 7 0 3]]
Sum of all elements: 79
Sum of each column:
 [25 15 25 14]
Sum of each row:
 [ 8 12 21 24 14]
Minimum of each colulmn:
 [1 0 0 1]
Maximum of each row:
 [5 4 9 9 7]
Mean of all elements: 3.95


## Locate maximum value

In [6]:
def get_max_index(a):
    """Return the index of the maximum value in given 1D array."""
    # Quiz: Your code here
    return np.argmax(a)

In [7]:
a = np.array([9, 6, 2, 3, 12, 14, 7, 10], dtype=np.int32)  # 32-bit integer array
print("Array:", a)

# Find the maximum and its index in array
print("Maximum value:", a.max())
print("Index of max.:", get_max_index(a))

Array: [ 9  6  2  3 12 14  7 10]
Maximum value: 14
Index of max.: 5


## Using time function

In [8]:
import time

In [9]:
t1 = time.time()
print("ML4T")
t2 = time.time()
print("The time taken by print statement is ", t2 - t1, " seconds")

ML4T
The time taken by print statement is  0.0  seconds


## How Fast is Numpy?

In [10]:
from time import time

def how_long(func, *args):
    """Execute function with given arguments, and measure execution time."""
    t0 = time()
    result = func(*args) # all argument are passed in as-is
    t1 = time()
    return result, t1 - t0

def manual_mean(arr):
    """Compute mean (average) of all elements in the given 2D array."""
    sum = 0
    for i in range(0, arr.shape[0]):
        for j in range(0, arr.shape[1]):
            sum = sum + arr[i, j]
    return sum / arr.size

def numpy_mean(arr):
    """Compute mean(average) using Numpy"""
    return arr.mean()

In [11]:
nd1 = np.random.random((1000, 10000)) # use a sufficiently large array

# Time the two functions, retrieving results and execution times
res_manual, t_manual = how_long(manual_mean, nd1)
res_numpy, t_numpy = how_long(numpy_mean, nd1)
print("Manual: {:.6f} ({:.3f} secs.) vs. Numpy: {:.6f} ({:.3f} secs.)".format(res_manual, t_manual, res_numpy, t_numpy))

# Make sure both give us the same answer (upto some precision)
assert abs(res_manual - res_numpy) <= 10e-6, "Results aren't equal!"

# Compute speedup
speedup = t_manual / t_numpy
print("Numpy mean is", speedup, "times faster than manual for loops.")

Manual: 0.500016 (2.443 secs.) vs. Numpy: 0.500016 (0.009 secs.)
Numpy mean is 271.69129189647856 times faster than manual for loops.


## Accessing Array Elements

In [12]:
a = np.random.rand(5, 4)
print("array:\n", a)

# Accessing element at position (3, 2)
element = a[3, 2]
print(element)

# Element in defined range
print(a[0, 1:3])

# Top-left corner
print(a[0:2, 0:2])

# Slicing
# Note: Slice n:m:t specifies a range that starts n, and stops before m, in
print( a[:, 0:3:2]) # will select columns 0, 2 for every row

# Assigning a value to a particular location
a[0, 0] = 1
print("\nModified (replaced one element):\n", a)

# Assigning a single value to an entire row
a[0,:] = 2
print("\nModified (replaced a row with a single value):\n", a)

# Assigning a list to a column in an array
a[:, 3] = [1, 2, 3, 4, 5]
print("\nModified (replaced a column with a list):\n", a)

array:
 [[0.76484993 0.19443186 0.15294266 0.79396197]
 [0.7152339  0.68874783 0.55787223 0.36928588]
 [0.7906318  0.70489628 0.31082212 0.83572857]
 [0.38303999 0.21038991 0.26224769 0.31709792]
 [0.36169014 0.65767671 0.03944179 0.05352682]]
0.26224769384784963
[0.19443186 0.15294266]
[[0.76484993 0.19443186]
 [0.7152339  0.68874783]]
[[0.76484993 0.15294266]
 [0.7152339  0.55787223]
 [0.7906318  0.31082212]
 [0.38303999 0.26224769]
 [0.36169014 0.03944179]]

Modified (replaced one element):
 [[1.         0.19443186 0.15294266 0.79396197]
 [0.7152339  0.68874783 0.55787223 0.36928588]
 [0.7906318  0.70489628 0.31082212 0.83572857]
 [0.38303999 0.21038991 0.26224769 0.31709792]
 [0.36169014 0.65767671 0.03944179 0.05352682]]

Modified (replaced a row with a single value):
 [[2.         2.         2.         2.        ]
 [0.7152339  0.68874783 0.55787223 0.36928588]
 [0.7906318  0.70489628 0.31082212 0.83572857]
 [0.38303999 0.21038991 0.26224769 0.31709792]
 [0.36169014 0.65767671 0.0

## Accessing Elements

In [13]:
a = np.random.rand(5)

# accessing using list of indices
indices = np.array([1,2,3,3])
print(a[indices])

a = np.array([(20,25,10,23,26,32,10,5,0),(0,2,50,20,0,1,28,5,0)])
print (a)

# calculating mean
mean = a.mean()
print(mean)

# masking
print(a[a<mean])

# masking2
a[a<mean]= mean
print(a)

[0.19925213 0.82660771 0.13349003 0.13349003]
[[20 25 10 23 26 32 10  5  0]
 [ 0  2 50 20  0  1 28  5  0]]
14.277777777777779
[10 10  5  0  0  2  0  1  5  0]
[[20 25 14 23 26 32 14 14 14]
 [14 14 50 20 14 14 28 14 14]]


## Arithmetic Operations

In [14]:
a = np.array([(1, 2, 3, 4, 5), (10, 20, 30, 40, 50)])
print("Original array a:\n", a)

# Multiple a by 2
print("\nMultiply a by 2:\n", 2 * a)

# Divide a by 2
print("\nDivide a by 2:\n", a / 2)

# Divide a by 2
print("\nDivide a by 2:\n", a / 2.0)

b = np.array([(100, 200, 300, 400, 500), (1, 2, 3, 4, 5)])
print("\nOriginal array b:\n", b)

# Add the two arrays
print("\nAdd a + b:\n", a + b)

# Multiply a and b
print("\nMultiply a and b:\n", a * b)

# Divide a by b
print("\nDivide a by b:\n", a / b)

Original array a:
 [[ 1  2  3  4  5]
 [10 20 30 40 50]]

Multiply a by 2:
 [[  2   4   6   8  10]
 [ 20  40  60  80 100]]

Divide a by 2:
 [[ 0.5  1.   1.5  2.   2.5]
 [ 5.  10.  15.  20.  25. ]]

Divide a by 2:
 [[ 0.5  1.   1.5  2.   2.5]
 [ 5.  10.  15.  20.  25. ]]

Original array b:
 [[100 200 300 400 500]
 [  1   2   3   4   5]]

Add a + b:
 [[101 202 303 404 505]
 [ 11  22  33  44  55]]

Multiply a and b:
 [[ 100  400  900 1600 2500]
 [  10   40   90  160  250]]

Divide a by b:
 [[ 0.01  0.01  0.01  0.01  0.01]
 [10.   10.   10.   10.   10.  ]]
