# **NumPy Basics: Arrays and Vectorized Computation**

Efficient and fast data analytic operations
* mathematical operations
* data cleaning
* data filtering
* data transformation

In [2]:
import numpy as np

## **NumPy Example: Mathematical Operations**
*   Perform complex computations on entire arrays without the need for Python 'for' loops
*   Fast

In [3]:
# create a NumPy array holding the ten values 10, 20, ..., 100
array_NumPy = np.array([10,20,30,40,50,60,70,80,90,100])
array_NumPy

array([ 10,  20,  30,  40,  50,  60,  70,  80,  90, 100])

In [4]:
# plain Python list with the same ten values, used to contrast list and array arithmetic
my_list = [10,20,30,40,50,60,70,80,90,100]
my_list

[10, 20, 30, 40, 50, 60, 70, 80, 90, 100]

Mathematical computations on *'List'* vs NumPy *'Array'*

In [5]:
# List: double every element with an explicit Python loop.
# The list is modified in place, so re-running this cell doubles the values again.
for position, value in enumerate(my_list):
  my_list[position] = value * 2

my_list[:10] #show first 10 elements in list

[20, 40, 60, 80, 100, 120, 140, 160, 180, 200]

In [6]:
# NumPy Array: all element multiplication
# the scalar is applied to every element at once, so no explicit loop is written
array_NumPy = array_NumPy * 2
array_NumPy

array([ 20,  40,  60,  80, 100, 120, 140, 160, 180, 200])



---


## **Creating NumPy Array**

**NumPy array from list**

> 1D NumPy Array

In [None]:
import numpy as np

# start from an ordinary Python list ...
dataList = [9,8,7,6,5,4,3,2,1]
print(type(dataList))
# ... and convert it into a NumPy array with np.array
dataNdarray = np.array(dataList)
print(type(dataNdarray))

In [None]:
# 1D NumPy array
# shape is a tuple with one entry per dimension; 9 items in one dimension gives (9,)
dataNdarray.shape

In [None]:
# dtype is the element type; an integer dtype here because the list held only ints
dataNdarray.dtype

> 2D NumPy Array 

In [None]:
# 2D NumPy array
# a list of two equal-length lists becomes an array with 2 rows and 5 columns
dataNdarray2 = np.array([[1.0,3.0,5.0,7.0,9.0],[2.0,4.0,6.0,8.0,10.0]])
print(dataNdarray2)


In [None]:
print(dataNdarray2.shape)
# (row, column) -> (2, 5) for the 2-row, 5-column array

In [None]:
# a float dtype, because the values were written as floats (1.0, 3.0, ...)
print(dataNdarray2.dtype)

**Creating new NumPy array with value=0**


*   np.zeros(size)



In [None]:
# 1D array of four zeros (np.zeros produces floats unless a dtype is given)
array1 = np.zeros(4)
print(array1)

In [None]:
# 2D np.zeros
# the shape is passed as [rows, columns]: 2 rows and 5 columns
array2 = np.zeros([2,5])
print(array2)

**Creating new NumPy array with value=1**

In [None]:
# 1D array of seven ones
array1 = np.ones(7)
print(array1)

**Creating new NumPy array with range values**

*   ex. ranges from 0 to 9
*   [0, 1, 2, 3, ..., 9 ] (the stop value itself is not included)

In [None]:
# np.arange(stop): integers from 0 up to, but not including, stop -> 0 ... 9
array1 = np.arange(10)
array1

In [None]:
# Custom np.arange
# np.arange(start, stop): 20, 21, 22, 23, 24 (the stop value 25 is excluded)
array2 = np.arange(20,25)
array2

In [None]:
# np.arange(start value, stop value, increment)
# 50, 55, ..., 95 (the stop value 100 is excluded)
array3 = np.arange(50, 100, 5)
array3

**Creating new NumPy array with random values**

In [None]:
# five random floats drawn uniformly from [0, 1)
# no random seed is set in this notebook, so the values differ on every run
array1 = np.random.rand(5)
array1



---



## **Computation Time with NumPy Array**

In [None]:
# Create array/list holding the integers 0 to 999999 (1000000 elements each)
array_NumPy = np.arange(1000000)
my_list = list(range(1000000))

# Repeat the same multiplication 10 times with each container and time it
# (%time is an IPython magic, so this cell only runs inside Jupyter/IPython)
print("NumPy timing:")
%time for _ in range(10): array_NumPy2 = array_NumPy * 2
print("List timing:")
%time for _ in range(10): my_list2 = [x * 2 for x in my_list]



---


## **Arithmetic with NumPy Arrays**



> **array with array**



In [None]:
# 2x3 float array used by the arithmetic examples in this section
arr = np.array([[1., 2., 3.], [4., 5., 6.]])
arr

In [None]:
# element-wise subtraction: each entry minus itself
arr - arr

In [None]:
# element-wise multiplication (not a matrix product): each entry times itself
arr*arr



> **array with scalar**



In [None]:
# the scalar is added to every element
arr + 10

In [None]:
# every element is divided by 4
arr/4

In [None]:
# power: ** raises every element to the given exponent
print('original arr = {0}'.format(arr))
print('\narr powered by 2 = {0}'.format(arr ** 2))



---



## **Basic NumPy Indexing**

In [None]:
# ten values 0, 10, 20, ..., 90 to index into
arr = np.arange(10)*10
arr

In [None]:
# index begins from 0, so arr[0] is the first element
arr[0]

In [None]:
# index 5 is the sixth element
arr[5]

In [None]:
# last index: negative indices count from the end, -1 is the last element
arr[-1]

In [None]:
# -2 is the second-to-last element
arr[-2]

In [None]:
# get array from index 0 up to, but not including, index 5 (indices 0..4)
arr[0:5]

In [None]:
# assigning values: the scalar is written to every element of the slice,
# modifying arr in place
arr[0:5] = -1
arr

In [None]:
# all elements: [:] selects the entire array
arr[:]

In [None]:
# overwrite every element of arr with 50 (in place)
arr[:] = 50
arr



> 2-Dimensional Array Indexing





In [None]:
# 3x3 integer array used by the 2D indexing examples
arr2d = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
print(arr2d)
print(arr2d.shape)

In [None]:
# whole row: a single index selects row 0
arr2d[0]

In [None]:
# two equivalent ways to read the element in row 0, column 2
print(arr2d[0][2])
print(arr2d[0,2])

In [None]:
# select first 2 rows (rows 0 and 1)
arr2d[:2]

In [None]:
# whole column: all rows of the first column
# :1 is a slice, so the result stays 2D with shape (3, 1);
# arr2d[:, 0] would give the same values as a 1D array
arr2d[:, :1]



---



## **NumPy Conditional Indexing**

In [None]:
# ten values 0, 10, 20, ..., 90 for the conditional examples
arr = np.arange(10)*10
print('original values = '+str(arr))

In [None]:
# return indices where value<50
# with only a condition, np.where returns a tuple holding one index array per dimension
np.where(arr<50)

In [None]:
# np.where(condition, x, y)
## for every element in the array: if condition=True -> yield x, otherwise -> yield y

arr = np.arange(10)*10
print('original values = '+str(arr))

# values <= 20 are replaced by -1, the rest are kept; arr itself is not modified
np.where(arr<=20, -1, arr)

In [None]:
# x may be an array expression: values <= 20 get 1000 added, the rest are unchanged
np.where(arr<=20, arr+1000, arr)

In [None]:
##### USEFUL #######
# get Array with condition=true
# A boolean mask can index the array directly, so the np.where round-trip is
# unnecessary; this selects the same elements as arr[np.where(arr<50)].
newArray = arr[arr<50]
newArray

## **NumPy Functions**



*   **Mathematical Functions**



In [None]:
# integers 0..9 as input for the mathematical functions
arr = np.arange(10)
print(arr)

In [None]:
# square root of every element
np.sqrt(arr)

In [None]:
# absolute: start from the non-positive values 0, -1, ..., -9
arr = -np.arange(10)
print('original values = {0}'.format(arr))

# np.absolute (np.abs is its short alias) drops the sign of every element
arr = np.absolute(arr)
print('absolute values = {0}'.format(arr))

In [None]:
# summation
arr = np.arange(10)
print('original values = '+str(arr))
# keep the total under its own name so that arr still refers to the array
# afterwards instead of being rebound to a scalar
total = np.sum(arr)
print('summation values = '+str(total))

In [None]:
# Sorting
# ten samples from the standard normal distribution (unseeded, so they vary per run)
arr = np.random.randn(10)
print('original values = '+str(arr))

# np.sort returns a sorted copy in ascending order; arr keeps its original order
np.sort(arr)

In [None]:
# Pi: the constant as a Python float
np.pi

In [None]:
# Sin --> np.sin(radian angle)
out = np.sin(np.pi/2)
print("Sin(1/2*Pi) = "+str(out))

# np.arange(start, stop, step)
# angles 0, pi/2, pi, 3*pi/2 in radians (the stop value 2*pi is excluded)
arr = np.arange(0, 2*np.pi, np.pi/2)
print(arr)
# np.sin works element-wise; floating-point rounding can show a tiny non-zero
# value where the exact result would be 0
np.sin(arr)



*   **Statistical Functions**



In [None]:
# integers 0..9 as input for the statistical functions
arr = np.arange(10)
print('original values = {0}'.format(arr))

In [None]:
# Max: largest element of the array
print('maximum values = {0}'.format(np.max(arr)))

In [None]:
# Min: smallest element of the array
print('minimum values = {0}'.format(np.min(arr)))

In [None]:
# Average: arithmetic mean of all elements
print('mean values = {0}'.format(np.mean(arr)))

In [None]:
# Standard deviation
# (np.std and np.var use the population formulas by default, i.e. ddof=0)
print('Std = {0}'.format(np.std(arr)))
# Variance
print('Variance = {0}'.format(np.var(arr)))



---
## **NumPy Save and Load Data Files**


In [None]:
# Save to numpy file
arr = np.arange(10) * 2
# np.save appends the .npy extension, so this writes some_array.npy
# to the current working directory
np.save('some_array', arr)

In [None]:
# Load numpy file
# reads back the .npy file written by np.save('some_array', ...)
new_array = np.load('some_array.npy')
new_array

In [None]:
# Load data from file
# Make sure the CSV file exists at this relative path before running this cell
# NOTE(review): the path looks like Google Colab's bundled sample_data folder - confirm
# skiprows=1 skips the first line (presumably the column header); np.loadtxt
# expects every remaining field to be numeric
Dataset = np.loadtxt(fname='sample_data/california_housing_train.csv', delimiter=',', skiprows=1)
Dataset



---


## **NumPy Advanced Functions**


*   Linear Regression https://numpy.org/doc/stable/reference/generated/numpy.polyfit.html


In [None]:
# sample data lying exactly on the line Y = 5X + 20
arrayX = np.array([1,2,3,4,5,6,7,8,9,10])
arrayY = np.array([25, 30, 35, 40, 45, 50, 55, 60, 65, 70])

In [None]:
# np.polyfit for Linear Regression
## Parameters: np.polyfit(x, y, deg) | x, y = x-axis and y-axis | deg = degree, 1=linear regression
## Returns: m = Slope, b = Y-intercept (equ. Y = mX+b)
## (coefficients are returned highest power first, hence the order m, b)

m, b = np.polyfit(arrayX, arrayY, 1)
print('slope = '+str(m))
print('interception = '+str(b))

In [None]:
# from linear equation: Y = mX + b
# evaluate the fitted line at every X to compare against the actual Y values
calcY = m*arrayX + b

print('actual Y = '+str(arrayY))
print('calculated Y = '+str(calcY))

>**Advanced Functions (cont.)**
>*   Correlation https://numpy.org/doc/stable/reference/generated/numpy.corrcoef.html

In [None]:
# generate data
# y is the exact negative of x, so the two are perfectly negatively correlated
x = np.arange(10)
y = np.arange(10)*-1
print(x)
print(y)

In [None]:
# np.corrcoef returns the Pearson correlation coefficients as a 2x2 matrix
## 1.0 or -1.0 = perfect correlation
## 0 = no correlation

np.corrcoef(x,y)
# return: 
# [0,0] = correlation between x and x -> always 1
# [0,1] = correlation between x and y 
# [1,0] = correlation between y and x 
# [1,1] = correlation between y and y -> always 1

In [None]:
# [0,1] picks the x-versus-y entry out of the 2x2 correlation matrix
print('Pearson Correlation Coef. between x and y = '
      +str(np.corrcoef(x,y)[0,1]))

In [None]:
# Another CorrCoef Example
# two independent sets of uniform random values; no seed is set, so the
# coefficient changes on every run
x = np.random.rand(10)
y = np.random.rand(10)
print(x)
print(y)

np.corrcoef(x,y)
# return: 
# [0,0] = correlation between x and x -> always 1
# [0,1] = correlation between x and y 
# [1,0] = correlation between y and x 
# [1,1] = correlation between y and y -> always 1