# CMSC 197 Machine Learning: Numpy Quiz 1
Submitted by: Trixelle Johanna Juan Tong 

## Import numpy library
We will be importing numpy for us to work on the arrays and use its pre-built functions

In [1]:
import numpy as np

## Part 1
### Create y_hat 1D array
- Initialize y_hat with 20 linearly spaced numbers between -1 and 1
- We can achieve this by using the __np.linspace(start, end, num)__ function where:
    - start and end values are the range of the interval
    - num is the number of values for the linearly spaced we need at the specific range
- The function will return evenly spaced numbers at the given range

In [2]:
# store the evenly spaced numbers at y_hat variable
# used the format() method in using {} placeholders for formatting print statements
y_hat = np.array(np.linspace(-1, 1, num=20))
print("y_hat array: {}".format(y_hat))

y_hat array: [-1.         -0.89473684 -0.78947368 -0.68421053 -0.57894737 -0.47368421
 -0.36842105 -0.26315789 -0.15789474 -0.05263158  0.05263158  0.15789474
  0.26315789  0.36842105  0.47368421  0.57894737  0.68421053  0.78947368
  0.89473684  1.        ]


### Create y 1D array
- Initialize y with 20 linearly spaced numbers between -0 and 1
- We can achieve this by using the __np.linspace(start, end, num)__ function where:
    - start and end values are the range of the interval
    - num is the number of values for the linearly spaced we need at the specific range
- The function will return evenly spaced numbers at the given range

In [3]:
# store the evenly spaced numbers at y variable
y = np.array(np.linspace(0, 1, num=20))
print("y array: {}".format(y))

y array: [0.         0.05263158 0.10526316 0.15789474 0.21052632 0.26315789
 0.31578947 0.36842105 0.42105263 0.47368421 0.52631579 0.57894737
 0.63157895 0.68421053 0.73684211 0.78947368 0.84210526 0.89473684
 0.94736842 1.        ]


### Computing for the shape and dimension of y_hat and y
- We will be using __shape__ and __ndim__ attributes
- The shape attribute returns a tuple containing the number of rows and columns of the array
- The ndim attribute returns the array dimensions

In [4]:
# printing the y_hat and y shape and dimension
print("y_hat shape = {}".format(y_hat.shape))
print("y_hat dimension = {}".format(y_hat.ndim))
print("y shape = {}".format(y.shape))
print("y dimension = {}".format(y.ndim))

y_hat shape = (20,)
y_hat dimension = 1
y shape = (20,)
y dimension = 1


### Creating each of the loss functions
- Create the following loss functions:
    - Mean Squared Error
    - Mean Absolute Error
    - Mean Squared Logarithmic Error
    - Mean Absolute Percentage Error

In [5]:
# Parameters are y_hat and y
# each function will return the loss value

#Mean Squared Error function
def mean_squared_error(y_hat, y):
    squared_diff = np.square(y_hat-y)
    L = squared_diff.mean()
    return L
    
# Mean Absolute Error function
def mean_abs_error(y_hat, y):
    abs_sum = abs(y_hat-y)
    L = abs_sum.mean()
    return L

# Mean Squared Logarithmic Error
# should only contain non-negative target variables
def mean_squared_log_error(y_hat, y):
    log_squared = np.square(np.log(y_hat+1)-np.log(y+1))
    L = log_squared.mean()
    
    if L == np.inf:
        return "ERROR: Target variable contains negative values"
    else:
        return L

# Mean Absolute Percentage Error
# may return an undefined value if there's 0 since values cannot be divided to 0 
def mean_abs_percent_error(y_hat, y):
    abs_sum = abs((y_hat-y) / y) * 1
    L = abs_sum.mean()
    
    if L == np.inf:
        return "ERROR: values contain zero"
    else:
        return L


### Calling the different loss functions
- The functions will return the loss value
- Note: MSLE needs the target variable to be non-negative values, but there are negative values in y_hat array

In [6]:
print("Mean Squared Error = ", mean_squared_error(y_hat, y))
print("Mean Absolute Error = ", mean_abs_error(y_hat, y))
print("Mean Squared Logarithmic Error = ", mean_squared_log_error(y_hat, y))
print("Mean Absolute Percentage Error = ", mean_abs_percent_error(y_hat, y))

Mean Squared Error =  0.3421052631578948
Mean Absolute Error =  0.4999999999999999
Mean Squared Logarithmic Error =  ERROR: Target variable contains negative values
Mean Absolute Percentage Error =  ERROR: values contain zero


  log_squared = np.square(np.log(y_hat+1)-np.log(y+1))
  abs_sum = abs((y_hat-y) / y) * 1


## Part 2

### 1) Create M 1d array using arange function
- The function __np.arange([start,]stop, [step,]dtype)__ takes in params of:
    - start: the start value of the range (inclusive)
    - stop: the end value of the range (exclusive)

In [7]:
# create a 1d array from 1 to 16
M = np.arange(1,17)
print("M array: {}".format(M))

M array: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16]


### 2) Reshaping M into 4x4 array
- To change the shape of the 1d array to a 4x4 array, we will be using the __reshape(shape)__ function where:
    - shape is the number of rows, and columns it would be changed to

In [8]:
# reshape M array to have 4 rows and 4 columns
N = M.reshape(4,4)
print("N array:\n{}".format(N))

N array:
[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]
 [13 14 15 16]]


### 3) Getting the 2nd Row of N

In [9]:
# to get the 2nd row, we should index at 1 since arrays start at index 0
print("2nd row of N: {}".format(N[1]))

2nd row of N: [5 6 7 8]


### 4) Getting the minimum values per column of N
- To get the minimum values per column, we will be using the __np.min(ndarray, axis)__ function where:
    - ndarray is the array we will use to find the minimum value
    - axis is to indicate whether we are finding it for the rows or columns

In [10]:
# getting the min value for each column where axis = 0
min_val = np.min(N, axis=0)

print("Minimum values of each column in Array N")

# prints the minimum value per column of array N
for column in range(len(min_val)):
    print("Column {} = {}".format(column, min_val[column]))

Minimum values of each column in Array N
Column 0 = 1
Column 1 = 2
Column 2 = 3
Column 3 = 4


### 5) Getting the locations of the minimum values per column of N
- To get the locations of the minimum values per column, we will be using the __np.where(condition)__ function where:
    - condition is used to return elements that satisfy it 

In [11]:
# returns an array of index where the minimum values are at the N matrix
result = np.where(N == min_val)

print("The locations of the minimum values per column of N are at")

# iterate on the row and col location of where the minimum values per column are at
# prints the minimum value=[row, col] location 
for row, col in zip(result[0], result[1]):
    print("{}=[{},{}]".format(N[row][col],row, col))

The locations of the minimum values per column of N are at
1=[0,0]
2=[0,1]
3=[0,2]
4=[0,3]


### 6) Getting the average of the whole matrix of N
- We can get the average by using the __mean()__ function

In [12]:
print("Average of whole matrix of N = {}".format(N.mean()))

Average of whole matrix of N = 8.5


### 7) Creating vector V with 4 elements
- To prepopulate the elements of the vector, we will use __np.ones(shape, dtype)__ function where:
    - shape represents the array
    - dtype is the data type

In [13]:
# create vector with 4 elements of type int
V = np.ones(4, dtype=int)
print("Vector V = {}".format(V))

Vector V = [1 1 1 1]


### 8) Dot product of N and V

In [14]:
# Shape of matrix N is (4,4) while vector V is (4,) which means we could execute the dot product
print("The dot product of N and V = {}".format(np.dot(N,V)))

The dot product of N and V = [10 26 42 58]
