# Numpy Examples

## 1) Computation with arrays

### a) Sigmoid activation function of a vector

In [4]:
import numpy as np
# Sample input vector (positive and negative integers) that is passed to the
# sigmoid and softmax examples below.
x = np.array([4, -1, 7, 9, 3, -5])

**Apply the Sigmoid activation function: $y = sigmoid(x)$**
$$sigmoid(x_i) = \frac{1}{1+exp(-x_i)}$$
<img src='./sigmoid.png' width='40%'>

In [5]:
def sigmoid(val: np.ndarray) -> np.ndarray:
    """
        Compute the element-wise sigmoid, 1 / (1 + exp(-val)), of the given array

    The exponential is only ever evaluated on non-positive numbers
    (``exp(-|val|)``), so large-magnitude inputs cannot overflow:
    for val >= 0 the result is ``1 / (1 + exp(-val))`` and for val < 0 the
    algebraically identical ``exp(val) / (1 + exp(val))`` is used.

    Args:
        val (np.ndarray): input array (any array-like of real numbers,
            e.g. a list, is accepted)

    Returns:
        np.ndarray: array with the same shape as the input, values in [0, 1]
    """
    val = np.asarray(val)
    # Integer / boolean input is promoted to float so that negation and abs
    # behave arithmetically (unsigned integers would otherwise wrap around).
    if not np.issubdtype(val.dtype, np.floating):
        val = val.astype(float)

    # exp(-|val|) lies in (0, 1]: it can underflow to 0 but never overflow.
    decay = np.exp(-np.abs(val))
    return np.where(val >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))

In [6]:
sigmoid(x)

array([0.98201379, 0.26894142, 0.99908895, 0.99987661, 0.95257413,
       0.00669285])

### b) Softmax activation function

Activation function that normalizes the input vector (x) to a discrete probability distribution (the values of the result add up to 1).

$$y_i = \frac{exp(x_i)}{\sum_j{exp(x_j)}}$$

In [7]:
def softmax(ary: np.ndarray) -> np.ndarray:
    """
        Return the array where the softmax function was applied:
        y_i = exp(x_i) / sum_j exp(x_j)

    The sum in the denominator runs over every element of the input, so the
    values of the result add up to 1. The maximum of the input is subtracted
    before exponentiating; this leaves the result mathematically unchanged
    but prevents ``exp`` from overflowing on large inputs.

    Args:
        ary (np.ndarray): input array (any array-like of real numbers)

    Returns:
        np.ndarray: output array, same shape as the input
    """
    ary = np.asarray(ary, dtype=float)
    if ary.size == 0:
        # Nothing to normalize (and np.max would raise on an empty array).
        return np.empty(ary.shape, dtype=float)

    # Largest exponent becomes exp(0) == 1, every other one is <= 1.
    exps = np.exp(ary - np.max(ary))
    return exps / np.sum(exps)

In [8]:
softmax(x)

array([5.88673555e-03, 3.96645122e-05, 1.18238244e-01, 8.73669020e-01,
       2.16560899e-03, 7.26480881e-07])

## 2) Broadcasting: dataset normalization

In [9]:
# Input table: (n_samples x n_columns) values drawn from a normal distribution
np.random.seed(0)  # fixed seed, so the same table is generated on every run
n_samples, n_columns = 100, 5
mean, std = 1, 3
X = np.random.normal(loc=mean, scale=std, size=(n_samples, n_columns))

In [10]:
X

array([[ 6.29215704,  2.20047163,  3.93621395,  7.7226796 ,  6.60267397],
       [-1.93183364,  3.85026525,  0.54592838,  0.69034344,  2.23179551],
       [ 1.43213071,  5.36282052,  3.28311318,  1.36502505,  2.3315897 ],
       [ 2.00102298,  5.48223722,  0.38452521,  1.9392031 , -1.56228722],
       [-6.65896945,  2.96085579,  3.5933086 , -1.22649506,  7.80926387],
       [-3.36309702,  1.13727555,  0.43844845,  5.59833764,  5.40807631],
       [ 1.46484228,  2.13448756, -1.66335724, -4.9423894 , -0.04373645],
       [ 1.46904691,  4.69087204,  4.60713955, -0.16198045,  0.09309175],
       [-2.1456589 , -3.26005381, -4.11881057,  6.85232619, -0.52895655],
       [-0.3142229 , -2.75838608,  3.33247107, -3.84169354,  0.36177916],
       [-1.68639968,  2.16070749, -0.53241541, -2.54189655,  0.91545331],
       [ 2.28499561,  1.19955167,  1.90741569, -0.90296628, -0.0882235 ],
       [-1.01738134, -0.07865948, -1.43943885, -4.17884781,  1.53227843],
       [-0.20534281, -3.89059504,  2.3

In [11]:
X.shape

(100, 5)

**Apply z-score: normalize each column by subtracting its mean and dividing by its standard deviation**

In [12]:
def z_score_norm(ary: np.ndarray) -> np.ndarray:
    """
        Return the normalized array. Each column is normalized by
        subtracting its mean and dividing by its standard deviation.

    The statistics are computed with ``keepdims=True`` so that they
    broadcast against the input for every rank (without it, 3-D input
    fails to broadcast). Columns whose standard deviation is 0 (constant
    columns) are divided by 1 instead, so they come out as zeros rather
    than NaN.

    Args:
        ary (np.ndarray): input array

    Returns:
        np.ndarray: output array, same shape as the input
    """
    ary = np.asarray(ary)
    mean = compute_column_mean(ary, keepdims=True)
    std = compute_column_std(ary, keepdims=True)
    # Avoid 0/0 on constant columns: (ary - mean) is already 0 there.
    safe_std = np.where(std == 0, 1.0, std)
    return (ary - mean) / safe_std


def _column_axis(ary: np.ndarray) -> int:
    """Return the axis that the column statistics are reduced over.

    2-D input is reduced over axis 0, 3-D input over axis 1, and input of
    any other rank over its last axis.
    """
    return {2: 0, 3: 1}.get(np.ndim(ary), -1)


def compute_column_mean(ary: np.ndarray, keepdims: bool = False) -> np.ndarray:
    """
       Return the mean of each column

    Args:
        ary (np.ndarray): input array
        keepdims (bool): if True, the reduced axis is kept with length 1 so
            the result broadcasts against ``ary``. Defaults to False.

    Returns:
        np.ndarray: output array
    """
    return np.mean(ary, axis=_column_axis(ary), keepdims=keepdims)


def compute_column_std(ary: np.ndarray, keepdims: bool = False) -> np.ndarray:
    """
       Return the std of each column

    Args:
        ary (np.ndarray): input array
        keepdims (bool): if True, the reduced axis is kept with length 1 so
            the result broadcasts against ``ary``. Defaults to False.

    Returns:
        np.ndarray: output array
    """
    return np.std(ary, axis=_column_axis(ary), keepdims=keepdims)

In [13]:
z_score_norm(X)

array([[ 1.85121484e+00,  4.35937637e-01,  1.03609275e+00,
         2.25758183e+00,  1.77957297e+00],
       [-8.66924873e-01,  1.01080095e+00, -9.28497305e-02,
        -8.64767513e-03,  2.73738572e-01],
       [ 2.44910663e-01,  1.53784417e+00,  8.18614550e-01,
         2.08774150e-01,  3.08119196e-01],
       [ 4.32937222e-01,  1.57945439e+00, -1.46595908e-01,
         3.93807860e-01, -1.03338088e+00],
       [-2.42930694e+00,  7.00890145e-01,  9.21907556e-01,
        -6.26365014e-01,  2.19526163e+00],
       [-1.33997670e+00,  6.54716527e-02, -1.28639829e-01,
         1.57299476e+00,  1.36801583e+00],
       [ 2.55722275e-01,  4.12945780e-01, -8.28527104e-01,
        -1.82384324e+00, -5.10216947e-01],
       [ 2.57111962e-01,  1.30370668e+00,  1.25950650e+00,
        -2.83316223e-01, -4.63077543e-01],
       [-9.37597001e-01, -1.46675838e+00, -1.64617668e+00,
         1.97710312e+00, -6.77382683e-01],
       [-3.32282734e-01, -1.29195447e+00,  8.35050399e-01,
        -1.46913473e+00

## 3) Accessing Numpy Arrays

In [14]:
#Input data
import numpy as np

# Column names, listed in the same order as the columns of X
columns = ['height', 'width', 'intensity', 'weight']

# Input table (12 samples x 4 attributes), one row per sample
X = np.array([
    [5.1, 3.5, 1.4, 0.2],
    [4.3, 3.0, 1.1, 0.1],
    [5.0, 3.4, 1.6, 0.4],
    [5.1, 3.4, 1.5, 0.2],
    [6.9, 3.1, 4.9, 1.5],
    [6.7, 3.1, 4.4, 1.4],
    [6.0, 2.9, 4.5, 1.5],
    [6.1, 3.0, 4.6, 1.4],
    [6.5, 3.0, 5.8, 2.2],
    [7.7, 3.8, 6.7, 2.2],
    [7.4, 2.8, 6.1, 1.9],
    [6.8, 3.2, 5.9, 2.3],
])

# Class label of each sample (one label per row of X)
labels = np.array([0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 0, 2])

In [15]:
# Look up, by name, the positions of the two columns used in the cells below
height_i, intensity_i = (columns.index(name) for name in ('height', 'intensity'))

**Compute the average height:**

In [16]:
# Select the height column through its looked-up index rather than a hard-coded 0
print("the mean is %.2f" % np.mean(X[:, height_i]))

the mean is 6.13


**Compute the average height of samples with intensity greater than 5:**

In [17]:
# Boolean mask: True for the rows whose intensity is greater than 5
mask = X[:, intensity_i] > 5
# Pick the masked rows and the height column in a single indexing step
# (X[mask][:, 0] would first copy every column of the selected rows).
print("the mean is %.2f" % np.mean(X[mask, height_i]))

the mean is 7.10


**Compute the probability of class 2 if intensity>5**
$$p = \frac{\#(intensity>5 \land class2)}{\#(intensity>5)}$$

In [18]:
# mask holds one boolean per row, so counting its True entries gives the
# total amount of rows with intensity > 5
denominator = np.count_nonzero(mask)

# Indexes of the rows with intensity > 5. flatnonzero always returns a 1-D
# array, even when a single row matches (argwhere(...).squeeze() would
# collapse that case to a 0-d value that cannot be iterated or counted).
idxs = np.flatnonzero(mask)

# Among those rows, count how many carry label 2
numerator = np.count_nonzero(labels[idxs] == 2)

# If no row has intensity > 5 the conditional probability is undefined
probability = numerator / denominator if denominator else float("nan")
print("The probability of class 2 if intensity > 5 is %.2f" % probability)

The probability of class 2 if intensity > 5 is 0.75


**Print the height of the top 3 records with highest intensity:**

In [19]:
# Imported here so the cell also runs on a fresh kernel (Restart & Run All):
# `fg` was previously only imported by a later cell.
from colored import attr, fg

color = fg('green')
reset = attr('reset')  # restores the default colour after the highlighted line

# Row indexes sorted by increasing intensity, hence the highest value is the
# last one and the last three entries are the top 3. Only the intensity
# column is sorted.
top_3 = np.argsort(X[:, intensity_i])[-3:] # Use it to index
rank = len(top_3)

print("Highest intensity")

for sample in X[top_3]:
    # Rows are visited from the 3rd highest to the highest intensity
    print(f"Rank number {rank}")
    rank -= 1
    for column, element in zip(columns, sample):
        if column == 'height':
            # The requested value (height) is highlighted in green
            print(color + f"\t{column}:\t{element}" + reset)
        elif column == 'intensity':
            print(f"\t{column}:\t{element}")
        else:
            # Shorter names get an extra tab so the values line up
            print(f"\t{column}:\t\t{element}")
    

Highest intensity
Rank number 3
	height:	6.8
	width:		3.2
	intensity:	5.9
	weight:		2.3
Rank number 2
	height:	7.4
	width:		2.8
	intensity:	6.1
	weight:		1.9
Rank number 1
	height:	7.7
	width:		3.8
	intensity:	6.7
	weight:		2.2


In [20]:
X[top_3]

array([[6.8, 3.2, 5.9, 2.3],
       [7.4, 2.8, 6.1, 1.9],
       [7.7, 3.8, 6.7, 2.2]])

In [22]:
# try to print using colors

# fg comes from the third-party `colored` package. The value returned by
# fg('green') is prepended to the message before printing; presumably it is a
# terminal colour code that makes the text green (confirm against the package
# documentation).
from colored import fg
color = fg('green')
print (color + 'Hello World !!!')

Hello World !!!
