# NumPy Pair Problem

For today's pair, we will use NumPy to complete the following tasks consecutively.

In [1]:
import numpy as np

# Fix the seed of NumPy's legacy global generator so every random draw
# below is reproducible after Restart Kernel -> Run All.
SEED = 0
np.random.seed(SEED)

### 1. Generate three separate 1-dimensional arrays of 10,000 elements each.
* Array 1 should contain numbers randomly drawn from the uniform distribution between 1 and 100
* Array 2 should contain numbers randomly drawn from the normal distribution, with a mean of 0 and a standard deviation of 1
* Array 3 should contain numbers randomly drawn from the binomial distribution, where n=10 and p=0.5

In [2]:
# Number of elements drawn for each of the three source arrays.
N_ELEMENTS = 10_000

# The draws happen in this fixed order (uniform, normal, binomial) so the
# global random stream is consumed the same way on every run.
arr1 = np.random.uniform(low=1, high=100, size=N_ELEMENTS)  # uniform on [1, 100)
arr2 = np.random.normal(loc=0, scale=1, size=N_ELEMENTS)    # standard normal
arr3 = np.random.binomial(n=10, p=0.5, size=N_ELEMENTS)     # integer counts in 0..10

### 2. For each array, randomly sample 1,000 elements without replacement. (hint: `np.random.choice()`)


In [4]:
# Number of elements drawn (without replacement) from each source array.
SAMPLE_SIZE = 1_000

# The generator is consumed left to right during unpacking, so the three
# draws happen in the order arr1, arr2, arr3.
s_arr1, s_arr2, s_arr3 = (
    np.random.choice(source, size=SAMPLE_SIZE, replace=False)
    for source in (arr1, arr2, arr3)
)

### 3. For each of the three samples:  
- Calculate the mean, median, and standard deviation
- Find the index and the value of the largest element (hint: `np.argmax()`)  
- Find the value of the 75th percentile (hint: `np.percentile()`)  

In [6]:
def _describe_sample(sample):
    """Compute the summary statistics requested for one 1-D sample.

    Parameters
    ----------
    sample : numpy.ndarray
        One-dimensional array of numeric values.

    Returns
    -------
    tuple
        ``(mean, median, sd, largest_idx, largest_value, perc_75)`` where

        * ``sd`` comes from ``np.std`` with its default ``ddof=0``
          (population standard deviation),
        * ``largest_idx`` comes from ``np.argmax``, which reports the first
          position when the maximum occurs more than once,
        * ``perc_75`` comes from ``np.percentile(sample, 75)``.
    """
    largest_idx = np.argmax(sample)
    return (
        np.mean(sample),
        np.median(sample),
        np.std(sample),
        largest_idx,
        sample[largest_idx],
        np.percentile(sample, 75),
    )


# For sample 1
(s_arr1_mean, s_arr1_median, s_arr1_sd,
 s_arr1_l_idx, s_arr1_l_element, s_arr1_75_perc) = _describe_sample(s_arr1)

# For sample 2
(s_arr2_mean, s_arr2_median, s_arr2_sd,
 s_arr2_l_idx, s_arr2_l_element, s_arr2_75_perc) = _describe_sample(s_arr2)

# For sample 3
(s_arr3_mean, s_arr3_median, s_arr3_sd,
 s_arr3_l_idx, s_arr3_l_element, s_arr3_75_perc) = _describe_sample(s_arr3)

### 4. Sort each of the three samples in increasing order, then combine them together to create a 1,000 by 3 array. (hint: `np.sort()` and `np.stack()`). Now, check the shape of the array.

In [9]:
# Sort each sample in increasing order; np.sort returns a sorted copy and
# leaves the unsorted samples untouched.
sorted_s_arr1, sorted_s_arr2, sorted_s_arr3 = (
    np.sort(sample) for sample in (s_arr1, s_arr2, s_arr3)
)

# Place the three sorted samples side by side as the columns of one array.
sorted_columns = [sorted_s_arr1, sorted_s_arr2, sorted_s_arr3]
combined_arr = np.stack(sorted_columns, axis=1)
combined_arr.shape

(1000, 3)

### 5. Replace all negative values with 0. Replace all values that exceed 10 with 10.

In [10]:
# Clamp every value into [0, 10]: anything below 0 becomes 0 and anything
# above 10 becomes 10. Assigning through [...] writes into the existing
# array, so combined_arr is still updated in place.
combined_arr[...] = np.clip(combined_arr, 0, 10)

### 6. Subtract 5 from all values that exceed 5. (hint: `np.where()`)

In [11]:
# Elements strictly greater than 5 are reduced by 5; all others are kept.
exceeds_five = combined_arr > 5
combined_arr = np.where(exceeds_five, combined_arr - 5, combined_arr)

### 7. Produce a fourth column that is the sum of the first two columns.  (hint: `np.hstack()`). Now check the new shape of the array.

In [12]:
# Element-wise sum of the first two columns. Deliberately NOT named `sum`:
# binding that name at notebook level would shadow Python's built-in sum()
# for every cell executed afterwards in the same kernel.
first_two_sum = combined_arr[:, 0] + combined_arr[:, 1]

# np.hstack joins along the column axis, so the 1-D sum must first be
# reshaped into a single column (n_rows, 1) before it can be appended.
new_arr = np.hstack((combined_arr, first_two_sum.reshape(-1, 1)))
new_arr.shape

(1000, 4)

### 8. Use broadcasting to add 1 to each element of the first column, 2 to each element of the second column, 3 to each element of the third column, and 4 to each element of the fourth column.

In [13]:
# Per-column offsets: +1, +2, +3, +4 for columns 0..3.
adjustments = np.arange(1, 5)

# Broadcasting stretches the single (1, 4) row of offsets across every row
# of new_arr. `+=` updates new_arr in place, so re-running this cell adds
# the offsets a second time.
new_arr += adjustments[np.newaxis, :]

### 9. Matrix multiply this 1000x4 array by a 4x1 array of ones. How many unique values are in the resulting product?  (hint: `np.unique()`)

In [14]:
# Multiplying by a (4, 1) column of ones yields the row sums of new_arr as
# a column vector.
ones_column = np.ones((4, 1))
res_product = new_arr.dot(ones_column)

# np.unique returns the sorted distinct values as a flat array; its size is
# the number of unique values in the product.
unique_values = np.unique(res_product)
unique_values.size

589

# Thank You