# NumPy Pair Problem

For today's pair, we will use NumPy to complete the following tasks consecutively.

In [41]:
import numpy as np

# Set random seed so that results are reproducible 
np.random.seed(0)

### 1. Generate three separate 1-dimensional arrays of 10,000 elements each.
* Array 1 should contain numbers randomly drawn from the uniform distribution between 1 and 100
* Array 2 should contain numbers randomly drawn from the normal distribution, with a mean of 0 and a standard deviation of 1
* Array 3 should contain numbers randomly drawn from the binomial distribution, where n=10 and p=0.5

In [42]:
uniform = np.random.uniform(1, 100, 10000)  
normal = np.random.normal(0, 1, 10000)       
binomial = np.random.binomial(10, 0.5, 10000) 

print("Array 1:", uniform[:5])
print("Array 2:", normal[:5])
print("Array 3:", binomial[:5])

Array 1: [55.33253689 71.80374727 60.67357423 54.94343512 42.94182513]
Array 2: [-0.72582032  0.56347552 -0.43563209 -0.10455255 -2.32127283]
Array 3: [4 5 5 3 8]


### 2. For each array, randomly sample 1,000 rows without replacement. (hint: `np.random.choice()`)


In [43]:

uniform = np.random.choice(uniform, 1000, replace=False)
normal = np.random.choice(normal, 1000, replace=False)
binomial = np.random.choice(binomial, 1000, replace=False)

print("Array 1:", uniform[:5])
print("Array 2:", normal[:5])
print("Array 3:", binomial[:5])

Array 1: [25.78942307 22.82505784 72.23603848 73.69670482 29.80696432]
Array 2: [-2.33277117  1.63920097 -1.08501398  0.06679293 -0.41540346]
Array 3: [6 6 5 7 3]


### 3. For each of the three samples:  
- Calculate the mean, median, and standard deviation 
- Find the index and the value of the largest element (hint: `np.argmax()`)  
- Find the value of the 75th percentile (hint: `np.percentile()`)  

In [44]:

uniform_mean = np.mean(uniform)
uniform_median = np.median(uniform)
uniform_std = np.std(uniform)


normal_mean = np.mean(normal)
normal_median = np.median(normal)
normal_std = np.std(normal)


binomial_mean = np.mean(binomial)
binomial_median = np.median(binomial)
binomial_std = np.std(binomial)

index_uniform = np.argmax(uniform)
index_normal = np.argmax(normal)
index_binomial = np.argmax(binomial)

max_value_uniform = uniform[index_uniform]
max_value_normal = normal[index_normal]
max_value_binomial = binomial[index_binomial]


print(index_uniform, index_normal, index_binomial)
print(max_value_uniform, max_value_normal, max_value_binomial)


percentile_75_uniform = np.percentile(uniform, 75)
percentile_75_normal = np.percentile(normal, 75)
percentile_75_binomial = np.percentile(binomial, 75)


print(percentile_75_uniform, percentile_75_normal, percentile_75_binomial)



255 628 7
99.83117493278014 2.699568441630259 10
75.20925502271464 0.6323050377198595 6.0


### 4. Sort each of the three samples in increasing order, then combine them together to create a 1,000 by 3 array. (hint: `np.sort()` and `np.stack()`). Now, check the shape of the array.

In [45]:
uniform_sorted=np.sort(uniform)
normal_sorted=np.sort(normal)
binomial_sorted=np.sort(binomial)

combined=np.stack((uniform_sorted,normal_sorted,binomial_sorted),axis=1)
combined

array([[ 1.1110819 , -3.69428529,  1.        ],
       [ 1.13557152, -3.01787832,  1.        ],
       [ 1.13695165, -2.92415314,  1.        ],
       ...,
       [99.57570491,  2.52860191,  9.        ],
       [99.65852424,  2.55385416, 10.        ],
       [99.83117493,  2.69956844, 10.        ]])

In [46]:
np.shape(combined)

(1000, 3)

### 5. Replace all negative values with 0. Replace all values that exceed 10 with 10.

In [53]:
combined_replaced=np.where(combined<0,0,combined)
combined_replaced=np.where(combined_replaced>10,10,combined_replaced)
combined_replaced

array([[ 1.1110819 ,  0.        ,  1.        ],
       [ 1.13557152,  0.        ,  1.        ],
       [ 1.13695165,  0.        ,  1.        ],
       ...,
       [10.        ,  2.52860191,  9.        ],
       [10.        ,  2.55385416, 10.        ],
       [10.        ,  2.69956844, 10.        ]])

### 6. Subtract 5 from all values that exceed 5. (hint: `np.where()`)

In [None]:
combined_replaced=np.where(combined_replaced>5,combined_replaced-5,combined_replaced)
combined_replaced

array([[1.1110819 , 0.        , 1.        ],
       [1.13557152, 0.        , 1.        ],
       [1.13695165, 0.        , 1.        ],
       ...,
       [5.        , 2.52860191, 4.        ],
       [5.        , 2.55385416, 5.        ],
       [5.        , 2.69956844, 5.        ]])

### 7. Produce a fourth column that is the sum of the first two columns.  (hint: `np.hstack()`). Now check the new shape of the array.

In [None]:
fourth_column = combined_replaced[:, 0] + combined_replaced[:, 1]

np.shape(fourth_column)
new_combined=np.hstack((combined_replaced,fourth_column.reshape(-1,1)))
new_combined

array([[1.1110819 , 0.        , 1.        , 1.1110819 ],
       [1.13557152, 0.        , 1.        , 1.13557152],
       [1.13695165, 0.        , 1.        , 1.13695165],
       ...,
       [5.        , 2.52860191, 4.        , 7.52860191],
       [5.        , 2.55385416, 5.        , 7.55385416],
       [5.        , 2.69956844, 5.        , 7.69956844]])

In [70]:
np.shape(new_combined)

(1000, 4)

### 8. Use broadcasting to add 1 to each element of the first column, 2 to each element of the second column, 3 to each element of the third column, and 4 to each element of the fourth column.

In [None]:
add_values = np.array([1, 2, 3, 4])
new_combined += add_values
new_combined

array([[ 2.1110819 ,  2.        ,  4.        ,  5.1110819 ],
       [ 2.13557152,  2.        ,  4.        ,  5.13557152],
       [ 2.13695165,  2.        ,  4.        ,  5.13695165],
       ...,
       [ 6.        ,  4.52860191,  7.        , 11.52860191],
       [ 6.        ,  4.55385416,  8.        , 11.55385416],
       [ 6.        ,  4.69956844,  8.        , 11.69956844]])

### 9. Matrix multiply this 1000x4 array by a 4x1 array of ones. How many unique values are in the resulting product?  (hint: `np.unique()`)

In [78]:
array_ones=np.ones((4,1))

result=np.dot(new_combined,array_ones)

unique=np.unique(result)

print(unique)
print("==========================================================")
print(len(unique))


[12.40598877 12.44779957 12.60215543 12.80197663 13.10944756 13.18789103
 13.2221638  13.27114305 13.2739033  13.32939749 13.36328397 13.38833382
 13.39124509 13.48477163 13.51906704 13.697767   13.76566562 13.7993105
 14.04688247 14.11577459 14.11963723 14.24324801 14.63924238 15.01919689
 15.09597644 15.24468195 15.32219202 15.47220317 15.52826745 15.57533602
 16.70377048 16.73861175 16.86897105 16.95513744 17.01051989 17.01531534
 17.05832279 17.09355034 17.11709453 17.23225581 17.25405543 17.52941705
 17.59825782 17.61214826 17.70120428 17.98525494 18.14893279 18.23841362
 18.52634811 18.53060273 18.63268897 18.69142712 18.6950651  18.70609017
 18.90427349 18.9171343  19.21503471 19.30975336 19.51701738 19.69282114
 19.74098426 19.86228632 19.88499246 19.93016433 19.98576973 20.03973659
 20.04848176 20.19847198 20.22085276 20.27723587 20.45735955 20.51347508
 20.8488449  21.00176228 21.0381574  21.17125467 21.50174397 21.5469433
 21.62878051 21.73904983 21.74697006 21.75459633 21.7

# Thank You