In [3]:
import numpy as np
from numpy.random import default_rng

In [29]:
# Creating an array
sales = [0, 5, 155, 0, 518, 0, 1827, 616, 317, 325]

sales_array = np.array(sales)

In [30]:
sales_array

array([   0,    5,  155,    0,  518,    0, 1827,  616,  317,  325])

In [31]:
# What do each of the array properties return?
print(f'ndim: {sales_array.ndim}')
print(f'size: {sales_array.size}') # 
print(f'shape: {sales_array.shape}') # what is the size for each dimension
print(f'dtype: {sales_array.dtype}')

ndim: 1
size: 10
shape: (10,)
dtype: int64


In [32]:
sales_two = [[0, 5, 155, 0, 518] , [0, 1827, 616, 317, 325]]
sales_two_array = np.array(sales_two)
sales_two_array



array([[   0,    5,  155,    0,  518],
       [   0, 1827,  616,  317,  325]])

In [33]:
print(f'ndim: {sales_two_array.ndim}')
print(f'size: {sales_two_array.size}') # 
print(f'shape: {sales_two_array.shape}') # what is the size for each dimension
print(f'dtype: {sales_two_array.dtype}')

ndim: 2
size: 10
shape: (2, 5)
dtype: int64


In [34]:
array = np.array(range(5))

In [35]:
array

array([0, 1, 2, 3, 4])

In [36]:
array_2d = np.array([range(5), range(5)])
array_2d

array([[0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4]])

In [37]:
array_2d + 1

array([[1, 2, 3, 4, 5],
       [1, 2, 3, 4, 5]])

In [38]:
print(f'ndim: {array_2d.ndim}')
print(f'size: {array_2d.size}') # 
print(f'shape: {array_2d.shape}') # what is the size for each dimension
print(f'dtype: {array_2d.dtype}')

ndim: 2
size: 10
shape: (2, 5)
dtype: int64


In [None]:
# To transpose an array
array_2d.T

array([[0, 0],
       [1, 1],
       [2, 2],
       [3, 3],
       [4, 4]])

In [43]:
# Convert a list into a numpy array
my_list = [x * 10 for x in range(1, 11)]

my_array = np.array(my_list)
my_array

array([ 10,  20,  30,  40,  50,  60,  70,  80,  90, 100])

In [44]:
print(f'ndim: {my_array.ndim}')
print(f'size: {my_array.size}') # 
print(f'shape: {my_array.shape}') # what is the size for each dimension
print(f'dtype: {my_array.dtype}')

ndim: 1
size: 10
shape: (10,)
dtype: int64


### Array Creation

In [None]:
print(f'ones array {np.ones(4,)}')
print(f'zeros array {np.zeros((2, 5), dtype = int)}')
print(f'a range array {np.arange(10)}') # Note the start at 0
print(f'spaced array {np.linspace(0, 100, 5)}') # Stop point is inclusive
print(f'transposed array {np.arange(1, 9, 2).reshape(2, 2)}') # chaining with a reshape

ones array [1. 1. 1. 1.]
zeros array [[0 0 0 0 0]
 [0 0 0 0 0]]
a range array [0 1 2 3 4 5 6 7 8 9]
spaced array [  0.  25.  50.  75. 100.]
transposed array [[1 3]
 [5 7]]


In [52]:
np.arange(1, 9, 2)

array([1, 3, 5, 7])

In [54]:
np.identity(10, 'int')

array([[1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 1]])

In [60]:
np.linspace(0, 100, 11)


array([  0.,  10.,  20.,  30.,  40.,  50.,  60.,  70.,  80.,  90., 100.])

### Random Numbers

In [None]:
rng = default_rng(12345) # Sets the seed
print(f'{rng}')

Generator(PCG64)


In [67]:
random_array = rng.random(10)
random_array

array([0.22733602, 0.31675834, 0.79736546, 0.67625467, 0.39110955,
       0.33281393, 0.59830875, 0.18673419, 0.67275604, 0.94180287])

In [None]:
# Specify a mean and std dev
rng = default_rng(12345) #Sets the seed
mean, stddev = 5, 1
random_normal = rng.normal(mean, stddev, size = 10)
random_normal

array([3.57617496, 6.26372846, 4.12933826, 4.74082677, 4.92465669,
       4.25911535, 3.6322073 , 5.6488928 , 5.36105811, 3.04713694])

In [69]:
rng = np.random.default_rng(616)
rng.random(10)

array([0.39682145, 0.86568572, 0.46040359, 0.30599848, 0.57381588,
       0.08888468, 0.88194347, 0.73228387, 0.73215182, 0.56233394])

In [70]:
rng.integers(0, 10, 100)

array([8, 3, 6, 0, 3, 3, 1, 2, 2, 4, 9, 1, 2, 5, 1, 8, 9, 0, 3, 1, 5, 8,
       0, 6, 1, 7, 0, 7, 6, 0, 3, 7, 1, 9, 4, 1, 6, 1, 4, 4, 9, 5, 3, 4,
       7, 8, 3, 3, 5, 0, 4, 9, 9, 5, 6, 5, 8, 2, 3, 0, 0, 8, 3, 8, 4, 8,
       8, 9, 7, 3, 8, 4, 9, 2, 6, 3, 0, 3, 2, 0, 0, 4, 9, 5, 4, 1, 8, 2,
       7, 7, 1, 2, 2, 4, 1, 6, 7, 3, 7, 5])

In [None]:
rng.normal(50, 5, 10) # sample from a distribution with mean of 50, stddev of 5, 10 numbers

array([53.53115044, 46.35468178, 46.1580894 , 52.89177118, 45.90693639,
       44.51748741, 57.41467712, 51.82624671, 53.9032125 , 53.21454641])

### Exercise

In [None]:
# Create an array and reshape it into a 5 x 2 array
np.linspace(10, 100, 10).reshape(5, 2)
# np.arange(10, 101, 10).reshape(5, 2)

array([[ 10.,  20.],
       [ 30.,  40.],
       [ 50.,  60.],
       [ 70.,  80.],
       [ 90., 100.]])

In [90]:
# Create a random array of numbers between zero and 1 in a 3 x 3 shape
rng = default_rng(2022)
random_array = rng.random(9).reshape(3,3)
random_array

array([[0.24742606, 0.09299006, 0.61176337],
       [0.06066207, 0.66103343, 0.75515778],
       [0.1108689 , 0.04305584, 0.41441747]])

In [89]:
np.arange(1, 11)*10

array([ 10,  20,  30,  40,  50,  60,  70,  80,  90, 100])

### Array Slicing

In [6]:
product_array2d = np.array([['fruits', 'vegetables', 'cereal', 'dairy', 'eggs'], ['snacks', 'beverages', 'coffee', 'tea', 'spices']]).reshape(2, 5)
product_array2d

array([['fruits', 'vegetables', 'cereal', 'dairy', 'eggs'],
       ['snacks', 'beverages', 'coffee', 'tea', 'spices']], dtype='<U10')

In [None]:
# How to index coffee?
product_array2d[1, 2]

np.str_('coffee')

In [9]:
# Grab all rows, last 3 terms
product_array2d[:, ::2]

array([['fruits', 'cereal', 'eggs'],
       ['snacks', 'coffee', 'spices']], dtype='<U10')

In [12]:
# Interger array
interger_array = np.arange(12)
interger_array

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [16]:
interger_array[-1]
interger_array[:5]

array([0, 1, 2, 3, 4])

In [17]:
interger_array[::2]

array([ 0,  2,  4,  6,  8, 10])

In [19]:
new_array = interger_array.reshape(3, 4)
new_array

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [20]:
new_array[:, :]

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [23]:
new_array[1:, :]

array([[ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [26]:
# how to grab #9
new_array[2, 1]

np.int64(9)

### Array Challenge

In [27]:
# Create a random array of numbers between zero and 1 in a 3 x 3 shape
rng = default_rng(2022)
random_array = rng.random(9).reshape(3,3)
random_array

array([[0.24742606, 0.09299006, 0.61176337],
       [0.06066207, 0.66103343, 0.75515778],
       [0.1108689 , 0.04305584, 0.41441747]])

In [None]:
# Extract first two rows
print(f'first two rows: {random_array[:2, :]}')
print(f'first column {random_array[:, 0]}')
print(f'2nd number, 3rd row {random_array[2, 1]}')

first two rows: [[0.24742606 0.09299006 0.61176337]
 [0.06066207 0.66103343 0.75515778]]
first column [0.24742606 0.06066207 0.1108689 ]
2nd number, 3rd row 0.04305584439252108


In [37]:
# Vectorizing
rng = np.random.default_rng(616)
inventory = rng.integers(0, 100, 10)
inventory

array([39, 39, 93, 86, 48, 46, 48, 30, 11, 57])

In [39]:
print(f'sutract 24: {inventory - 24}')
print(f'halve: {inventory/2}')

sutract 24: [ 15  15  69  62  24  22  24   6 -13  33]
halve: [19.5 19.5 46.5 43.  24.  23.  24.  15.   5.5 28.5]


In [47]:
price = (rng.random(10) * 10).round(2)
price

array([5.09, 2.26, 0.17, 8.52, 8.97, 8.06, 9.09, 3.71, 4.11, 3.  ])

In [48]:
(price * inventory).sum()

np.float64(2600.33)

In [50]:
inventory_list = list(inventory)
inventory_list

[np.int64(39),
 np.int64(39),
 np.int64(93),
 np.int64(86),
 np.int64(48),
 np.int64(46),
 np.int64(48),
 np.int64(30),
 np.int64(11),
 np.int64(57)]

In [55]:
new_inventory = []

for x in inventory_list:
    new_inventory.append(x + 2)

new_inventory


[np.int64(41),
 np.int64(41),
 np.int64(95),
 np.int64(88),
 np.int64(50),
 np.int64(48),
 np.int64(50),
 np.int64(32),
 np.int64(13),
 np.int64(59)]

In [57]:
# Multiplication in base python
[x * y for x, y in zip(inventory_list, price)]

[np.float64(198.51),
 np.float64(88.13999999999999),
 np.float64(15.81),
 np.float64(732.7199999999999),
 np.float64(430.56000000000006),
 np.float64(370.76000000000005),
 np.float64(436.32),
 np.float64(111.3),
 np.float64(45.21),
 np.float64(171.0)]

### Array operators

In [59]:
# Add a flat shipping cost of 5 to our prices
prices = np.array([5.99, 6.99, 22.49, 99.99, 4.99, 49.99 ])
total_owed = prices + 5
total_owed



array([ 10.99,  11.99,  27.49, 104.99,   9.99,  54.99])

In [86]:
# Discount percent is first six numbers of the random_array
discount_percent = random_array[:2, :].flatten()
print(f' {discount_percent}')

percent_owed = 1 - discount_percent 
print(f'{percent_owed}')

final_owed = (percent_owed * total_owed).round(2)
print(f'{final_owed}')

# Format with a $ sign
formatted_final_owed = [f'${value:.2f}' for value in final_owed]
print(f'{formatted_final_owed}')

 [0.24742606 0.09299006 0.61176337 0.06066207 0.66103343 0.75515778]
[0.75257394 0.90700994 0.38823663 0.93933793 0.33896657 0.24484222]
[ 8.27 10.88 10.67 98.62  3.39 13.46]
['$8.27', '$10.88', '$10.67', '$98.62', '$3.39', '$13.46']


In [61]:
random_array

array([[0.24742606, 0.09299006, 0.61176337],
       [0.06066207, 0.66103343, 0.75515778],
       [0.1108689 , 0.04305584, 0.41441747]])

In [None]:
# Filtering arrays
sales_array = np.array([[0, 5, 155, 0, 518], [0, 1827, 616, 317, 325]])
sales_array

# Returns an array w/ T/F
sales_array != 0

array([[False,  True,  True, False,  True],
       [False,  True,  True,  True,  True]])

In [91]:
# filter based on the logical array test results 
sales_array[sales_array != 0]

array([   5,  155,  518, 1827,  616,  317,  325])

In [None]:
# Using | and & to filter
sales_array[(sales_array == 616) | (sales_array < 100)]

array([  0,   5,   0,   0, 616])

In [None]:
# Use an boolean mask to make code cleaner for filter
mask = (sales_array > 100) & (sales_array < 500)
sales_array[mask]


array([155, 317, 325])

In [99]:
# Using filter from different arrays
s_array = np.array([0, 5, 155, 0, 518])
product_array = np.array(['fruits', 'vegetables', 'cereal', 'dairy', 'eggs'], dtype = '<U10')

product_array[s_array > 0]

array(['vegetables', 'cereal', 'eggs'], dtype='<U10')

In [None]:
# Use modulus operator to filter for even numbers
my_array = np.arange(20)
my_array[my_array % 2 == 0]


array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

In [103]:
even_odd = np.array(['even', 'odd']*10)
even_odd

array(['even', 'odd', 'even', 'odd', 'even', 'odd', 'even', 'odd', 'even',
       'odd', 'even', 'odd', 'even', 'odd', 'even', 'odd', 'even', 'odd',
       'even', 'odd'], dtype='<U4')

### where() function for logical tests

In [106]:
inv_array = np.array([12, 102, 18, 0, 0])
print(f'{inv_array}')
print(f'{product_array}')

[ 12 102  18   0   0]
['fruits' 'vegetables' 'cereal' 'dairy' 'eggs']


In [110]:
np.where(inv_array <=0, "Out of Stock", product_array)

array(['fruits', 'vegetables', 'cereal', 'Out of Stock', 'Out of Stock'],
      dtype='<U12')

In [None]:
# Can nest where() calls within where() calls
np.where(my_array % 2 == 0, 'even', np.where(my_array == 9, my_array, 'odd'))


array(['even', 'odd', 'even', 'odd', 'even', 'odd', 'even', 'odd', 'even',
       '9', 'even', 'odd', 'even', 'odd', 'even', 'odd', 'even', 'odd',
       'even', 'odd'], dtype='<U21')

### Exercise: Filtering Arrays

In [146]:
products = np.array(
    ["salad", "bread", "mustard", "rare tomato", "cola", "gourmet ice cream"]
)

# Check if sizes are compatible in the arrays
products.size == prices.size


True

In [147]:
# Filter our products that are under 25
products[prices > 25]

array(['rare tomato', 'gourmet ice cream'], dtype='<U17')

In [148]:
# Modify the mask so it also includes cola OR prices over 25
mask = (products == 'cola') | (prices > 25)
fancy_feast_special = products[mask]
fancy_feast_special

array(['rare tomato', 'cola', 'gourmet ice cream'], dtype='<U17')

In [145]:
shipping_cost = np.where(prices > 20, 0, 5)
shipping_cost

array([5, 5, 0, 0, 5, 0])

### Array Aggregation

In [None]:
rng = np.random.default_rng(616)
price = (rng.random(10) * 10).round(2)

In [1]:
inventory = rng.integers(0, 100, 10)
inventory

NameError: name 'rng' is not defined

In [None]:
price.mean()

np.float64(5.6)

In [None]:
inventory.mean()

np.float64(35.3)

In [None]:
print(f'{inventory.sum()}')
print(f'{inventory.min()}')
print(f'{inventory.max()}')
print(f'{inventory.mean()}')

353
6
80
35.3


In [None]:
# Total value of products
print(f'{(price * inventory).sum().round(2)}')

1848.29


In [None]:
# Show the most expensive product index value
(price * inventory).argmin()

np.int64(3)

In [None]:
price_2d = price.reshape(5, 2)
price_2d

array([[3.97, 8.66],
       [4.6 , 3.06],
       [5.74, 0.89],
       [8.82, 7.32],
       [7.32, 5.62]])

In [None]:
print(f'{price_2d.sum(axis = 0)}')  
print(f'{price_2d.sum(axis = 1)}')  

[30.45 25.55]
[12.63  7.66  6.63 16.14 12.94]


In [None]:
print(f'{np.median(sales_array)}')
print(f'{np.mean(sales_array)}')
print(f'{np.std(sales_array).round(2)}')

236.0
376.3
529.14


In [None]:
# UNIQUE values of sales array 
np.unique(sales_array)

array([   0,    5,  155,  317,  325,  518,  616, 1827])

In [4]:
price = np.array([2.6, 7.2, 6.92, 3.88, 0.78, 2.67, 8.87, 0.23, 5.23, 6.3])
inventory = np.array([79, 22, 20, 48, 8, 76, 12, 16, 5, 64])

product_value = price * inventory
print(f'{product_value}')

[205.4  158.4  138.4  186.24   6.24 202.92 106.44   3.68  26.15 403.2 ]


In [None]:
product_value.sort()
print(f'{product_value}')

[  3.68   6.24  26.15 106.44 138.4  158.4  186.24 202.92 205.4  403.2 ]


In [None]:
reshaped_product_value = product_value.reshape(2, 5)
print(f'{reshaped_product_value}')

[[  3.68   6.24  26.15 106.44 138.4 ]
 [158.4  186.24 202.92 205.4  403.2 ]]


In [None]:
np.median(reshaped_product_value)

np.float64(148.4)

In [None]:
np.percentile(reshaped_product_value, 99)

np.float64(385.398)

In [None]:
np.sqrt(reshaped_product_value)

array([[ 1.91833261,  2.4979992 ,  5.11370707, 10.3169763 , 11.76435294],
       [12.58570618, 13.64697769, 14.24499912, 14.33178286, 20.07984064]])

In [None]:
np.unique(reshaped_product_value)

array([  3.68,   6.24,  26.15, 106.44, 138.4 , 158.4 , 186.24, 202.92,
       205.4 , 403.2 ])

In [None]:

# Print each item on a new line
for item in dir(prices):
    print(item)

### Sorting
`sales_array.sort()` will sort based on axis provided, 1 is default, sorts the arrays by rows; 0 will sort by columns

In [2]:
product_value 

NameError: name 'product_value' is not defined

In [5]:
product_value

array([205.4 , 158.4 , 138.4 , 186.24,   6.24, 202.92, 106.44,   3.68,
        26.15, 403.2 ])

In [8]:
# Sorting on product value array - returns a copy
np.sort(product_value)[:5]

array([  3.68,   6.24,  26.15, 106.44, 138.4 ])

In [10]:
product_value.sort()

In [12]:
product_value[0]

np.float64(3.68)

In [13]:
# to reverse the order of the array completely can use the ::-1 notation
product_value[::-1]

array([403.2 , 205.4 , 202.92, 186.24, 158.4 , 138.4 , 106.44,  26.15,
         6.24,   3.68])

In [14]:
# Sorting prices and determining statistics
prices = np.array([5.99, 6.99, 22.49, 99.99, 4.99, 49.99])

In [31]:
# Sort in descencing order to get the top 3
top3_prices = np.sort(prices)[-3:]
print(f'{top3_prices}')

[22.49 49.99 99.99]


In [32]:
print(f'mean: {top3_prices.mean()}')
print(f'min: {top3_prices.min()}')
print(f'max: {top3_prices.max()}')
print(f'median: {np.median(top3_prices)}')

mean: 57.49
min: 22.49
max: 99.99
median: 49.99


In [30]:
# How many unique price tiers?
price_tiers = np.array(["budget", "budget", "mid-tier", "luxury", "mid-tier", "luxury"])
print(f'{np.unique(price_tiers).size}')


3


In [33]:
# Vectorization
def for_loop_mult_lists(list1, list2):
    product_list = [] # Create an empty list to store the products
    for element1, element2 in zip(list1, list2):
        product_list.append(element1 * element2)
    return product_list

def mult_arrays(array1, array2):
    return array1 * array2

list1 = list(range(1000))
list2 = list(range(1000))

In [38]:
%%timeit -r 5 -n 10000
for_loop_mult_lists(list1, list2)

52.7 μs ± 1.11 μs per loop (mean ± std. dev. of 5 runs, 10,000 loops each)


In [36]:
array1 = np.array(list1)
array2 = np.array(list2)

In [39]:
%%timeit -r 5 -n 10000
mult_arrays(array1, array2)

1.74 μs ± 264 ns per loop (mean ± std. dev. of 5 runs, 10,000 loops each)


In [40]:
test_array = np.array([[1, 2, 3], [1, 2, 3], [1, 2, 3]])
test_array

array([[1, 2, 3],
       [1, 2, 3],
       [1, 2, 3]])

In [41]:
test_array + 1

array([[2, 3, 4],
       [2, 3, 4],
       [2, 3, 4]])

### Final Assignment for NumPy: Bringing it all together

In [46]:
import pandas as pd


ModuleNotFoundError: No module named 'pandas'