### Introduction to NumPy

In [3]:
# Lists cannot be multiplied together: `*` between two lists is not an
# element-wise operation, it raises a TypeError.
list_a = [1, 2, 3]
list_b = [2, 4, 6]

# The exception is the point of this cell, so catch it and display the
# message instead of letting it abort a "Restart & Run All".
try:
    list_a * list_b
except TypeError as err:
    print(f'{type(err).__name__}: {err}')

TypeError: can't multiply sequence by non-int of type 'list'

In [4]:
# Element-wise multiplication of two lists can be done by hand with a
# for loop: walk both lists in step and collect each product.
list_c = []
for left, right in zip(list_a, list_b):
    list_c.append(left * right)

list_c

[2, 8, 18]

In [5]:
# With NumPy, arithmetic operators act on whole arrays at once.

# Bring in NumPy under the alias np.
import numpy as np

# Turn both lists into arrays.
array_a, array_b = np.array(list_a), np.array(list_b)

# `*` between two arrays multiplies them element by element.
array_a * array_b

array([ 2,  8, 18])

In [6]:
import numpy as np

# The np.array() function converts an array-like object (here, a list)
# to an ndarray.
x = np.array([1, 2, 3, 4])
x

array([1, 2, 3, 4])

In [7]:
# Arrays can be indexed; a negative index counts from the end, so -1 is
# the last element. Assigning through an index changes the array in place.
x[-1] = 5
x

array([1, 2, 3, 5])

In [8]:
# Trying to access an index that doesn't exist raises an IndexError.
# The error is the point of this cell, so catch it and show the message
# rather than letting it stop a "Restart & Run All".
try:
    x[4] = 10
except IndexError as err:
    print(f'{type(err).__name__}: {err}')

IndexError: index 4 is out of bounds for axis 0 with size 4

In [9]:
# Arrays cast every element they contain as the same data type.
# Mixing integers with a string makes NumPy store every element as a string.
arr = np.array([1, 2, 'coconut'])
arr

array(['1', '2', 'coconut'], dtype='<U21')

In [10]:
# NumPy arrays are instances of a class called `ndarray`;
# type() shows the class of the object.
print(type(arr))

<class 'numpy.ndarray'>


In [11]:
# The dtype attribute returns the data type of an array's contents.
# arr is rebound here to an array built from integers only.
arr = np.array([1, 2, 3])
arr.dtype

dtype('int64')

In [12]:
# The shape attribute is a tuple holding the number of elements in each
# dimension of an array; a one-dimensional array has a one-element tuple.
arr.shape

(3,)

In [14]:
# The ndim attribute returns the number of dimensions (axes) in an array.
arr.ndim

1

In [15]:
# Passing a list of lists to np.array() yields a 2D array:
# each inner list becomes one row.
arr_2d = np.array([
    [1, 2],
    [3, 4],
    [5, 6],
    [7, 8],
])

# Show the shape, then the number of dimensions, one per line.
print(arr_2d.shape, arr_2d.ndim, sep='\n')
arr_2d

(4, 2)
2


array([[1, 2],
       [3, 4],
       [5, 6],
       [7, 8]])

In [16]:
# Passing a list of two lists of lists to np.array() yields a 3D array:
# two blocks, each with two rows of three values.
arr_3d = np.array([
    [[1, 2, 3], [3, 4, 5]],
    [[5, 6, 7], [7, 8, 9]],
])

# Show the shape, then the number of dimensions, one per line.
print(arr_3d.shape, arr_3d.ndim, sep='\n')
arr_3d

(2, 2, 3)
3


array([[[1, 2, 3],
        [3, 4, 5]],

       [[5, 6, 7],
        [7, 8, 9]]])

In [17]:
# The reshape() method returns the same elements arranged in a new shape.
# The result is assigned back to arr_2d, so from here on arr_2d refers to
# the 2 x 4 array instead of the earlier 4 x 2 one.
arr_2d = arr_2d.reshape(2, 4)
arr_2d

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

In [18]:
# Create a new array.
arr = np.array([1, 2, 3, 4, 5])

# The np.mean() function returns the mean of the elements in an array.
np.mean(arr)

np.float64(3.0)

In [19]:
# The np.log() function returns the natural logarithm of each element
# in an array.
np.log(arr)

array([0.        , 0.69314718, 1.09861229, 1.38629436, 1.60943791])

In [20]:
# The np.floor() function returns the value of a number rounded down
# to the nearest integer (the result is still a float).
np.floor(5.7)

np.float64(5.0)

In [21]:
# The np.ceil() function returns the value of a number rounded up
# to the nearest integer (the result is still a float).
np.ceil(5.3)

np.float64(6.0)

![image.png](attachment:image.png)

In [22]:
# Build a 2 x 3 array and show it, followed by a blank line.
array_2d = np.array([(1, 2, 3), (4, 5, 6)])
print(array_2d, end='\n\n')

# flatten() returns the elements as a one-dimensional array.
array_2d.flatten()

[[1 2 3]
 [4 5 6]]



array([1, 2, 3, 4, 5, 6])

In [23]:
# Build a 2 x 3 array and show it, followed by a blank line.
array_2d = np.array([(1, 2, 3), (4, 5, 6)])
print(array_2d, end='\n\n')

# reshape(3, 2) arranges the same six elements as 3 rows of 2.
array_2d.reshape(3, 2)

[[1 2 3]
 [4 5 6]]



array([[1, 2],
       [3, 4],
       [5, 6]])

In [24]:
# Build a 2 x 3 array and show it, followed by a blank line.
array_2d = np.array([(1, 2, 3), (4, 5, 6)])
print(array_2d, end='\n\n')

# A -1 in reshape() tells NumPy to work out that dimension itself:
# 3 rows of six elements leaves 2 columns.
array_2d.reshape(3, -1)

[[1 2 3]
 [4 5 6]]



array([[1, 2],
       [3, 4],
       [5, 6]])

In [25]:
# Build a 2 x 3 array and show it, followed by a blank line.
array_2d = np.array([(1, 2, 3), (4, 5, 6)])
print(array_2d, end='\n\n')

# tolist() converts the array back to (nested) Python lists.
array_2d.tolist()

[[1 2 3]
 [4 5 6]]



[[1, 2, 3], [4, 5, 6]]

In [27]:
# Build a 2 x 3 array and show it, followed by a blank line.
a = np.array([(1, 2, 3), (4, 5, 6)])
print(a, end='\n\n')

# Call each summary-statistic method by name and print it with its label.
for statistic in ('max', 'mean', 'min', 'std'):
    print(f'{statistic}: {getattr(a, statistic)()}')

[[1 2 3]
 [4 5 6]]

max: 6
mean: 3.5
min: 1
std: 1.707825127659933


In [30]:
# Build a 2 x 3 array and show it, followed by a blank line.
array_2d = np.array([(1, 2, 3), (4, 5, 6)])
print(array_2d, end='\n\n')

# Print a few ndarray attributes with their names. T returns the array
# transposed (rows become columns, columns become rows).
for attribute in ('shape', 'dtype', 'size', 'T'):
    print(f'{attribute}: {getattr(array_2d, attribute)}')

[[1 2 3]
 [4 5 6]]

shape: (2, 3)
dtype: int64
size: 6
T: [[1 4]
 [2 5]
 [3 6]]


In [31]:
# Build a 2 x 3 array and show it, followed by a blank line.
a = np.array([(1, 2, 3), (4, 5, 6)])
print(a, end='\n\n')

# a[1] is the whole second row; a[row, column] picks a single element.
print(a[1], a[0, 1], a[1, 2], sep='\n')

[[1 2 3]
 [4 5 6]]

[4 5 6]
2
6


In [32]:
# Build a 2 x 3 array and show it, followed by a blank line.
a = np.array([(1, 2, 3), (4, 5, 6)])
print(a, end='\n\n')

# Slice every row (:) and every column from index 1 onward (1:).
a[:, 1:]

[[1 2 3]
 [4 5 6]]



array([[2, 3],
       [5, 6]])

In [33]:
a = np.array([(1, 2, 3), (4, 5, 6)])
b = np.array([[1, 2, 3], [1, 2, 3]])

# Show both operands, then their element-wise sum and product, each under
# its own label and separated from the next section by a blank line.
sections = (('a:', a), ('b:', b), ('a + b:', a + b), ('a * b:', a * b))
print('\n\n'.join(f'{label}\n{value}' for label, value in sections))

a:
[[1 2 3]
 [4 5 6]]

b:
[[1 2 3]
 [1 2 3]]

a + b:
[[2 4 6]
 [5 7 9]]

a * b:
[[ 1  4  9]
 [ 4 10 18]]


In [34]:
# Build a 2 x 2 array and show it, followed by a blank line.
a = np.array([(1, 2), (3, 4)])
print(a, end='\n\n')

# Assigning to a single position (row 1, column 1) changes the array in place.
a[1, 1] = 100
a

[[1 2]
 [3 4]]



array([[  1,   2],
       [  3, 100]])

In [None]:
a = np.array([1, 2, 3])
print(a)
print()

# The array cannot be lengthened or shortened: index 3 is one past the end
# of a three-element array, so the assignment raises an IndexError.
# The error is the point of this cell, so catch it and show the message;
# that keeps the notebook runnable top to bottom and lets the final
# expression display the array.
try:
    a[3] = 100
except IndexError as err:
    print(f'{type(err).__name__}: {err}')

a

You are given an ordered `list` of AQI readings called `aqi_list`.

1. Use a NumPy function to convert the list to an `ndarray`. Assign the result to a variable called `aqi_array`.
2. Print the length of `aqi_array`.
3. Print the first five elements of `aqi_array`.

In [None]:
### RUN THIS CELL TO IMPORT YOUR DATA
# NOTE: this only works inside Coursera Labs; it won't work in a local
# VSCode/Jupyter environment, where the `ada_c2_labs` module is unavailable.
import ada_c2_labs as lab
# Fetch the 'aqi' data set and keep it as aqi_list.
# NOTE(review): the exercise text describes it as an ordered list of AQI
# readings; the return type of fetch_epa is not visible here, so confirm.
aqi_list = lab.fetch_epa('aqi')

In [None]:
# 1. Convert the list of readings to an ndarray.
aqi_array = np.array(aqi_list)

# 2. Print the length. A bare `len(aqi_array)` in the middle of a cell is
#    evaluated but never shown, because a notebook only displays the value
#    of the last expression, so print it explicitly.
print(len(aqi_array))

# 3. Print the first five elements.
print(aqi_array[:5])

Now that you have the AQI data stored in an array, use NumPy functions to calculate some summary statistics about it.

* Use built-in NumPy functions to print the following values from `aqi_array`:
    1. Maximum value
    2. Minimum value
    3. Median value
    4. Standard deviation

In [None]:
# Summary statistics for the AQI readings, one labelled value per line.
# The median is computed with the np.median() function; the other values
# come from methods on the array itself.
summary = (
    ('Max value', aqi_array.max()),
    ('Min value', aqi_array.min()),
    ('Median value', np.median(aqi_array)),
    ('Mean value', aqi_array.mean()),
    ('Standard deviation', aqi_array.std()),
)
for label, value in summary:
    print(f'{label}: {value}')


You are interested in how many air quality readings in the data represent the cleanest air, which we'll consider **readings of 5 or less.**

To perform this calculation, you'll make use of one of the properties of arrays that make them so powerful: their element-wise operability. For example, when you add an integer to an `ndarray` using the `+` operator, it performs an element-wise addition on the whole array.

**The same concept applies to comparison operators used on an `ndarray`.** With this in mind:

* Calculate the percentage of AQI readings that are considered cleanest:
    1. Use a comparison statement to get an array of Boolean values that is the same length as `aqi_array`. Assign the result to a variable called `boolean_aqi`.
    2. Calculate the number of `True` values in `boolean_aqi` and divide this number by the total number of values in the array. Assign the result to a variable named `percent_under_6` and print it.

# 1
# Python loop method: test each reading against the threshold one at a time.
# The result of the comparison itself is stored, so every reading produces
# exactly one flag and the result always has the same length as aqi_array.
# (An if/elif pair on `<= 5` and `> 5` would skip any reading for which both
# comparisons are False, e.g. NaN, and return a shorter array.)
# dtype=bool keeps the result Boolean even when there are no readings.
boolean_aqi = np.array(
    [bool(element <= 5) for element in aqi_array],
    dtype=bool,
)

# 2
# Python loop method: tally the True and False flags one by one.
# The loop variable is named is_clean rather than x so it does not overwrite
# the array x created earlier in the notebook.
true_counter = 0
false_counter = 0
for is_clean in boolean_aqi:
    if is_clean:
        true_counter += 1
    else:
        false_counter += 1

# Share of readings flagged True, as a fraction between 0 and 1.
trues_percent = true_counter / len(boolean_aqi)
print(f'Total: {len(boolean_aqi)}')
print(f'True: {true_counter}')
print(f'False: {false_counter}')
print(f'Trues percentage: {trues_percent}')


# 3
# NumPy method: the comparison is applied to every element at once and
# gives a Boolean array of the same length as aqi_array.
boolean_aqi = aqi_array <= 5

# Summing a Boolean array counts its True values (True counts as 1).
true_count = np.sum(boolean_aqi)

# Fraction of readings that are 5 or less.
percent_under_6 = true_count / len(boolean_aqi)
print(percent_under_6)
