In [1]:
# Create a Python list whose elements are the numbers 3, 7, 1, 3, 5.

# Sample data for the notebook: later cells read this list through the name `l`.
l = [3, 7, 1, 3, 5]

In [2]:
# Write a function that computes the average of a list of numbers.

def find_avg(list_of_numbers):
    """Return the arithmetic mean of a sized collection of numbers.

    Parameters
    ----------
    list_of_numbers : sequence of numbers
        Any object that supports both ``len()`` and iteration by ``sum()``,
        such as a list, a tuple or a 1-D numpy array.

    Returns
    -------
    The sum of the elements divided by the number of elements.

    Raises
    ------
    ZeroDivisionError
        If ``list_of_numbers`` is empty, since the mean is undefined.
    """
    # Compare the length explicitly instead of relying on truthiness:
    # `not array` is ambiguous for numpy arrays with more than one element.
    if len(list_of_numbers) == 0:
        raise ZeroDivisionError(
            "cannot compute the average of an empty collection"
        )
    return sum(list_of_numbers) / len(list_of_numbers)

The average of a list of numbers $\left( x_1, x_2, \ldots, x_n \right)$ is given by:

$$
\bar{x} = \frac{1}{n} \sum_{i=1}^{n} x_i
$$

where:
- $\bar{x}$ is the average,
- $n$ is the number of values,
- $x_i$ is the $i$-th value in the list.

The function above evaluates this formula with Python's built-in `sum` and `len` functions and returns the result directly, without assigning any intermediate variables.


In [3]:
# Compute the average runtime.

import time

# Read a high-resolution clock immediately before and after the code executes.
# time.perf_counter() is intended for measuring short durations; time.time()
# reports wall-clock time, which is coarser and can be adjusted by the system.
start = time.perf_counter()

avg = find_avg(l)

end = time.perf_counter()

# Total time elapsed is the difference between the two clock readings.
# NOTE: this is a single-shot measurement of a very short operation, so the
# value varies from run to run.
elapsed_time = end - start
print(f'average: {avg}\nelapsed time: {elapsed_time}s')

average: 3.8
elapsed time: 1.8358230590820312e-05s


In [4]:
# Turn the above list into a numpy 1-D array.

import numpy as np

# Build a numpy array from the elements of the existing list `l`.
a = np.asarray(l)

In [5]:
# Print the average of the above array.

# Reuse the hand-written find_avg on the numpy array, then print the result.
avg_array = find_avg(a)
print(avg_array)

3.8


In [6]:
# Get the average using numpy in both of the ways described above.

# Time np.mean on the plain list (l) and then on the numpy array (a).
# time.perf_counter() is used because it is designed for short durations.
start = time.perf_counter()
avg = np.mean(l)
end = time.perf_counter()
np_mean_list_time = end - start
print(f'numpy list avg: {avg}, elapsed time: {np_mean_list_time}')

# Same measurement for the array input; the label names the array so the two
# printed lines can be told apart.
start = time.perf_counter()
avg = np.mean(a)
end = time.perf_counter()
np_mean_array_time = end - start
print(f'numpy array avg: {avg}, elapsed time: {np_mean_array_time}')

numpy list avg: 3.8, elapsed time: 7.700920104980469e-05
numpy list avg: 3.8, elapsed time: 3.361701965332031e-05


In [7]:
# Compare the runtime of the average computation using numpy
# with the runtime of the function you wrote earlier.

# Show the numpy timings that were recorded in the previous cell.
print(f'elapsed time avg of np list: {np_mean_list_time}s')
print(f'elapsed time avg of np array: {np_mean_array_time}s')

# Time the hand-written function on both the list (l) and the array (a).
# time.perf_counter() is used because it is designed for short durations.
start = time.perf_counter()
avg = find_avg(l)
end = time.perf_counter()

avg_list_time = end - start

start = time.perf_counter()
avg = find_avg(a)
end = time.perf_counter()

avg_array_time = end - start

print(f'elapsed time avg of list: {avg_list_time}s')
print(f'elapsed time avg of array: {avg_array_time}s')

# Difference in run time between numpy and the hand-written function.
# A positive value means numpy took longer in this run. These are single-shot
# measurements, so the sign is checked rather than assumed.
del_t_l = np_mean_list_time - avg_list_time
del_t_a = np_mean_array_time - avg_array_time
list_verdict = 'faster' if del_t_l >= 0 else 'slower'
array_verdict = 'faster' if del_t_a >= 0 else 'slower'
print(
    f'The hand-written function is {abs(del_t_l)}s {list_verdict} with lists '
    f'and {abs(del_t_a)}s {array_verdict} with arrays'
)

elapsed time avg of np list: 7.700920104980469e-05s
elapsed time avg of np array: 3.361701965332031e-05s
elapsed time avg of list: 1.9311904907226562e-05s
elapsed time avg of array: 2.2411346435546875e-05s
The hand-written function is 5.7697296142578125e-05s faster with lists and 1.1205673217773438e-05s faster with arrays


In [8]:
# Load the pandas library and use pandas.Series to create a pandas Series object, 
# which is the equivalent of a numpy 1-D array.

import pandas as pd

# Wrap the list in a pandas Series by calling the Series constructor,
# mirroring the earlier conversion of the list to a numpy array.
s = pd.Series(data=l)

In [9]:
# Pass the Series to the numpy.mean function to confirm it returns its average.

# Hand the pandas Series straight to np.mean and report the value it returns.
np_mean_of_series = np.mean(s)

print(f'our calculated mean: {np_mean_of_series}, same as before')

our calculated mean: 3.8, same as before


In [10]:
# Call the mean method of the Series and confirm it returns its average.

# A pandas Series provides its own mean() method.
# The call is the last expression in the cell, so the notebook displays its value.
s.mean()

3.8

In [11]:
# Show that by calling the values attribute of a Series object, you get a numpy array.

# The built-in type() function reports the class of the object returned by
# s.values. (The stray `",` that used to sit inside the f-string is removed so
# it no longer appears in the printed text.)
print(f'Type of s.values: {type(s.values)}')

# Last expression in the cell, so the notebook displays the array itself.
s.values

Type of s.values:", <class 'numpy.ndarray'>


array([3, 7, 1, 3, 5])