# Import software libraries and load the datasets

In [1]:
import sys
import numpy as np

# Report the interpreter and NumPy versions in a single print call,
# one item per line.
print(
    'Libraries used in this project:',
    '- Python {}'.format(sys.version),
    '- NumPy {}'.format(np.__version__),
    sep='\n',
)

# Read both datasets from disk: ratings from a binary .npy file and
# prices from a comma-delimited text file.
ratings = np.load('../data/customer_ratings.npy')
prices = np.loadtxt('../data/unit_tax_total.csv', delimiter=',')
print('\nLoaded ratings and prices datasets.')

# Turn off scientific notation so floats are shown in fixed-point form.
np.set_printoptions(suppress=True)

Libraries used in this project:
- Python 3.9.7 (default, Sep 16 2021, 16:59:28) [MSC v.1916 64 bit (AMD64)]
- NumPy 1.22.3

Loaded ratings and prices datasets.


# Confirm the speed advantage of vectorization

In [None]:
%%time
# Baseline for the timing comparison: accumulate the total with an explicit
# Python loop, adding one element of the range per iteration. The cell is
# timed by the %%time magic on its first line.
arr_sum = 0
for i in np.arange(1, 1e7):
    arr_sum += i
# Bare expression so the notebook displays the resulting sum.
arr_sum

In [None]:
%%time
# Vectorized counterpart of the loop: np.sum reduces the same range in a
# single call, so its %%time result can be compared with the loop's.
arr_sum = np.sum(np.arange(1, 1e7))
# Bare expression so the notebook displays the resulting sum.
arr_sum

# Use indexing to retrieve individual customer ratings

In [2]:
# Display the ratings array loaded at the top of the notebook.
ratings

array([ 9.1,  9.6,  7.4,  8.4,  5.3,  4.1,  5.8,  8. ,  7.2,  5.9,  4.5,
        6.8,  7.1,  8.2,  5.7,  4.5,  4.6,  6.9,  8.6,  4.4,  4.8,  5.1,
        4.4,  9.9,  6. ,  8.5,  6.7,  7.7,  9.6,  7.4,  4.8,  4.5,  5.1,
        5.1,  7.5,  6.8,  7. ,  4.7,  7.6,  7.7,  7.9,  6.3,  5.6,  7.6,
        7.2,  9.5,  8.4,  4.1,  8.1,  7.9,  9.5,  8.5,  6.5,  6.1,  6.5,
        8.2,  5.8,  6.6,  5.4,  9.3, 10. ,  7. , 10. ,  8.6,  7.6,  5.8,
        6.7,  9.9,  6.4,  4.3,  9.6,  5.9,  4. ,  8.7,  9.4,  5.4,  8.6,
        5.7,  6.6,  6. ,  5.5,  6.4,  6.6,  8.3,  6.6,  4. ,  9.9,  7.3,
        5.7,  6.1,  7.1,  8.2,  5.1,  8.6,  6.6,  7.2,  5.1,  4.1,  9.3,
        7.4])

In [3]:
# Indexing is zero-based, so the 1st and 9th ratings sit at positions 0 and 8.
first_rating, ninth_rating = ratings[0], ratings[8]
print('First customer rating: {}.'.format(first_rating))
print('Ninth customer rating: {}.'.format(ninth_rating))

First customer rating: 9.1.
Ninth customer rating: 7.2.


In [4]:
# Indexing with a list of positions retrieves both ratings in one array.
first_and_ninth = ratings[[0, 8]]
print('First and ninth customer ratings: {}'.format(first_and_ninth))

First and ninth customer ratings: [9.1 7.2]


# Use slicing to retrieve multiple customer ratings

In [5]:
# Explicit start:stop:step form; the stop position itself is excluded.
first_ten = ratings[0:10:1]
print('The first 10 ratings are: {}'.format(first_ten))

# Same selection, relying on the default start and step.
first_ten = ratings[:10]
print('\nThe first 10 ratings are: {}'.format(first_ten))

# Positions 19 through 29 hold the 20th through 30th ratings.
twentieth_to_thirtieth = ratings[19:30]
print('\nRatings 20 through 30 are: {}'.format(twentieth_to_thirtieth))
print('\nObject type: {}'.format(type(twentieth_to_thirtieth)))

The first 10 ratings are: [9.1 9.6 7.4 8.4 5.3 4.1 5.8 8.  7.2 5.9]

The first 10 ratings are: [9.1 9.6 7.4 8.4 5.3 4.1 5.8 8.  7.2 5.9]

Ratings 20 through 30 are: [4.4 4.8 5.1 4.4 9.9 6.  8.5 6.7 7.7 9.6 7.4]

Object type: <class 'numpy.ndarray'>


In [None]:
# A step of 2 with no bounds walks the whole array, taking alternate values.
every_other = ratings[::2]
print('Every other rating:\n {}'.format(every_other))

# Start at position 9 (the 10th rating), stop before position 50, step by 3.
every_third = ratings[9:50:3]
print('\nEvery third rating between 10 and 50:\n {}'.format(every_third))

# Index a multi-dimensional array

In [None]:
# Display the prices array loaded at the top of the notebook.
prices

In [None]:
# Two-dimensional indexing takes [row, column]; rows 0, 1 and 2 are read
# here as the unit, tax and total prices respectively.
first_unit = prices[0, 0]
third_tax = prices[1, 2]
ninth_total = prices[2, 8]
print('First unit price: ${}.'.format(first_unit))
print('Third tax price: ${}.'.format(third_tax))
print('Ninth total price: ${}.'.format(ninth_total))

In [None]:
# Row and column positions are paired element-wise: (0, 0), (1, 2), (2, 8).
row, col = np.array([0, 1, 2]), np.array([0, 2, 8])
selected = prices[row, col]
print('First unit price, third tax price, and ninth total price:\n {}'
      .format(selected))

# Slice a multi-dimensional array

In [None]:
# Slice along the columns of a single row. A slice's stop index is
# exclusive, so `:10` selects positions 0-9 (the first 10 values) and
# `9:20` selects positions 9-19 (the 10th through 20th values, inclusive).
# The previous bounds (`:9` and `9:19`) each stopped one element short of
# what the printed labels describe.
print('First 10 sale prices:\n {}'.format(prices[0, :10]))
print('\nTax prices from 10 to 20:\n {}'.format(prices[1, 9:20]))

In [None]:
# Rows 1 and 2 (slice 1:3) combined with the first 10 columns.
tax_and_total = prices[1:3, :10]
print('First 10 tax prices and total prices:\n {}'.format(tax_and_total))

# Summarize statistics about the pricing data

In [None]:
# Smallest and largest values in row 0, reported as the unit prices.
unit_prices = prices[0]
low, high = np.amin(unit_prices), np.amax(unit_prices)
print('The lowest unit price is: ${:.2f}.'.format(low))
print('The highest unit price is: ${:.2f}.'.format(high))

In [None]:
# Mean and median of row 1, reported as the tax prices.
tax_prices = prices[1]
mean = tax_prices.mean()
median = np.median(tax_prices)
print('The mean tax price is: ${:.2f}.'.format(mean))
print('The median tax price is: ${:.2f}.'.format(median))

In [None]:
# Standard deviation and variance of row 2, reported as the total prices.
total_prices = prices[2]
std, var = total_prices.std(), total_prices.var()
print('The standard deviation of total price is: {:.4f}.'.format(std))
print('The variance of total price is: {:.4f}.'.format(var))

In [None]:
# Add up every value in row 2 to get the overall sales figure.
total_prices = prices[2]
total = total_prices.sum()
print('Total sales: ${:.2f}.'.format(total))