In [26]:
# scipy provides scientific computing functions
# it makes a library of routines available for numeric optimizations
import scipy as sp

# numpy allows you to work with large sets of numbers efficiently
# it has very efficient array and linear algebra functions
# it's also open source
import numpy as np

# pandas lets you represent your data as a virtual spreadsheet (think MS Excel) - this makes it easy to work with .csv data
# the name pandas is derived from "panel data"
import pandas as pd

In [27]:
# a plain Python list holding the sample ratings (12 integer values)
ratings_traditional = [5, 2, 3, 3, 4, 5, 5, 1, 5, 1, 3, 4]

In [28]:
# The traditional approach: visit every position of the list with an explicit
# for-loop and double the element stored there.
# NOTE: this updates ratings_traditional in place, so re-running this cell
# doubles the values again.
for idx in range(len(ratings_traditional)):
    print("Updating ratings_traditional {}".format(idx))
    ratings_traditional[idx] *= 2

# show the list after every element has been doubled
print(ratings_traditional)

Updating ratings_traditional 0
Updating ratings_traditional 1
Updating ratings_traditional 2
Updating ratings_traditional 3
Updating ratings_traditional 4
Updating ratings_traditional 5
Updating ratings_traditional 6
Updating ratings_traditional 7
Updating ratings_traditional 8
Updating ratings_traditional 9
Updating ratings_traditional 10
Updating ratings_traditional 11
[10, 4, 6, 6, 8, 10, 10, 2, 10, 2, 6, 8]


In [29]:
# Looping over the elements one at a time in Python (as in the previous cell) is slow.
# With NumPy we express the operation on the whole array at once; NumPy then runs the
# element-wise loop in compiled code, which can take advantage of CPU features such as
# SIMD (Single Instruction, Multiple Data) instructions that process several values
# per instruction. Writing code this way is called "vectorizing".

# build a NumPy array (as opposed to a plain Python list) from the same ratings
ratings = np.array([5, 2, 3, 3, 4, 5, 5, 1, 5, 1, 3, 4])

# double the whole array - NumPy applies the scalar 2 to each element separately
ratings = 2 * ratings

# same values as the for-loop version, but without an explicit Python-level loop
print(ratings)

[10  4  6  6  8 10 10  2 10  2  6  8]
