# Chapter 4. Basic Mathematics

## Basic symbols and terminology

### Vectors and matrices

In [1]:
# Simple python list: an ordered sequence written with [] brackets
x = [3, 6, 8]
type(x)  # last expression in the cell, so the notebook displays it

list

In [2]:
# Numpy array: np.array() builds an ndarray from a regular python list
import numpy as np

x = np.array([3, 6, 8])
type(x)  # last expression in the cell, so the notebook displays it

numpy.ndarray

### Arithmetic symbols

In [3]:
# Summation: build the list 1..5 in two equivalent ways, then total it
x = [1, 2, 3, 4, 5]  # written out element by element
print(x, type(x))
x = [n for n in range(1, 6)]  # generated; the upper bound of range is exclusive
print(x, type(x))

sum(x)

[1, 2, 3, 4, 5] <class 'list'>
[1, 2, 3, 4, 5] <class 'list'>


15

### Set theory

In [4]:
# Start with an empty set, then feed it a list that contains repeated values.
# A set keeps a single copy of each distinct element, so the duplicates vanish.
s = set()
s.update([1, 2, 2, 3, 2, 1, 2, 2, 3, 2])
print(s)
s == {1, 2, 3}


{1, 2, 3}


True

In [5]:
# A dictionary maps each unique key to exactly one value.
# The variable is deliberately NOT named `dict`: that would shadow the built-in
# dict type, and any later dict(...) call in this session would fail.
pet_descriptions = {"dog": "human's best friend", "cat": "destroyer of world"}
print(pet_descriptions["dog"]) # == "human's best friend"
print(len(pet_descriptions["cat"])) # == 18

# but if we try to create a pair with the same key as an existing key
pet_descriptions["dog"] = "Arf"

print(pet_descriptions)
# It will override the previous value
# dictionaries cannot have two values for one key.


human's best friend
18
{'dog': 'Arf', 'cat': 'destroyer of world'}


In [6]:
# s is the set that was built in the earlier set-creation cell
print(s)  # == {1,2,3}
len(s) == 3 # magnitude of s: the number of distinct elements it holds

{1, 2, 3}


True

In [7]:
# Stores associated with the first user, held in a set
user1 = {"Target","Banana Republic","Old Navy"} 
# note that we use {} notation to create a set
# compare that to using [] to make a list
print(user1)  # sets are unordered, so the printed order can differ from the order typed above

{'Banana Republic', 'Old Navy', 'Target'}


In [8]:
# Stores associated with a second user; only "Banana Republic" also appears in user1
user2 = {"Banana Republic","Gap","Kohl's"}
print(user2)

{'Gap', 'Banana Republic', "Kohl's"}


In [9]:
# Re-create both users so this cell can run on its own
user1 = {"Target","Banana Republic","Old Navy"}
user2 = {"Banana Republic","Gap","Kohl's"}

def jaccard(user1, user2):
    """Return the Jaccard similarity between two sets.

    The similarity is the size of the intersection divided by the size of
    the union: 1.0 for identical sets, 0.0 for sets with nothing in common.

    Parameters:
        user1: a set of hashable items (here, store names).
        user2: a set of hashable items to compare against user1.

    Returns:
        A float between 0.0 and 1.0. When both sets are empty the union has
        size zero and the ratio is undefined; 1.0 is returned in that case,
        following the usual convention that two empty sets are identical.
    """
    stores_all_together = len(user1 | user2)
    if stores_all_together == 0:
        # Avoid dividing by zero when neither user has any stores
        return 1.0
    stores_in_common = len(user1 & user2)
    return stores_in_common / stores_all_together

# In Python 3 the / operator always performs true division, so dividing the
# two integer counts already yields a decimal answer; no float() cast is needed.

# so
jaccard(user1, user2) # == 0.2 or 1/5

0.2

## Linear algebra

In [10]:
import numpy as np

# preferences of a single user: three values
user_pref = np.array([5, 1, 3])

# random matrix describing 1,000 movies: 3 rows, one column per movie
# randint(5, ...) draws integers from 0 through 4,
# so adding 1 shifts every entry onto a 1 through 5 scale
movies = 1 + np.random.randint(5, size=(3, 1000))

In [11]:
# user_pref is a one-dimensional array, so its shape is a 1-tuple, not (1, 3)
print(user_pref.shape) # (3,)

print(movies.shape)    # (3, 1000)

(3,)
(3, 1000)


In [12]:
# np.dot does both dot products and matrix multiplication
# here: a length-3 vector times a (3, 1000) matrix gives one number per movie column
recs = np.dot(user_pref, movies)
recs[0:5]  # display only the first five results instead of all of them

array([26, 23, 27, 16, 31])

In [13]:
import time

# Time the same dot product against increasingly large movie matrices.
# Note: the last size allocates 3 x 100,000,000 integers, so this cell needs
# a large amount of memory and takes noticeably longer than the others.
for i in (10000, 100000, 1000000, 10000000, 100000000):
    movies = np.random.randint(5,size=(3,i))+1
    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring short durations; time.time() follows the system clock, which
    # can be adjusted mid-measurement and has coarser resolution on some systems.
    start = time.perf_counter()
    np.dot(user_pref, movies)
    elapsed = time.perf_counter() - start
    print(elapsed, "seconds to run", i, "movies")


8.130073547363281e-05 seconds to run 10000 movies
0.0011301040649414062 seconds to run 100000 movies
0.01098775863647461 seconds to run 1000000 movies
0.07930231094360352 seconds to run 10000000 movies
3.7567296028137207 seconds to run 100000000 movies
