<a href="https://colab.research.google.com/github/thulieblack/deeplearnings/blob/main/collaborative_filtering.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [27]:
# python dict of all the people who made reviews on amazon popular bands

users = {"Angelica": {"Blues Traveler": 3.5, "Broken Bells": 2.0, "Norah Jones": 4.5, "Phoenix": 5.0, "Slightly Stoopid": 1.5, "The Strokes": 2.5, "Vampire Weekend": 2.0},
         "Bill":{"Blues Traveler": 2.0, "Broken Bells": 3.5, "Deadmau5": 4.0, "Phoenix": 2.0, "Slightly Stoopid": 3.5, "Vampire Weekend": 3.0},
         "Chan": {"Blues Traveler": 5.0, "Broken Bells": 1.0, "Deadmau5": 1.0, "Norah Jones": 3.0, "Phoenix": 5, "Slightly Stoopid": 1.0},
         "Dan": {"Blues Traveler": 3.0, "Broken Bells": 4.0, "Deadmau5": 4.5, "Phoenix": 3.0, "Slightly Stoopid": 4.5, "The Strokes": 4.0, "Vampire Weekend": 2.0},
         "Hailey": {"Broken Bells": 4.0, "Deadmau5": 1.0, "Norah Jones": 4.0, "The Strokes": 4.0, "Vampire Weekend": 1.0},
         "Jordyn":  {"Broken Bells": 4.5, "Deadmau5": 4.0, "Norah Jones": 5.0, "Phoenix": 5.0, "Slightly Stoopid": 4.5, "The Strokes": 4.0, "Vampire Weekend": 4.0},
         "Sam": {"Blues Traveler": 5.0, "Broken Bells": 2.0, "Norah Jones": 3.0, "Phoenix": 5.0, "Slightly Stoopid": 4.0, "The Strokes": 5.0},
         "Veronica": {"Blues Traveler": 3.0, "Norah Jones": 5.0, "Phoenix": 4.0, "Slightly Stoopid": 2.5, "The Strokes": 3.0}
        }


In [28]:
users["Hailey"]

{'Broken Bells': 4.0,
 'Deadmau5': 1.0,
 'Norah Jones': 4.0,
 'The Strokes': 4.0,
 'Vampire Weekend': 1.0}

In [29]:
#below is the function that computes the manhattan distance

def manhattan(rating1,rating2):
  distance = 0
  commonRatings = False 
  for key in rating1:
      if key in rating2:
          distance += abs(rating1[key] - rating2[key])
          commonRatings = True
  if commonRatings:
       return distance
  else:
      return -1 #Indicates no ratings in common

In [30]:
#test the function
manhattan(users['Hailey'],users['Jordyn'])

7.5

In [31]:
manhattan(users['Veronica'],users['Chan'])

6.5

In [32]:
# below is the function that finds the closest person and retuns a list with the closest person first

def computeNearestNeighbor(username,users):
  """ creates a sorted list based on the users distance to the username """
  distances = []
  for user in users:
      if user != username:
          distance = manhattan(users[user],users[username])
          distances.append((distance,user))
    # sort on distance closest first
  distances.sort()
  return distances


In [33]:
computeNearestNeighbor("Sam",users)

[(4.0, 'Chan'),
 (4.0, 'Hailey'),
 (6.0, 'Jordyn'),
 (7.5, 'Dan'),
 (8.0, 'Angelica'),
 (8.0, 'Bill'),
 (8.5, 'Veronica')]

In [34]:
computeNearestNeighbor("Bill",users)

[(4.0, 'Veronica'),
 (5.0, 'Dan'),
 (5.5, 'Hailey'),
 (6.0, 'Jordyn'),
 (8.0, 'Sam'),
 (9.0, 'Angelica'),
 (14.0, 'Chan')]

In [35]:
# create a function that makes recommendations by finding the nearest neighbor

def recommend(username,users):
  #find nearestneighbor first
  nearest = computeNearestNeighbor(username, users)[0][1]
 
  recommendations = []
  # find bands neigbor rated that user didn't
  neighborRatings = users[nearest]
  userRatings = users[username]
  for artist in neighborRatings:
    if not artist in userRatings:
      recommendations.append((artist, neighborRatings[artist]))
      # use function sorted for variety as sort is more effiecient
    return sorted(recommendations,key=lambda artistTuple: artistTuple[1], reverse = True)

In [36]:
## Lets see the recommendations

recommend('Hailey', users)

[('Blues Traveler', 3.0)]

In [40]:
recommend('Bill', users)

[]

In [38]:
recommend('Veronica', users)

[('Broken Bells', 4.0)]

we see Bil returned an empty recommendation. lets improve the system to avoid such errors using the Pearson correlation formula


In [41]:
from math import sqrt

In [43]:
def pearson(rating1, rating2):
    sum_xy = 0
    sum_x = 0
    sum_y = 0
    sum_x2 = 0
    sum_y2 = 0
    n = 0
    for key in rating1:
        if key in rating2:
            n += 1
            x = rating1[key]
            y = rating2[key]
            sum_xy += x * y
            sum_x += x
            sum_y += y
            sum_x2 += pow(x, 2)
            sum_y2 += pow(y, 2)
    # now compute denominator
    denominator = sqrt(sum_x2 - pow(sum_x, 2) / n) * sqrt(sum_y2 - pow(sum_y, 2) / n)
    if denominator == 0:
        return 0
    else:
        return (sum_xy - (sum_x * sum_y) / n) / denominator

In [48]:
pearson(users["Bill"],users["Angelica"])

-0.9040534990682699

In [49]:
pearson(users["Chan"],users["Dan"])

-0.9630868246861539