#### Open the pickle file that contains the product dictionary of customer ratings.

In [65]:
import pickle

# Load the pickled object into `products`.
# NOTE(review): the functions below index it as products[item][user] -> rating;
# confirm the pickle really has that shape.
# SECURITY: pickle.load can execute arbitrary code while unpickling, so only
# load this file if it comes from a trusted source.
# The `with` block closes the file even if unpickling raises.
with open('C:/Users/Michael/Desktop/productDict.pkl', 'rb') as pkl_file:
    products = pickle.load(pkl_file)

#### Returns the Pearson correlation coefficient for item1 and item2.

In [66]:
from math import sqrt
import numpy as np

def pear(products,item1,item2):
    """Return the Pearson correlation coefficient between two entries.

    Args:
        products: Mapping of entry -> {key: numeric rating}.
        item1: First entry to compare; must be a key of ``products``.
        item2: Second entry to compare.

    Returns:
        The Pearson coefficient computed over the keys present under both
        entries, or 0 when the entries share no keys or when either side
        has no variance over the shared keys.
    """
    # Keys rated under both entries, kept in item1's iteration order so the
    # sums below are accumulated in a deterministic order.
    shared = [user for user in products[item1] if user in products[item2]]

    # Nothing in common: no correlation can be computed.
    n = len(shared)
    if n == 0:
        return 0

    xs = [products[item1][user] for user in shared]
    ys = [products[item2][user] for user in shared]

    # Plain sums, sums of squares and the sum of cross products.
    sum1 = sum(xs)
    sum2 = sum(ys)
    sum1Sq = sum(x * x for x in xs)
    sum2Sq = sum(y * y for y in ys)
    pSum = sum(x * y for x, y in zip(xs, ys))

    num = pSum - (sum1 * sum2 / n)
    var1 = sum1Sq - sum1 * sum1 / n
    var2 = sum2Sq - sum2 * sum2 / n

    # Mathematically both terms are >= 0, but floating-point rounding can
    # push a (near-)constant series slightly below zero. Treat that as
    # "no variance" instead of letting sqrt() raise ValueError or produce a
    # meaningless coefficient.
    if var1 <= 0 or var2 <= 0:
        return 0

    den = sqrt(var1 * var2)
    # The product of two tiny positive terms can still underflow to zero.
    if den == 0:
        return 0
    return num / den

#### Find the similarity coefficient of two products using Pearson Correlation Coefficient algorithm.

In [67]:
# Pearson similarity between the W38 L32 and W38 L34 variants of the same listing.
pear(
    products,
    'Amazon.com: Dockers Original Khaki Classic Fit - Pleated, W38 L32, Charcoal Heather: Clothing',
    'Amazon.com: Dockers Original Khaki Classic Fit - Pleated, W38 L34, Charcoal Heather: Clothing',
)

1.0

#### Returns the best matches for a person from the products dictionary based on similarity coefficient.

In [68]:
# Number of results and similarity function are optional params.
def topMatches(products,person,n = 20,similarity = pear):
    """Return the ``n`` entries of ``products`` most similar to ``person``.

    Args:
        products: Mapping whose keys are the entries to compare.
        person: Key of the reference entry; it is excluded from the result.
        n: Maximum number of matches to return (optional).
        similarity: Callable ``(products, a, b) -> score`` (optional).

    Returns:
        A list of ``(score, key)`` tuples, highest score first.
    """
    scored = []
    for other in products:
        # Never compare the reference entry with itself.
        if other == person:
            continue
        scored.append((similarity(products, person, other), other))

    # Ascending sort followed by an in-place reversal puts the best first.
    scored.sort()
    scored.reverse()

    return scored[:n]

#### Find products similar to 'Dockers Original Khaki Classic Fit...'

In [69]:
# Top matches (default n and similarity) for the W38 L32 Dockers listing.
topMatches(
    products,
    'Amazon.com: Dockers Original Khaki Classic Fit - Pleated, W38 L32, Charcoal Heather: Clothing',
)

[(1.0, 'Hannah Montana Tote Bag with Wig and Assorted Accessories'),
 (1.0,
  'Amazon.com: St. Christopher &quot;Ride with Me&quot; Motorcycle Medal--Keychain, BH010: Clothing'),
 (1.0, 'Amazon.com: Levi&#39;s Men&#39;s 550 Relaxed Fit Jean: Clothing'),
 (1.0,
  'Amazon.com: JanSport Classics Series Superbreak Backpack (Alien Green): Sports &amp; Outdoors'),
 (1.0,
  'Amazon.com: Dockers Original Khaki Pants Classic Fit by Levi&#39;s 40443 in Cement: 36WX28L: Clothing'),
 (1.0,
  'Amazon.com: Dockers Original Khaki Classic Fit - Pleated, W44 L34, Charcoal Heather: Clothing'),
 (1.0,
  'Amazon.com: Dockers Original Khaki Classic Fit - Pleated, W44 L32, Charcoal Heather: Clothing'),
 (1.0,
  'Amazon.com: Dockers Original Khaki Classic Fit - Pleated, W44 L30, Charcoal Heather: Clothing'),
 (1.0,
  'Amazon.com: Dockers Original Khaki Classic Fit - Pleated, W42 L36, Charcoal Heather: Clothing'),
 (1.0,
  'Amazon.com: Dockers Original Khaki Classic Fit - Pleated, W42 L34, Charcoal Heather: C

#### Return a dictionary of customers where each customer contains the product and the ratings given.

In [70]:
import pandas as pd

def loadAmazonItems(path='C:/Users/Michael/Desktop/clothing.csv'):
    """Build a customer -> {product title: rating} dictionary from a CSV.

    Args:
        path: Location of the reviews CSV. Defaults to the path this
            notebook was originally written against, so existing
            zero-argument calls keep working.

    Returns:
        A dict keyed by reviewer profile name; each value is a dict mapping
        product title to the rating as a float. When the same reviewer and
        title appear more than once, the last row wins.
    """
    # Get Amazon products
    df = pd.read_csv(path)
    df = df.rename(columns={'product/productId': 'pid', 'product/title': 'title', 'product/price': 'price',
                            'review/userId': 'uid', 'review/profileName': 'profileName', 'review/helpfulness': 'helpfulness',
                            'review/score': 'rating', 'review/time': 'time', 'review/summary': 'summary', 'review/text': 'text'})

    prefs = {}
    # Iterate the three columns in lockstep; no intermediate list is needed.
    for user, item, rating in zip(df['profileName'], df['title'], df['rating']):
        prefs.setdefault(user, {})[item] = float(rating)
    return prefs

In [71]:
# Build the customer -> {product title: rating} dictionary from the reviews CSV.
prefs = loadAmazonItems()

#### Get recommendations based on similar users' preferences.

In [72]:
def getRecommendations(products,product,similarity=pear):
    """Rank the inner keys that ``product`` lacks by weighted average rating.

    ``products`` is a dict of dicts (outer key -> {inner key: rating}).
    Every other outer key is compared with ``product`` using ``similarity``;
    for each inner key that ``product`` has not rated (or rated 0), the other
    entry's rating is weighted by that similarity and averaged.

    Args:
        products: Dict of dicts of ratings.
        product: Outer key to produce recommendations for.
        similarity: Callable ``(products, a, b) -> score`` (optional).

    Returns:
        A list of ``(predicted rating, inner key)`` tuples, highest first.
    """
    weightedSums = {}
    similaritySums = {}

    for other in products:
        # Skip the entry we are producing recommendations for.
        if other == product:
            continue

        sim = similarity(products,product,other)
        # Entries with zero or negative similarity contribute nothing.
        if sim <= 0:
            continue

        for user in products[other]:
            # Already rated with a non-zero score: nothing to recommend.
            if user in products[product] and products[product][user] != 0:
                continue
            # Running total of rating * similarity ...
            weightedSums[user] = weightedSums.get(user, 0) + products[other][user] * sim
            # ... and of the similarities themselves, for normalisation.
            similaritySums[user] = similaritySums.get(user, 0) + sim

    # Normalise each weighted total by the sum of contributing similarities.
    rankings = []
    for user, total in weightedSums.items():
        rankings.append((total / similaritySums[user], user))

    # Highest predicted rating first.
    rankings.sort()
    return rankings[::-1]

In [73]:
def calculateSimilarItems(products,n = 20):
    """Precompute, for every key in ``products``, its ``n`` closest matches.

    Args:
        products: Dict of dicts of ratings, as accepted by ``topMatches``.
        n: Number of matches to keep per key.

    Returns:
        A dict mapping each key to the list returned by ``topMatches`` for
        it, scored with ``pear``. Progress messages are printed before and
        after the computation.
    """
    print('Processing similarities for %d items...' % len(products))
    # One topMatches call per key; Pearson is always used as the metric.
    similar = {
        item: topMatches(products, item, n=n, similarity=pear)
        for item in products
    }
    print('DONE')
    return similar

#### Get recommendations based on similar items.

In [74]:
def getRecommendedItems(products,itemMatch,user):
    """Recommend unrated items to ``user`` from precomputed item similarities.

    Args:
        products: Dict mapping each user to a dict of ``{item: rating}``.
        itemMatch: Dict mapping each item to an iterable of
            ``(similarity, other item)`` pairs.
        user: Key in ``products`` to produce recommendations for.

    Returns:
        A list of ``(predicted rating, item)`` tuples, highest first. Items
        whose similarities sum to exactly zero are left out.
    """
    rated = products[user]
    weightedTotals = {}
    similarityTotals = {}

    # Walk every item the user rated and each item similar to it.
    for item, rating in rated.items():
        for similarity, candidate in itemMatch[item]:
            # The user already has an opinion on this one.
            if candidate in rated:
                continue
            # Accumulate similarity * rating and the similarity on its own.
            weightedTotals[candidate] = weightedTotals.get(candidate, 0) + similarity * rating
            similarityTotals[candidate] = similarityTotals.get(candidate, 0) + similarity

    # Weighted average per candidate; a zero denominator would divide by zero.
    ranked = [
        (total / similarityTotals[candidate], candidate)
        for candidate, total in weightedTotals.items()
        if similarityTotals[candidate] != 0
    ]

    # Highest predicted rating first.
    ranked.sort()
    ranked.reverse()
    return ranked

In [75]:
# Show the {product title: rating} dictionary stored under the key 'Julia'.
prefs['Julia']

{'Amazon.com: American Apparel Spandex Jersey Yoga Pant (8300), Size: Large, Color: Heather Grey: Clothing': 4.0,
 'Amazon.com: Eagle Creek Travel Gear Pack-It Medium/Large Compression Sac Set: Clothing': 1.0,
 'Amazon.com: Exquisite Form Women&#39;s Front Close Cotton Posture Bra #5100531: Clothing': 5.0,
 'Amazon.com: Glamorise Sport Soft Cup Superior Support Bra G-1006: Clothing': 2.0,
 'Amazon.com: Hufflepuff House Scarf: Clothing': 5.0,
 'Amazon.com: JanSport Classics Series Superbreak Backpack (Alien Green): Sports &amp; Outdoors': 5.0,
 'Amazon.com: Ladies&#39; Jersey Tee Shirt: Clothing': 2.0,
 'Amazon.com: OnGossamer Women&#39;s Mesh Bump It Up Bra: Clothing': 4.0,
 'Amazon.com: Secret Wishes Maid Costume: Clothing': 2.0,
 'Amazon.com: Speedo Women&#39;s Lycra Tribal Etching Geoback Swim Suit, Blue, Size 38: Clothing': 5.0}

#### Returns the first 20 products recommended to the customer 'Julia' based on other customers.

In [76]:
# Keep only the 20 highest-ranked recommendations for 'Julia'.
getRecommendations(prefs, 'Julia')[:20]

[(5.0, 'Vintage Retro Mirror Aviator Sunglass w/ free Pouch'),
 (5.0, 'High Sierra Water Bottle Sport Duffel'),
 (5.0, 'HARVEYS Seatbelt Zip Wallet'),
 (5.0, "Girl High Seas Buccaneer Toddler Costume - Kid's Costumes"),
 (5.0,
  'Amazon.com: Women&#39;s Soft Lightweight Travel Money Belt - White: Clothing'),
 (5.0, 'Amazon.com: SPANX Power Panties Shapewear: Clothing'),
 (5.0,
  'Amazon.com: JanSport Elefunk Metro Messenger Bag (Bubblegum): Clothing'),
 (5.0,
  'Amazon.com: JanSport Elefunk Metro Messenger Bag (Blue Jean): Clothing'),
 (5.0, 'Amazon.com: Enell High Impact Sports Bra: Clothing'),
 (5.0, 'Amazon.com: Carhartt Men&#39;s Duck Coverall Quilted Lined: Clothing'),
 (5.0, 'Amazon.com: : Clothing'),
 (4.0, 'Amazon.com: V-Neck Katahdin Tek Pullover XXX-Large Black: Clothing'),
 (4.0, 'Amazon.com: V-Neck Katahdin Tek Pullover XX-Large Navy: Clothing'),
 (4.0, 'Amazon.com: V-Neck Katahdin Tek Pullover XX-Large Charcoal: Clothing'),
 (4.0, 'Amazon.com: V-Neck Katahdin Tek Pullover 

In [77]:
# Precompute the most similar entries for every key in `products` (default n = 20).
itemsim = calculateSimilarItems(products)

Processing similarities for 19194 items...
DONE


#### Returns the first 20 recommended products that are similar to the items that 'Julia' has rated.

In [78]:
# Keep only the 20 highest-ranked item-based recommendations for 'Julia'.
getRecommendedItems(prefs, itemsim, 'Julia')[:20]

[(5.0, 'green sprouts 10 Pack Waterproof Absorbent Terry Bibs'),
 (5.0, 'ZANheadgear Desert Nylon Balaclava (Camouflage)'),
 (5.0, 'Superman Dress Up Set'),
 (5.0, 'Rothco Canvas Map Case'),
 (5.0, 'Robin Deluxe Muscle Chest Child Costume'),
 (5.0, 'Rear View Sunglasses'),
 (5.0, "NEON 80's style PARTY SUNGLASSES with dark lens (12 pack)"),
 (5.0, 'Men\'s HeatGear&#174; Compression 7" Shorts Bottoms by Under Armour'),
 (5.0, 'High Sierra Water Bottle Sport Duffel'),
 (5.0, 'Hannah Montana Tote Bag with Wig and Assorted Accessories'),
 (5.0, 'Gold Eyeglass Holder Fashion Chain By Apex Medical'),
 (5.0, "Child's Heirloom Elephant Costume (Size:X-small 4-6)"),
 (5.0, 'Child Pink Supergirl Costume'),
 (5.0,
  'CHILD Police Officer Costume (Please see product details for accessories)'),
 (5.0, 'Bumkins Waterproof Supersized SuperBib, 6 - 24 Months'),
 (5.0, 'Boys Spider-Man Classic'),
 (5.0, "Body Glove Aura Women's Long Sleeve Lycra Rash Guard"),
 (5.0, 'Bald Head Wig'),
 (5.0, 'BABYBJORN 