In [2]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

# Load the product catalogue. The columns used below are 'id' and 'description'.
ds = pd.read_csv("sample-data.csv")

# Vectorise descriptions as TF-IDF over word 1- to 3-grams, dropping English stop words.
# min_df=1 keeps every term that occurs at all. It replaces the former integer min_df=0,
# which selected the same vocabulary but is rejected by scikit-learn >= 1.2.
tf = TfidfVectorizer(analyzer='word', ngram_range=(1, 3), min_df=1, stop_words='english')
tfidf_matrix = tf.fit_transform(ds['description'])

# TfidfVectorizer L2-normalises rows by default, so the plain dot product computed by
# linear_kernel is the cosine similarity between every pair of descriptions.
cosine_similarities = linear_kernel(tfidf_matrix, tfidf_matrix)

# Number of neighbours kept per item (the item itself is never counted).
MAX_RECOMMENDATIONS = 99

# Row position -> item id. Positions (not index labels) are what line up with the
# rows/columns of the similarity matrix, whatever index `ds` happens to carry.
item_ids = ds['id'].tolist()

# results maps item id -> [(score, other_item_id), ...] sorted by descending score.
results = {}

for pos, item_id in enumerate(item_ids):
    scores = cosine_similarities[pos]
    ranked = scores.argsort()[::-1]

    # Drop the item itself explicitly instead of assuming it sorts first: an item with a
    # duplicate description ties at the top score and may be ordered ahead of it.
    ranked = ranked[ranked != pos][:MAX_RECOMMENDATIONS]

    results[item_id] = [(scores[i], item_ids[i]) for i in ranked]

print('Done!')

Done!


In [7]:
# Quick helper: turn an item ID into a readable product name taken from its description.
def item(id):
    """Return the text before the first ' - ' in the description of the row whose 'id' equals `id`.

    Reads the module-level DataFrame `ds`. If several rows match, the first one is used.
    Raises IndexError when no row matches.
    """
    matching_descriptions = ds.loc[ds['id'] == id, 'description']
    description = matching_descriptions.tolist()[0]
    name, _, _ = description.partition(' - ')
    return name

# Pure presentation: looks up the precomputed neighbours in `results` and prints them.
def recommend(item_id, num):
    """Print the first `num` stored recommendations for `item_id`.

    Reads the module-level dict `results` (item id -> list of (score, item id) tuples)
    and uses `item` to turn ids into display names. Returns None.
    """
    print(f"Recommending {num!s} products similar to {item(item_id)}...")
    print("-------")
    for score, similar_id in results[item_id][:num]:
        print(f"Recommended: {item(similar_id)} (score:{score!s})")

# Demo: print 5 recommendations for a handful of items.
# Any item id present in `ds` can be used (1-500 per the original note); evaluate `ds`
# to browse them. `num` is how many recommendations to print (1-99 per the original
# note, and never more than are stored for that item in `results`).
for demo_item_id in (11, 3, 1, 110):
    recommend(item_id=demo_item_id, num=5)


Recommending 5 products similar to Baby sunshade top...
-------
Recommended: Sunshade hoody (score:0.2133029602108501)
Recommended: Baby baggies apron dress (score:0.10975311296284813)
Recommended: Runshade t-shirt (score:0.09988151262780706)
Recommended: Runshade t-shirt (score:0.09530698241688194)
Recommended: Runshade top (score:0.08510550093018401)
Recommending 5 products similar to Active sport briefs...
-------
Recommended: Active sport boxer briefs (score:0.4181663992161579)
Recommended: Active boy shorts (score:0.1140184812203876)
Recommended: Active briefs (score:0.11053729446572895)
Recommended: Active briefs (score:0.1091764001658287)
Recommended: Active mesh bra (score:0.10172320448715239)
Recommending 5 products similar to Active classic boxers...
-------
Recommended: Cap 1 boxer briefs (score:0.22037921472617453)
Recommended: Active boxer briefs (score:0.16938950913002357)
Recommended: Cap 1 bottoms (score:0.16769458065321555)
Recommended: Cap 1 t-shirt (score:0.164855277