In [1]:
# import pandas
import pandas as pd

# import TfidfVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer

# import cosine_similarity
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# import data
df = pd.read_csv("https://raw.githubusercontent.com/nikitaa30/Content-based-Recommender-System/master/sample-data.csv")

In [3]:
df

Unnamed: 0,id,description
0,1,Active classic boxers - There's a reason why o...
1,2,Active sport boxer briefs - Skinning up Glory ...
2,3,Active sport briefs - These superbreathable no...
3,4,"Alpine guide pants - Skin in, climb ice, switc..."
4,5,"Alpine wind jkt - On high ridges, steep ice an..."
...,...,...
495,496,Cap 2 bottoms - Cut loose from the maddening c...
496,497,Cap 2 crew - This crew takes the edge off fick...
497,498,All-time shell - No need to use that morning T...
498,499,All-wear cargo shorts - All-Wear Cargo Shorts ...


* explore DataFrame

We will be using Tf-Idf to find similar items based on description
* instantiate TF-IDF

In [4]:
tfidf = TfidfVectorizer()

* fit and transform 'description' column with TFIDF

In [5]:
X = tfidf.fit_transform(df['description'])

In [6]:
print(X.shape)
X.toarray()

(500, 4816)


array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

* calculate the cosine similarity of each item with every other item in the dataset, 

In [7]:
cosine_sim = cosine_similarity(X, X)
cosine_sim.shape

(500, 500)

* sort all items using their similarity for each item i, and store the values in dictionary `results`

```
results = {
    "1": [5,7,9...],
    "2": [45,2,3...]
}
```

In [None]:
# Skipped this step because it is unecessary

* create function `recommender` that will recommend similar products
    * function must have two input params: **item_id** and **count** of similar products 

In [27]:
df.loc[0,'description']

'Active classic boxers - There\'s a reason why our boxers are a cult favorite - they keep their cool, especially in sticky situations. The quick-drying, lightweight underwear takes up minimal space in a travel pack. An exposed, brushed waistband offers next-to-skin softness, five-panel construction with a traditional boxer back for a classic fit, and a functional fly. Made of 3.7-oz 100% recycled polyester with moisture-wicking performance. Inseam (size M) is 4 1/2". Recyclable through the Common Threads Recycling Program.<br><br><b>Details:</b><ul> <li>"Silky Capilene 1 fabric is ultralight, breathable and quick-to-dry"</li> <li>"Exposed, brushed elastic waistband for comfort"</li> <li>5-panel construction with traditional boxer back</li> <li>"Inseam (size M) is 4 1/2"""</li></ul><br><br><b>Fabric: </b>3.7-oz 100% all-recycled polyester with Gladiodor natural odor control for the garment. Recyclable through the Common Threads Recycling Program<br><br><b>Weight: </b>99 g (3.5 oz)<br><b

In [31]:
def recommender(item_id, count, similarity_matrix=cosine_sim):
    # Get index of the item in df
    index = df[df['id'] == item_id].index
    print(f'Input item: {df.loc[index,"description"].values}')
    similarity_series = pd.Series(similarity_matrix[index].reshape(-1))
    most_similar = similarity_series.argsort()
    return df.iloc[most_similar[1:count+1]]

recommender(2, 5)

Input item: ['Active sport boxer briefs - Skinning up Glory requires enough movement without your boxers deciding to poach their own route. The form-fitting Active Sport Boxer Briefs are made from breathable 93% polyester (71% recycled) fabric that\'s fast-wicking, dries quickly and has 7% spandex for stretch; the seamless waistband and soft leg edges won\'t roll or bind. The gusseted, flat-sewn 6" inseam (size M) is offset to prevent inner-thigh chafe. Fly-free with a smooth front panel. Recyclable through the Common Threads Recycling Program.<br><br><b>Details:</b><ul> <li>"Stretch mesh provides support, open-weave mesh for airflow, wicks efficiently, dries fast"</li> <li>Seamless construction</li> <li>"Flat-sewn, gusseted inseam is set forward to prevent inner-thigh chafe"</li> <li>Fly-free support</li> <li>"Inseam (size M) is 6"""</li></ul><br><br><b>Fabric: </b>"4.6-oz 93% polyester (71% recycled)/7% spandex, with moisture-wicking performance. Recyclable through the Common Threads

Unnamed: 0,id,description
47,48,Going big in b.c. poster - One mans trash is a...
331,332,Traversing auguille d'entreves - Janine Patitu...
318,319,Symmetry w16 poster - Bill Serantoni ice climb...
395,396,"Wild steelhead, alaska poster - Catch and rele..."
122,123,Special edition pataloha shirt - This year's S...


In [17]:
df.loc[2]

id                                                             3
description    Active sport briefs - These superbreathable no...
Name: 2, dtype: object

* show top 5 the most similar items for item with idem_id = 11

In [32]:
print(df.loc[11,'description'])
recommender(11, 5)

Baggies shorts - Even Baggies, our most popular shorts for anything, or nothing, occasionally need an update. This season we've increased the inseam length. Their casual fit, quick-drying water-repellent nylon and elasticized waistband with an internal drawstring remain the same as ever. Other features include a polyester mesh lining, a rear snap pocket and front pockets (with self-draining mesh corners) positioned to reduce drag in the water. Inseam (size M) is 7". Recyclable through the Common Threads Recycling Program.<br><br><b>Details:</b><ul> <li>Quick-drying nylon with a DWR (durable water repellent) finish</li> <li>Elasticized waistband with internal drawstring; black mesh liner</li> <li>Vertical on-seam side pockets for reduced drag in the water; pocket bags have quick-drain-and-dry mesh corners; snap-closed back pocket</li> <li>"Inseam is 7"""</li></ul><br><br><b>Fabric: </b>4.2-oz 100% nylon with a DWR finish. Lining: 5.2-oz 100% polyester mesh. Recyclable through the Common

Unnamed: 0,id,description
331,332,Traversing auguille d'entreves - Janine Patitu...
318,319,Symmetry w16 poster - Bill Serantoni ice climb...
47,48,Going big in b.c. poster - One mans trash is a...
68,69,Lead an examined life poster - Guanacos standi...
489,490,Flyfishing the athabasca poster - Patrik Ondru...
