# Laptop Recommender System

### Library Imports and Dataset Loading

In [1]:
import spacy
import numpy as np 
import pandas as pd 
from nltk.tokenize import word_tokenize
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer

In [2]:
# Load the laptop table from a CSV file expected in the notebook's working directory.
laptops = pd.read_csv('./Final_Dataframe.csv')


### Dataset Inspection

In [3]:
# Peek at the first rows to see the available columns and the raw value formats.
laptops.head()

Unnamed: 0,brand,laptop_name,display_size,processor_type,graphics_card,disk_space,discount_price,old_price,ratings_5max
0,HP,Notebook 14-df0008nx,14.0,Intel Celeron N4000,Intel HD Graphics 600,64 GB (eMMC),1259.0,1259.0,0 / 5
1,Lenovo,IdeaPad 330S-14IKB,14.0,Intel Core i5-8250U,Intel UHD Graphics 620,1 TB HDD,1849.0,2099.0,3.3 / 5
2,Huawei,MateBook D Volta,14.0,Intel Core i5-8250U,NVIDIA GeForce MX150 (2 GB),256 GB SSD,2999.0,3799.0,0 / 5
3,Dell,Inspiron 15 3567,15.6,Intel Core i3-7020U,Intel HD Graphics 620,1 TB HDD,1849.0,1849.0,0 / 5
4,Asus,VivoBook 15 X510UR,15.6,Intel Core i7-8550U,NVIDIA GeForce 930MX (2 GB),1 TB HDD,2499.0,3149.0,0 / 5


In [4]:
# Column dtypes and null counts. In the output below, laptop_name has 204
# non-null values out of 205 rows, i.e. one laptop has no name.
laptops.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 205 entries, 0 to 204
Data columns (total 9 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   brand           205 non-null    object 
 1   laptop_name     204 non-null    object 
 2   display_size    205 non-null    float64
 3   processor_type  205 non-null    object 
 4   graphics_card   205 non-null    object 
 5   disk_space      205 non-null    object 
 6   discount_price  205 non-null    float64
 7   old_price       205 non-null    float64
 8   ratings_5max    205 non-null    object 
dtypes: float64(3), object(6)
memory usage: 14.5+ KB


In [5]:
# (rows, columns) of the raw table.
laptops.shape

(205, 9)

In [6]:
# Distinct processor labels. Note in the output below that every value
# starts with a space character.
laptops.processor_type.unique()

array([' Intel Celeron N4000', ' Intel Core i5-8250U',
       ' Intel Core i3-7020U', ' Intel Core i7-8550U',
       ' Intel Core i5 Dual Core', ' AMD A9-9425', ' Intel Core i7-8565U',
       ' Intel Core i7 6 Core', ' Intel Core i5 Quad Core',
       ' Intel Core i5-8265U', ' Intel Core M3', ' Intel Core i7-8750H',
       ' Intel Core i3-8145U', ' Intel Celeron 4205U',
       ' Intel Celeron N3060', ' Intel Celeron N3350',
       ' Intel Core i5-7200U', ' Intel Core i9', ' Intel Core i7-7820HK',
       ' Intel Core i7-7700HQ', ' Intel Core i7-6700HQ',
       ' Intel Core i7 Quad Core', ' Intel Core i7-7500U',
       ' Intel Core i3-5005U', ' Intel Core i3-6006U',
       ' Intel Core i3-8130U'], dtype=object)

In [7]:
# Work on a copy so the raw `laptops` frame stays untouched; it is used again
# at the end of the notebook to display the recommended rows.
df = laptops.copy()

### Data Preprocessing

In [8]:
def token_convertor(x):
    """Split ``x`` into word tokens and discard parenthesis tokens.

    Parameters
    ----------
    x : str
        Text to tokenize with NLTK's ``word_tokenize``.

    Returns
    -------
    list
        The tokens in their original order, without any token that is
        exactly ``'('`` or ``')'``.
    """
    excluded = ('(', ')')
    kept = []
    for token in word_tokenize(x):
        if token not in excluded:
            kept.append(token)
    return kept

In [9]:
# Every feature is rebuilt from the untouched `laptops` frame instead of from
# `df` itself, so this cell can be re-run safely. Re-tokenizing the already
# transformed `df` columns would fail (lists cannot be tokenized again and the
# parsed float ratings have no `.split`).

# Free-text specs -> list of word tokens (token_convertor drops parentheses).
for column in ['processor_type', 'graphics_card', 'disk_space', 'brand']:
    df[column] = laptops[column].apply(token_convertor)

# "3.3 / 5" -> 3.3: keep only the score written before the slash.
df['ratings_5max'] = laptops['ratings_5max'].apply(lambda x: float(x.split('/')[0]))

# Numeric specs -> one-element list holding the value as text, so they can be
# concatenated with the token lists when the tags are built.
for column in ['display_size', 'discount_price', 'old_price']:
    df[column] = laptops[column].apply(lambda x: [str(x)])

In [10]:
# Concatenate the per-column token lists, row by row, into a single `tags`
# list. The order of the columns below is the order of the tokens in `tags`.
tag_sources = ['brand', 'display_size', 'processor_type', 'graphics_card',
               'disk_space', 'discount_price', 'old_price']
tags = df[tag_sources[0]]
for source in tag_sources[1:]:
    tags = tags + df[source]
df['tags'] = tags

In [11]:
# Inspect the transformed columns together with the new `tags` column.
df.head()

Unnamed: 0,brand,laptop_name,display_size,processor_type,graphics_card,disk_space,discount_price,old_price,ratings_5max,tags
0,[HP],Notebook 14-df0008nx,[14.0],"[Intel, Celeron, N4000]","[Intel, HD, Graphics, 600]","[64, GB, eMMC]",[1259.0],[1259.0],0.0,"[HP, 14.0, Intel, Celeron, N4000, Intel, HD, G..."
1,[Lenovo],IdeaPad 330S-14IKB,[14.0],"[Intel, Core, i5-8250U]","[Intel, UHD, Graphics, 620]","[1, TB, HDD]",[1849.0],[2099.0],3.3,"[Lenovo, 14.0, Intel, Core, i5-8250U, Intel, U..."
2,[Huawei],MateBook D Volta,[14.0],"[Intel, Core, i5-8250U]","[NVIDIA, GeForce, MX150, 2, GB]","[256, GB, SSD]",[2999.0],[3799.0],0.0,"[Huawei, 14.0, Intel, Core, i5-8250U, NVIDIA, ..."
3,[Dell],Inspiron 15 3567,[15.6],"[Intel, Core, i3-7020U]","[Intel, HD, Graphics, 620]","[1, TB, HDD]",[1849.0],[1849.0],0.0,"[Dell, 15.6, Intel, Core, i3-7020U, Intel, HD,..."
4,[Asus],VivoBook 15 X510UR,[15.6],"[Intel, Core, i7-8550U]","[NVIDIA, GeForce, 930MX, 2, GB]","[1, TB, HDD]",[2499.0],[3149.0],0.0,"[Asus, 15.6, Intel, Core, i7-8550U, NVIDIA, Ge..."


In [12]:
# Keep only the laptop name and the combined tags; every other column has
# already been folded into `tags` (or, for the rating, is not used further).
# Selecting the columns to keep, rather than dropping in place, makes the cell
# re-runnable: an in-place drop raises KeyError the second time because the
# columns are already gone. `.copy()` avoids chained-assignment warnings when
# new columns are added to `df` later.
df = df[['laptop_name', 'tags']].copy()

In [13]:
# Confirm which columns remain in the working frame.
df.columns

Index(['laptop_name', 'tags'], dtype='object')

In [14]:
# Preview the remaining columns.
df.head()

Unnamed: 0,laptop_name,tags
0,Notebook 14-df0008nx,"[HP, 14.0, Intel, Celeron, N4000, Intel, HD, G..."
1,IdeaPad 330S-14IKB,"[Lenovo, 14.0, Intel, Core, i5-8250U, Intel, U..."
2,MateBook D Volta,"[Huawei, 14.0, Intel, Core, i5-8250U, NVIDIA, ..."
3,Inspiron 15 3567,"[Dell, 15.6, Intel, Core, i3-7020U, Intel, HD,..."
4,VivoBook 15 X510UR,"[Asus, 15.6, Intel, Core, i7-8550U, NVIDIA, Ge..."


In [15]:
# Flatten each tag list into one space-separated string so it can be fed to
# the language model as a sentence.
df['sentence'] = df['tags'].map(" ".join)
df.head()

Unnamed: 0,laptop_name,tags,sentence
0,Notebook 14-df0008nx,"[HP, 14.0, Intel, Celeron, N4000, Intel, HD, G...",HP 14.0 Intel Celeron N4000 Intel HD Graphics ...
1,IdeaPad 330S-14IKB,"[Lenovo, 14.0, Intel, Core, i5-8250U, Intel, U...",Lenovo 14.0 Intel Core i5-8250U Intel UHD Grap...
2,MateBook D Volta,"[Huawei, 14.0, Intel, Core, i5-8250U, NVIDIA, ...",Huawei 14.0 Intel Core i5-8250U NVIDIA GeForce...
3,Inspiron 15 3567,"[Dell, 15.6, Intel, Core, i3-7020U, Intel, HD,...",Dell 15.6 Intel Core i3-7020U Intel HD Graphic...
4,VivoBook 15 X510UR,"[Asus, 15.6, Intel, Core, i7-8550U, NVIDIA, Ge...",Asus 15.6 Intel Core i7-8550U NVIDIA GeForce 9...


### Recommendation System Implementation

In [16]:


def extract_best_indices(m, topk, mask=None):
    """Return the indices of the ``topk`` highest-scoring candidates in ``m``.

    Parameters
    ----------
    m : array-like
        Similarity scores. A 1-D array is used as-is (one score per
        candidate). An array with more than one dimension is first averaged
        over axis 0 to obtain one score per candidate.
    topk : int
        Maximum number of indices to return.
    mask : array-like of bool, optional
        One flag per candidate. Candidates whose flag is falsy are never
        returned. When omitted, every candidate is eligible.

    Returns
    -------
    np.ndarray
        Candidate indices ordered from the highest to the lowest score.
        Candidates with a score of exactly 0 are excluded, so fewer than
        ``topk`` indices may be returned.

    Raises
    ------
    AssertionError
        If ``mask`` does not have one entry per candidate.
    """
    m = np.asarray(m)

    # Collapse per-token scores into one score per candidate when needed.
    if m.ndim > 1:
        cos_sim = np.mean(m, axis=0)
    else:
        cos_sim = m

    # argsort is ascending; reverse it to go from best to worst score.
    index = np.argsort(cos_sim)[::-1]

    if mask is not None:
        mask = np.asarray(mask, dtype=bool)
        # The mask is per candidate, so it must match the reduced score
        # vector rather than the raw (possibly 2-D) input.
        assert mask.shape == cos_sim.shape, "mask must have one entry per candidate"
        mask = mask[index]
    else:
        mask = np.ones(len(cos_sim), dtype=bool)

    # A candidate is kept only if it is allowed by the mask AND has a non-zero
    # score. (An OR here would keep zero-score candidates whenever the mask is
    # true and would let any non-zero score override the mask.)
    mask = np.logical_and(cos_sim[index] != 0, mask)
    best_index = index[mask][:topk]
    return best_index


In [17]:

def predict_spacy(model, query_sentence, embed_mat, topk=3):
    """Rank the pre-parsed documents by similarity to a free-text query.

    Parameters
    ----------
    model : callable
        Called as ``model(query_sentence)``; the result must expose a
        ``similarity(other)`` method. In this notebook it is the loaded
        spaCy pipeline.
    query_sentence : str
        Text describing what the user is looking for.
    embed_mat : iterable
        Parsed documents to compare against; each must expose a
        ``vector_norm`` attribute.
    topk : int, default 3
        Maximum number of results to return.

    Returns
    -------
    np.ndarray
        Positions in ``embed_mat`` of the best matches, as selected by
        ``extract_best_indices``.
    """
    query_doc = model(query_sentence)
    similarities = np.array([query_doc.similarity(doc) for doc in embed_mat])
    # Flag the documents whose vector has a non-zero norm; only those are
    # meant to be eligible as results.
    has_vector = np.array([bool(doc.vector_norm) for doc in embed_mat])
    return extract_best_indices(similarities, topk=topk, mask=has_vector)



In [18]:
# Load the pre-trained spaCy pipeline (the model package must already be
# installed in the environment).
nlp = spacy.load("en_core_web_lg")

# Run the pipeline over every sentence once, up front, so that queries can be
# compared against the stored results without re-parsing the whole table.
spacy_docs = df['sentence'].apply(lambda text: nlp(text))
df['spacy_sentence'] = spacy_docs




In [19]:
# Preview the frame with the new `spacy_sentence` column.
df.head()

Unnamed: 0,laptop_name,tags,sentence,spacy_sentence
0,Notebook 14-df0008nx,"[HP, 14.0, Intel, Celeron, N4000, Intel, HD, G...",HP 14.0 Intel Celeron N4000 Intel HD Graphics ...,"(HP, 14.0, Intel, Celeron, N4000, Intel, HD, G..."
1,IdeaPad 330S-14IKB,"[Lenovo, 14.0, Intel, Core, i5-8250U, Intel, U...",Lenovo 14.0 Intel Core i5-8250U Intel UHD Grap...,"(Lenovo, 14.0, Intel, Core, i5, -, 8250U, Inte..."
2,MateBook D Volta,"[Huawei, 14.0, Intel, Core, i5-8250U, NVIDIA, ...",Huawei 14.0 Intel Core i5-8250U NVIDIA GeForce...,"(Huawei, 14.0, Intel, Core, i5, -, 8250U, NVID..."
3,Inspiron 15 3567,"[Dell, 15.6, Intel, Core, i3-7020U, Intel, HD,...",Dell 15.6 Intel Core i3-7020U Intel HD Graphic...,"(Dell, 15.6, Intel, Core, i3, -, 7020U, Intel,..."
4,VivoBook 15 X510UR,"[Asus, 15.6, Intel, Core, i7-8550U, NVIDIA, Ge...",Asus 15.6 Intel Core i7-8550U NVIDIA GeForce 9...,"(Asus, 15.6, Intel, Core, i7, -, 8550U, NVIDIA..."


In [20]:
# Free-text description of the laptop the user is looking for.
query_sentence = 'Intel i5'

# One parsed document per laptop, in the same row order as `df`.
embed_mat = df['spacy_sentence'].to_numpy()

# Row positions of the five best matches for the query.
best_index = predict_spacy(nlp, query_sentence, embed_mat, topk=5)

# `df` was copied from `laptops` and no rows were removed or reordered, so the
# positions can be used to show the original, untransformed rows.
display(laptops.iloc[best_index])

Unnamed: 0,brand,laptop_name,display_size,processor_type,graphics_card,disk_space,discount_price,old_price,ratings_5max
200,Lenovo,IdeaPad 320-15IKBRN,15.6,Intel Core i5-8250U,Intel GMA HD,1 TB HDD,2099.0,2099.0,3.8 / 5
121,Dell,XPS 13 9360,13.3,Intel Core i5-8250U,Intel GMA HD,256 GB PCIe NVMe M.2 SSD,4899.0,4899.0,4.3 / 5
82,Asus,X543UB,15.6,Intel Core i5-8250U,Intel UHD Graphics 620,1 TB HDD,1799.0,1949.0,0 / 5
11,Acer,Swift 5,14.0,Intel Core i7-8565U,Intel GMA HD,512 GB SSD,4499.0,5999.0,0 / 5
193,HP,ENVY 13-ab000nx,13.3,Intel Core i5-7200U,Intel GMA HD,256 GB PCIe NVMe M.2 SSD,3779.0,3779.0,0 / 5
