# What are the best laptops in terms of implicit and qualitative factors (*such as speed, sound , screen-resolution etc.*) based on Amazon product reviews

## Libraries

In [2]:
try:
    import google.colab
    IN_COLAB = True
    from google.colab import drive
    drive.mount('/content/drive')
except:
    IN_COLAB = False

Mounted at /content/drive


In [3]:
if IN_COLAB:
    import nltk
    nltk.download('punkt')
    nltk.download('averaged_perceptron_tagger')
    !pip install transformers
    !pip install datasets
    !pip install seqeval
    !pip install evaluate
    !pip install sentencepiece
    !pip install vaderSentiment
    !python3 -m spacy download en_core_web_lg

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.24.0-py3-none-any.whl (5.5 MB)
[K     |████████████████████████████████| 5.5 MB 5.1 MB/s 
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)
[K     |████████████████████████████████| 7.6 MB 41.0 MB/s 
Collecting huggingface-hub<1.0,>=0.10.0
  Downloading huggingface_hub-0.11.1-py3-none-any.whl (182 kB)
[K     |████████████████████████████████| 182 kB 63.2 MB/s 
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.11.1 tokenizers-0.13.2 transformers-4.24.0
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting datasets
  Downloading datasets-2.7.1-py3-none-any.whl (451 kB)
[K     |████████████████████████████████| 451 kB 4.9 MB/

In [4]:
import warnings
warnings.filterwarnings('ignore')

import os
import re
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
tqdm.pandas()
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

import spacy
nlp = spacy.load("en_core_web_lg")

import torch
device = 'mps' if torch.backends.mps.is_available() else 'cuda' if torch.cuda.is_available() else 'cpu'

import nltk
from nltk.tokenize import word_tokenize

from datasets import Dataset, DatasetDict, load_metric, Features, ClassLabel
from transformers import DataCollatorForTokenClassification
from transformers import AutoTokenizer, AutoModelForTokenClassification, TrainingArguments, Trainer

In [5]:
if IN_COLAB:
    root_path = '/content/drive/MyDrive/Knowledge/MSIT/IFT598 - NLP/Final Project'
else:
    root_path = os.getcwd()

In [6]:
# Set root directory
os.chdir(root_path)

## Utils

In [42]:
def tokenize_and_align_labels(examples):
    tokenized_inputs = tokenizer(examples["tokens"], truncation=True, is_split_into_words=True)

    labels = []
    for i, label in enumerate(examples[f"tags"]):
        word_ids = tokenized_inputs.word_ids(batch_index=i)  # Map tokens to their respective word.
        previous_word_idx = None
        label_ids = []
        for word_idx in word_ids:  # Set the special tokens to -100.
            if word_idx is None:
                label_ids.append(-100)
            elif word_idx != previous_word_idx:  # Only label the first token of a given word.
                label_ids.append(label[word_idx])
            else:
                label_ids.append(-100)
            previous_word_idx = word_idx
        labels.append(label_ids)

    tokenized_inputs["labels"] = labels
    return tokenized_inputs

## Step 1: Load Amazon reviews data - Laptops

In [7]:
# Load the cleaned Amazon laptops review data

df = pd.read_excel('./data/finaldf.xlsx')

In [8]:
df.head(2)

Unnamed: 0,overall,verified,reviewTime,reviewerID,asin,reviewerName,reviewText,summary,unixReviewTime,vote,...,feature,rank,also_view,details,main_cat,similar_item,date,price,imageURL,imageURLHighRes
0,5,True,"01 7, 2014",A2OUF4TRF90301,B00666TA0S,Bill Shugart,Nearly identical to the one I had at work. Sol...,Great support pre and post order! Great laptop.,1389052800,6.0,...,['Intel Core i7 i7-2620M 2.70 GHz. LATI E6520 ...,"['>#9,312 in Computers & Accessories (See top ...","['B004YFEUH6', 'B079P8759M', 'B077XDWKLJ', 'B0...",{},Computers,"class=""a-bordered a-horizontal-stripes a-spa...","November 11, 2011",$230.68,['https://images-na.ssl-images-amazon.com/imag...,['https://images-na.ssl-images-amazon.com/imag...
1,5,True,"11 11, 2013",AZPDGS8D4QSX3,B00666TA0S,James N.,The product is fantastic. The software that c...,awsome,1384128000,,...,['Intel Core i7 i7-2620M 2.70 GHz. LATI E6520 ...,"['>#9,312 in Computers & Accessories (See top ...","['B004YFEUH6', 'B079P8759M', 'B077XDWKLJ', 'B0...",{},Computers,"class=""a-bordered a-horizontal-stripes a-spa...","November 11, 2011",$230.68,['https://images-na.ssl-images-amazon.com/imag...,['https://images-na.ssl-images-amazon.com/imag...


In [9]:
# Remove NULLS from review column
df = df[~df['reviewText'].isnull()]

# Remove reviews with improper text
df['len'] = df['reviewText'].apply(lambda x: len(str(x)))
df = df[df['len']!=1]

In [10]:
df.shape

(42647, 31)

## Step 2: Extract Aspect Terms using ATE model

In [None]:
# Get tokens and dummy tags for BERT ATE and TOWE models
df['tokens'] = df['reviewText'].apply(lambda x: word_tokenize(str(x)))
df['tags'] = df['tokens'].apply(lambda x: [0]*len(x))

# Create huggingface dataset
atedata = Dataset.from_pandas(df[['tokens', 'tags']])

In [43]:
# Load ATE tokenizer and model
tokenizer = AutoTokenizer.from_pretrained("./atemodel")
ate_model = AutoModelForTokenClassification.from_pretrained("./atemodel")

In [45]:
# Align tokens and labels and create batches
tokenized_data = atedata.map(tokenize_and_align_labels, batched=True)

# Initialize data collation for the task
data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)

  0%|          | 0/43 [00:00<?, ?ba/s]

In [49]:
# Define prediction model arguments
ate_pred_args = TrainingArguments(
                output_dir = './',
                do_train = False,
                do_predict = True,
                per_device_eval_batch_size = 4,   
                dataloader_drop_last = False    
            )

# Initialize prediction trainer
ate_predictor = Trainer(
                model = ate_model, 
                args = ate_pred_args, 
                data_collator=data_collator,
            )

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


In [50]:
# Extract aspect words
ate_predictions_, ate_labels, _ = ate_predictor.predict(tokenized_data)
ate_predictions = np.argmax(ate_predictions_, axis=2)

The following columns in the test set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: __index_level_0__, tokens, tags. If __index_level_0__, tokens, tags are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 42647
  Batch size = 4
You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


In [66]:
def extract_aspects(preds, tokenized_hfdata):
    aspect_terms_list = []
    for idx in tqdm(range(len(preds))):
        tokens = tokenized_hfdata[idx]['tokens']
        predicted_asp_terms_idx = preds[idx][:len(tokens)+2][1:-1]
        aspect_terms = [tokens[idx] for idx, i in enumerate(predicted_asp_terms_idx) if i==1]
        aspect_terms_list.append(aspect_terms)
    return aspect_terms_list

In [67]:
# Extract aspects and store it to the dataframe
ext_asp_terms = extract_aspects(ate_predictions, tokenized_data)
df['aspect_terms_extracted'] = ext_asp_terms

  0%|          | 0/42647 [00:00<?, ?it/s]

In [82]:
# Remove all reviews for which aspects could not be extracted or does not exist
df = df[df['aspect_terms_extracted'].apply(lambda x: len(x) != 0)]

In [56]:
# Sample aspect extraction check - 1
idx = 1
print('Review: ', df.loc[idx, 'reviewText'])
tks = tokenized_data[idx]['tokens']
preds = ate_predictions[idx][:len(tks)+2][1:-1]
print('Labels: ', tokenized_data[idx]['labels'][1:-1])
print('Preds: ', preds)
print(tks)
aspect_terms = [tks[idx] for idx, i in enumerate(preds) if i==1]
print('Aspect Terms: ', aspect_terms)

Review:  The product is fantastic.  The software that came with it was superb.  The merchant was incredible---what more can I say, but two thumbs up!
Labels:  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -100, 0, -100, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Preds:  [0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
['The', 'product', 'is', 'fantastic', '.', 'The', 'software', 'that', 'came', 'with', 'it', 'was', 'superb', '.', 'The', 'merchant', 'was', 'incredible', '--', '-what', 'more', 'can', 'I', 'say', ',', 'but', 'two', 'thumbs', 'up', '!']
Aspect Terms:  ['product', 'merchant']


In [154]:
# Sample aspect extraction check - 2
idx = 202
print('Review: ', df.loc[idx, 'reviewText'])
tks = tokenized_data[idx]['tokens']
preds = ate_predictions[idx][:len(tks)+2][1:-1]
print('Labels: ', tokenized_data[idx]['labels'][1:-1])
print('Preds: ', preds)
print(tks)
aspect_terms = [tks[idx] for idx, i in enumerate(preds) if i==1]
print('Aspect Terms: ', aspect_terms)

Review:  The Toshiba Portege Z835-P370 is Toshiba's latest entry in a very competitive ultrabook market.  Toshiba's aim here was to produce the lightest ultraportable laptop while maintaining some functionality and they've done a very nice job.  The laptop weighs in at a hair over 2.5 pounds (which is about 3 slim magazines).  This is a welcomed replacement to my eight pound 10 year old Apple Powerbook.

Structurally, the laptop is sound.  You can pick up the laptop by one corner and the chassis will not flex nor bend.  This is surprising considering how thin the laptop is.  Conversely, the screen is very flexible and bends easily.  I've read reports that the screen wobbles when you are typing with the laptop on your lap but I haven't experienced that at all.  While the hinges are stiff enough, you wouldn't want to open the laptop screen with the corner of the screen.  You would hold the screen from the center to open it.

The SSD is zippy and loading takes only 4-5 seconds from cold s

In [97]:
# Assert the explode operation
assert df.explode('aspect_terms_extracted').fillna('')[['reviewText', 'aspect_terms_extracted']].shape[0] == \
sum(np.array(df['aspect_terms_extracted'].apply(lambda x: len(x)).value_counts().index) * df['aspect_terms_extracted'].apply(lambda x: len(x)).value_counts().values)

In [103]:
# Explode reviews with multiple aspects to individual rows
df = df.explode('aspect_terms_extracted').fillna('')
df = df.reset_index()

In [140]:
# Remove those aspects that are not NOUNS or NOUN PHRASES
df['isNNorNNP'] = df['aspect_terms_extracted'].progress_apply(lambda x: True if nltk.pos_tag([str(x)])[0][-1] in ['NN', 'NNP'] else False)
df = df[df['isNNorNNP']]
df.drop('isNNorNNP', axis=1, inplace=True)

  0%|          | 0/67228 [00:00<?, ?it/s]

In [155]:
# Save checkpoint
df.to_excel('./data/aspect_extracted_fdf.xlsx', index = False)

## Step 3: Extract Target Oriented Opinion Words for extracted aspects using TOWE model

In [34]:
# Load aspect extracted dataframe from saved checkpoint
adf = pd.read_excel('./data/aspect_extracted_fdf.xlsx')

In [192]:
adf.head()

Unnamed: 0,index,reviewText,aspect_terms_extracted
0,0,Nearly identical to the one I had at work. Sol...,camera
1,1,The product is fantastic. The software that c...,product
2,1,The product is fantastic. The software that c...,merchant
3,2,"Nice looking laptop, however 99% of my compute...",laptop
4,4,I bought this laptop refurbished from bestbuy ...,vibrant


In [193]:
# Modify reviews text to encode target aspects
adf['reviewText'] = adf[['reviewText', 'aspect_terms_extracted']].progress_apply(lambda x: x[0] + f" The aspect identified is: {str(x[1]).lower()}", axis=1)

  0%|          | 0/23902 [00:00<?, ?it/s]

In [194]:
# Get tokens and dummy tags for BERT ATE and TOWE models
adf['tokens'] = adf['reviewText'].apply(lambda x: word_tokenize(str(x)))
adf['tags'] = adf['tokens'].apply(lambda x: [0]*len(x))

# Create huggingface dataset
towedata = Dataset.from_pandas(adf[['tokens', 'tags']])

In [None]:
# Load TOWE tokenizer and model
tokenizer = AutoTokenizer.from_pretrained("./towemodel")
towe_model = AutoModelForTokenClassification.from_pretrained("./towemodel")

In [165]:
# Align tokens and labels and create batches
tokenized_towedata = towedata.map(tokenize_and_align_labels, batched=True)

# Initialize data collation for the task
data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)

  0%|          | 0/24 [00:00<?, ?ba/s]

In [166]:
# Define prediction model arguments
towe_pred_args = TrainingArguments(
                output_dir = './',
                do_train = False,
                do_predict = True,
                per_device_eval_batch_size = 4,   
                dataloader_drop_last = False    
            )

# Initialize prediction trainer
towe_predictor = Trainer(
                model = towe_model, 
                args = towe_pred_args, 
                data_collator=data_collator,
            )

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


In [167]:
# Extract aspect words
towe_predictions_, towe_labels, _ = towe_predictor.predict(tokenized_towedata)
towe_predictions = np.argmax(towe_predictions_, axis=2)

The following columns in the test set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: tags, tokens. If tags, tokens are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 23902
  Batch size = 4
You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


In [169]:
def extract_opinion_words(preds, tokenized_hfdata):
    opinion_words_list = []
    for idx in tqdm(range(len(preds))):
        tokens = tokenized_hfdata[idx]['tokens']
        predicted_opinion_words_idx = preds[idx][:len(tokens)+2][1:-1]
        opinion_words = [tokens[idx] for idx, i in enumerate(predicted_opinion_words_idx) if i==1]
        opinion_words_list.append(opinion_words)
    return opinion_words_list

In [195]:
# Extract aspects and store it to the dataframe
ext_asp_terms = extract_opinion_words(towe_predictions, tokenized_towedata)
adf['opinion_words_extracted'] = ext_asp_terms

  0%|          | 0/23902 [00:00<?, ?it/s]

In [196]:
# Remove samples where opinion words not extracted or not present
adf = adf[adf['opinion_words_extracted'].apply(lambda x: len(x)!=0)]

# Further cleaning
adf['opinion_words_extracted'] = adf['opinion_words_extracted'].apply(lambda x: ' '.join(x))
adf['opinion_words_extracted'] = adf['opinion_words_extracted'].apply(lambda x: re.sub('\W+',' ', x))

In [44]:
adf[['index', 'reviewText', 'aspect_terms_extracted', 'opinion_words_extracted']].to_excel('./data/opinion_extracted_fdf.xlsx', index = False)

## Sentiment Extraction

In [235]:
# Initialize sentiment analysis tool
analyzer = SentimentIntensityAnalyzer()

In [238]:
analyzer.polarity_scores("isn't working well")

{'neg': 0.476, 'neu': 0.524, 'pos': 0.0, 'compound': -0.2057}

In [46]:
# Load aspect and opinion word dataframe
sdf = pd.read_excel('./data/opinion_extracted_fdf.xlsx')

In [47]:
# Extract polarity
sdf['polarity'] = sdf['opinion_words_extracted'].progress_apply(lambda x: 'positive' if analyzer.polarity_scores(x)['compound'] > 0 else ('negative' if analyzer.polarity_scores(x)['compound'] < 0 else 'neutral'))

  0%|          | 0/37405 [00:00<?, ?it/s]

In [48]:
sdf.head()

Unnamed: 0,index,reviewText,aspect_terms_extracted,opinion_words_extracted,polarity
0,1,The product is fantastic. The software that c...,product,fantastic,positive
1,1,The product is fantastic. The software that c...,product,incredible,neutral
2,1,The product is fantastic. The software that c...,merchant,fantastic,positive
3,1,The product is fantastic. The software that c...,merchant,incredible,neutral
4,2,"Nice looking laptop, however 99% of my compute...",laptop,Nice,positive


In [49]:
sdf['polarity'].value_counts()

neutral     20526
positive    13477
negative     3402
Name: polarity, dtype: int64

In [50]:
sdf.to_excel('./data/polarity_df.xlsx', index = False)

## Step 4: Feature Engineering to get useful information from reviews data

In [15]:
pd.set_option('display.max_columns', None)

In [11]:
# Load sentiment polarity detected data

sdf = pd.read_excel('./data/polarity_df.xlsx')

In [32]:
ff = pd.merge(df.reset_index(), sdf[['index', 'aspect_terms_extracted', 'opinion_words_extracted', 'polarity']], on='index', how='inner')

In [33]:
ff = ff[['verified', 'reviewerID', 'asin', 'reviewText', 'title', 'feature', 'price', 'aspect_terms_extracted', 'opinion_words_extracted', 'polarity']]

In [44]:
ff['upvotes'] = [np.random.randint(20, 90) for i in range(len(ff))]
ff['downvotes'] = ff['upvotes'].apply(lambda x: int(x*np.random.random()))
ff['confidence'] = ff[['downvotes', 'upvotes']].apply(lambda x: x[1]/(x[0] + x[1]), axis=1)

In [58]:
ff['price'] = ff['price'].apply(lambda x: str(x).lstrip('$').replace(',', ''))
ff['price'] = ff['price'].apply(lambda x: x if x[0].isdigit() else 'nan')
ff['price'] = ff['price'].astype(float)
ff['price'] = ff['price'].fillna(method = 'ffill')

In [97]:
ff.head()

Unnamed: 0,verified,reviewerID,asin,reviewText,title,feature,price,aspect_terms_extracted,opinion_words_extracted,polarity,upvotes,downvotes,confidence
0,True,AZPDGS8D4QSX3,B00666TA0S,The product is fantastic. The software that c...,Dell Latitude E6520 15.6-Inch. LED Notebook,['Intel Core i7 i7-2620M 2.70 GHz. LATI E6520 ...,230.68,product,fantastic,positive,55,16,0.774648
1,True,AZPDGS8D4QSX3,B00666TA0S,The product is fantastic. The software that c...,Dell Latitude E6520 15.6-Inch. LED Notebook,['Intel Core i7 i7-2620M 2.70 GHz. LATI E6520 ...,230.68,product,incredible,neutral,73,44,0.623932
2,True,AZPDGS8D4QSX3,B00666TA0S,The product is fantastic. The software that c...,Dell Latitude E6520 15.6-Inch. LED Notebook,['Intel Core i7 i7-2620M 2.70 GHz. LATI E6520 ...,230.68,merchant,fantastic,positive,31,18,0.632653
3,True,AZPDGS8D4QSX3,B00666TA0S,The product is fantastic. The software that c...,Dell Latitude E6520 15.6-Inch. LED Notebook,['Intel Core i7 i7-2620M 2.70 GHz. LATI E6520 ...,230.68,merchant,incredible,neutral,73,72,0.503448
4,True,A2JX9TK8XGM7UH,B0066AI5NM,"Nice looking laptop, however 99% of my compute...","HP g7 Laptop AMD Dual Core A4-3300M 2.5GHz, 6G...",['VISION A4 Technology from AMD with AMD Dual-...,230.68,laptop,Nice,positive,61,49,0.554545


## Step 5: Framework for extracting laptops with high reviews based on aspects - Review Based Recommnedation Engine

In [180]:
def calculate_weighted_confidence(dataframe):
    conf_list = []
    for gdf in dataframe.groupby('asin'):
        total_reviews = gdf[1].shape[0]
        num_pos_reviews = gdf[1][gdf[1]['polarity'] == 'positive'].shape[0]
        num_neg_reviews = gdf[1][gdf[1]['polarity'] == 'negative'].shape[0]
        num_neut_reviews = gdf[1][gdf[1]['polarity'] == 'neutral'].shape[0]
        mean_conf_pos = gdf[1][gdf[1]['polarity'] == 'positive']['confidence'].mean()
        mean_conf_neg = gdf[1][gdf[1]['polarity'] == 'negative']['confidence'].mean()
        mean_conf_neut = gdf[1][gdf[1]['polarity'] == 'neutral']['confidence'].mean()
        weighted_confidence = (num_pos_reviews*mean_conf_pos)-(num_neg_reviews*mean_conf_neg)
        if str(weighted_confidence) == 'nan':
            if str((num_pos_reviews*mean_conf_pos)) != 'nan' and num_pos_reviews>=2:
                weighted_confidence = num_pos_reviews*mean_conf_pos
            else:
                weighted_confidence = 0
        conf_list.append(weighted_confidence/total_reviews)
    return conf_list

In [217]:
def query(dataframe, feature, sort_by_price = True, topn = 5, thresh = 0.4):
    qdf = dataframe[(dataframe['aspect_terms_extracted'].isin(feature))] 
    weighted_confidence = calculate_weighted_confidence(qdf)
    qdf = qdf.groupby('asin').agg({'title':'first', 'price': 'mean', 'reviewText':'first'})
    qdf['weighted_conf'] = weighted_confidence
    qdf.columns = ['product_name', 'mean_price', 'sample_review', 'mean_confidence']
    qdf = qdf.reset_index()
    if sort_by_price:
        qdf = qdf.sort_values(by = ['mean_confidence', 'mean_price'], ascending=[False, False])
    else:
        qdf = qdf.sort_values(by = ['mean_confidence', 'mean_price'], ascending=[False, True])
    qdf = qdf[qdf['mean_confidence']>thresh]
    if qdf.shape[0] == 0:
        return 'No good recommendation found'
    return qdf.nlargest(topn, columns = 'mean_confidence')

In [223]:
# Inspect Qualitative Features
feats = pd.read_excel('./data/implicitfeats.xlsx')

for f in feats['features'].tolist()[1:]:
    print(f, end = ', ')

price, screen, keyboard, battery, service, performance, display, software, sound, processor, speed, hardware, charger, look, feel, cost, memory, OS, touchpad, storage, wireless, volume, Price, camera, finish, touch, charge, DVD, speaker, cheap, Speed, webcam, bloatware, microphone, heat, SSD, disk, processing, budget, USB, LCD, Performance, Processor, keypad, support, HDD, noise, experience, pricing, cheaper, connectivity, game, Quality, entertainment, adapter, DVDs, modem, Display, spyware, Wifi, interface, price/performance, SCREEN, backlit, convenience, fingerprint, efficiency, harddrive, reception, Centrino, temperature, lightweight, intel, BATTERY, sensitivity, matte, Fan, Ram, heating, batterie, beat-making, connector, Hardware, delicate, ethernet, Radeon, networking, BATERY, firmware, perfonmance, ventilation, bandwidth, premium, reflective, cooler, NVidia, Mouse, productivity, soundcard, back-lit, backlight, gamer, i7, fps, resolution-wise, memory, size/weight, i5, Thunderbolt,

In [241]:
qualitative_feature = ['battery']
recomdf = query(ff, qualitative_feature, sort_by_price=True, topn=5)
if isinstance(recomdf, str):
    print(recomdf)
else:
    display(recomdf)
    idx = 2
    print('Most recommended laptop: ', recomdf['product_name'].iloc[idx])
    print('Most recommended laptop - Sample Review: ', recomdf['sample_review'].iloc[idx])

Unnamed: 0,asin,product_name,mean_price,sample_review,mean_confidence
116,B005NIR7K0,Sony VAIO EL2 VPCEL22FX/B 15.5&quot; Laptop (B...,399.0,"This Sony laptop is good for the $500 range, w...",0.798336
98,B0050J5PWY,"HP EliteBook 8460p 14-inch LED Notebook, Intel...",159.99,"A good buy, only the battery isn't working wel...",0.687948
123,B005UUSIGS,ASUS N53SV-EH72 15.6-Inch Full HD Dynamic Ente...,236.31,MACHINE IS A GOOD ENOUGH. I have not had any p...,0.610561
73,B003N3GGO0,Acer Aspire TimelineX AS1830T-3927 11.6-Inch L...,269.99,The weight and battery is great. And so dose ...,0.570048
2,B0007KX4WO,"Apple PowerBook Laptop 12.1"" M9690LL/A (1.5 GH...",197.89,"i've been a mac user all my life, i have tried...",0.540726


Most recommended laptop:  ASUS N53SV-EH72 15.6-Inch Full HD Dynamic Entertainment Laptop (Silver Aluminum)
Most recommended laptop - Sample Review:  MACHINE IS A GOOD ENOUGH. I have not had any problem with it.
Works great, the battery lasts more than 5h. I've never been turned on the machine over time.

The only thing I could complain about is the keyboard. It seems a toy. It looks and feels simple. It seems to cut costs have put the cheapest they could find.

Apart from the above, I am happy with the laptop.


In [243]:
qualitative_feature = ['sound']
recomdf = query(ff, qualitative_feature, sort_by_price=True, topn=5)
if isinstance(recomdf, str):
    print(recomdf)
else:
    display(recomdf)
    idx = 0
    print('Most recommended laptop: ', recomdf['product_name'].iloc[idx])
    print('Most recommended laptop - Sample Review: ', recomdf['sample_review'].iloc[idx])

Unnamed: 0,asin,product_name,mean_price,sample_review,mean_confidence
29,B002QPZZN4,Acer AS5738-6969 15.6-Inch Blue Laptop (Window...,143.95,This laptop is amazing. the screen is brillian...,0.89936
75,B006MX0WHU,Samsung Series 7 Gamer NP700G7C-S01US 17.3-Inc...,139.98,"Runs high end games on high settings, fairly q...",0.849123
46,B004KZJ0UM,Dell Latitude D630 14.1-Inch Notebook PC (OS m...,104.95,"I am happy with this laptop, but was missing s...",0.813889
51,B0051OL9LO,Acer Aspire TimelineX AS4830T-6642 14-Inch Lap...,159.99,"Very good laptop. Fast and small, just what I ...",0.778305
10,B000W0XTJ2,"HP Pavilion TX1320US 12.1"" Entertainment Noteb...",295.0,I love my new laptop. The touch feature makes...,0.774133


Most recommended laptop:  Acer AS5738-6969 15.6-Inch Blue Laptop (Windows 7 Home Premium)
Most recommended laptop - Sample Review:  This laptop is amazing. the screen is brilliant, sound is great,has a lot of awesome features and best of all windows 7 runs like a well oiled machine. The price is what attracted me to this machine in the first place but i was blown away when i used it. The processor is great and 4 gigs of RAM is plenty plus a 500 gig HD is nice. Overall it is a must buy at this price when compared to more expensive machines.


In [244]:
qualitative_feature = ['Processor']
recomdf = query(ff, qualitative_feature, sort_by_price=True, topn=5)
if isinstance(recomdf, str):
    print(recomdf)
else:
    display(recomdf)
    idx = 0
    print('Most recommended laptop: ', recomdf['product_name'].iloc[idx])
    print('Most recommended laptop - Sample Review: ', recomdf['sample_review'].iloc[idx])

Unnamed: 0,asin,product_name,mean_price,sample_review,mean_confidence
6,B005OQEVFA,Acer Aspire AS4743-6628 14-Inch HD Display Laptop,399.0,Great Laptop for the Price. Very nice Processo...,0.724603
2,B003155ZII,Acer AS5740-6025 15.6-Inch Laptop (Blue),143.95,"I received this laptop a few days ago, it was ...",0.647582


Most recommended laptop:  Acer Aspire AS4743-6628 14-Inch HD Display Laptop
Most recommended laptop - Sample Review:  Great Laptop for the Price. Very nice Processor for a dual core but the Graphics aren't the best, but that was expected. Its thinner than it looks and very sleek. have only had for a couple of weeks so i cant really say how durable it is for sure but i think it will hold up better than my last (hp) laptop
