# Enter the Query and Get the Top 5 Most Relevant Reviews 

In [10]:
from sentence_transformers import SentenceTransformer, util
import numpy as np
import pandas as pd

# 1) Load the local model (no download)
model = SentenceTransformer('models/all-MiniLM-L6-v2')

# 2) Load embeddings and dataframe
embeddings = np.load('review_embeddings.npy')
# NOTE(security): unpickling can execute arbitrary code embedded in the file;
# only load 'reviews_df.pkl' if it was produced by a trusted source.
df = pd.read_pickle('reviews_df.pkl')

# Positions in `embeddings` are later used as row positions into `df`, so the
# two must have the same number of rows. Fail here, next to the loading code,
# rather than showing the wrong review (or an IndexError) during a search.
assert len(embeddings) == len(df), (
    f"Embeddings/dataframe mismatch: {len(embeddings)} embeddings "
    f"vs {len(df)} reviews"
)

print("✅ Model and embeddings loaded successfully!\n")

# Interactive semantic search: repeatedly prompt for a query, rank every
# review embedding by cosine similarity to it, and print the best matches.
# After the loop, `topk_indices` and `scores` hold the values from the most
# recent successful search.

# Number of hits shown per query; constant for the session, so set it once.
topk = 5

while True:
    # 3) Ask the user for a query
    query = input("Enter your search query (or 'exit' to quit): ").strip()
    if query.lower() == 'exit':
        print("👋 Exiting search. Goodbye!")
        break
    if not query:
        # A blank line gives nothing to search for. Re-prompt instead of
        # encoding an empty string, and leave the previous search results
        # (topk_indices / scores) untouched.
        print("⚠️ Please enter a non-empty query.\n")
        continue

    # 4) Encode the query
    query_embedding = model.encode(query)

    # 5) Compute cosine similarities
    #    Row 0 of the result is taken because there is a single query;
    #    the scores are moved to a NumPy array for sorting and indexing.
    scores = util.cos_sim(query_embedding, embeddings)[0].cpu().numpy()

    # 6) Get top-k results (negating the scores sorts highest-first;
    #    slicing also copes with fewer than `topk` reviews)
    topk_indices = np.argsort(-scores)[:topk]

    # 7) Display results
    print(f"\nTop {topk} results for: '{query}'\n")
    for idx in topk_indices:
        print(f"🔹 Score: {scores[idx]:.4f}")
        print(f"💬 Review: {df.iloc[idx]['reviewText']}\n")

    print("-" * 60 + "\n")


✅ Model and embeddings loaded successfully!


Top 5 results for: 'microsoft'

🔹 Score: 0.8081
💬 Review: I like Microsoft.

🔹 Score: 0.7894
💬 Review: What can you say, its microsoft.

🔹 Score: 0.6506
💬 Review: this operating system is by far the greatest made by Microsoft

🔹 Score: 0.6494
💬 Review: This is old, old, old software marketed as new stuff.  I've been ripped off. This, like everything else Microsoft touches these days, from the Ford Fusion's sync, through Windows 8, etc, is nothing but junk peddled on the rest of us so Microsoft can survive economically.  It is a has-been company as far as I'm concerned. :-(

🔹 Score: 0.6346
💬 Review: Best program to use. I would tell everyone to get all Microsoft  then the program give to us by our internet.

------------------------------------------------------------

👋 Exiting search. Goodbye!


In [11]:
from transformers import pipeline

# Pin the checkpoint and revision explicitly instead of relying on the
# library's implicit default for "sentiment-analysis". These are the model and
# revision the unpinned call resolved to, so results stay the same but no
# longer change silently if the default is updated.
sentiment_pipeline = pipeline(
    "sentiment-analysis",
    model="distilbert/distilbert-base-uncased-finetuned-sst-2-english",
    revision="714eb0f",
)

# Review texts of the top-k hits from the most recent search, as a plain list
# (the form passed to the pipeline later on).
reviews = df.iloc[topk_indices]['reviewText'].tolist()
reviews


No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
Device set to use mps:0


['I like Microsoft.',
 'What can you say, its microsoft.',
 'this operating system is by far the greatest made by Microsoft',
 "This is old, old, old software marketed as new stuff.  I've been ripped off. This, like everything else Microsoft touches these days, from the Ford Fusion's sync, through Windows 8, etc, is nothing but junk peddled on the rest of us so Microsoft can survive economically.  It is a has-been company as far as I'm concerned. :-(",
 'Best program to use. I would tell everyone to get all Microsoft  then the program give to us by our internet.']

In [12]:
# Display the list of review texts currently held in `reviews`.
reviews

['I like Microsoft.',
 'What can you say, its microsoft.',
 'this operating system is by far the greatest made by Microsoft',
 "This is old, old, old software marketed as new stuff.  I've been ripped off. This, like everything else Microsoft touches these days, from the Ford Fusion's sync, through Windows 8, etc, is nothing but junk peddled on the rest of us so Microsoft can survive economically.  It is a has-been company as far as I'm concerned. :-(",
 'Best program to use. I would tell everyone to get all Microsoft  then the program give to us by our internet.']

In [13]:
# Classify every selected review. `truncation=True` cuts reviews that exceed
# the model's maximum sequence length instead of letting the forward pass fail
# on an over-long input. Each result is a dict with 'label' and 'score'.
results = sentiment_pipeline(reviews, truncation=True)

In [14]:
# Ratings from the `overall` column for the top-k rows, in the same order as
# `topk_indices`; printed later as the "Actual Star" for each review.
actualStars = df.iloc[topk_indices]["overall"]

In [17]:
# Compare the sentiment model's verdict with the real rating of each review.
# The classifier output is turned into a signed polarity (negative labels flip
# the sign of the score) and mapped linearly from [-1, 1] onto the 1-5 scale.
# NOTE(review): if `score` is the winning label's probability (>= 0.5 for a
# two-class model), estimates between 2 and 4 stars can never occur - confirm
# this is intended.
for review, result, actualStar in zip(reviews, results, actualStars):
    label, confidence = result["label"], result["score"]

    print(f"Review: {review}")
    print(f"Sentiment: {label}, Score: {confidence:.2f}")

    polarity = -confidence if label == "NEGATIVE" else confidence
    star = (polarity + 1) / 2 * 4 + 1

    print("Star: ", star)
    print("Actual Star: ", actualStar, "\n")

Review: I like Microsoft.
Sentiment: POSITIVE, Score: 1.00
Star:  4.998371958732605
Actual Star:  5 

Review: What can you say, its microsoft.
Sentiment: NEGATIVE, Score: 0.82
Star:  1.3580284118652344
Actual Star:  5 

Review: this operating system is by far the greatest made by Microsoft
Sentiment: POSITIVE, Score: 1.00
Star:  4.999440550804138
Actual Star:  5 

Review: This is old, old, old software marketed as new stuff.  I've been ripped off. This, like everything else Microsoft touches these days, from the Ford Fusion's sync, through Windows 8, etc, is nothing but junk peddled on the rest of us so Microsoft can survive economically.  It is a has-been company as far as I'm concerned. :-(
Sentiment: NEGATIVE, Score: 1.00
Star:  1.0009032487869263
Actual Star:  1 

Review: Best program to use. I would tell everyone to get all Microsoft  then the program give to us by our internet.
Sentiment: POSITIVE, Score: 0.64
Star:  4.2777345180511475
Actual Star:  5 



In [16]:
# Show the full dataframe rows (all columns) for the top-k hits of the most
# recent search, selected by position.
df.iloc[topk_indices]

Unnamed: 0,overall,verified,reviewTime,reviewerID,asin,style,reviewerName,reviewText,summary,unixReviewTime,vote,image
7901,5,True,"05 9, 2014",A3U7J61Z9UERWQ,B00EDSI7QO,,THOMAS L PETREE,I like Microsoft.,I love it,1399593600,,
8993,5,True,"05 14, 2016",A2Q51L8MVGK5L6,B00HV9IM58,{'Platform:': ' O365 Personal Keycard'},nevrider,"What can you say, its microsoft.",you need this to be compitiable,1463184000,,
5406,5,False,"10 18, 2017",AFD6W5PLS36DY,B004Q0T0LU,,Anthony Joshua,this operating system is by far the greatest m...,Five Stars,1508284800,,
7264,1,True,"08 31, 2014",A3NRHAQEUZTW4G,B00B1TEIRU,{'Platform:': ' O365 Home Download'},Michael P. Murphy,"This is old, old, old software marketed as new...",like everything else Microsoft touches these days,1409443200,2.0,
1471,5,False,"07 28, 2014",A3UEGYW49RUP1P,B000HCZ8EO,{'Format:': ' Software'},Judy,Best program to use. I would tell everyone to ...,Best program to use,1406505600,,
