# Product Ranking based on Review Title, Pros, Cons, Review User Response

In [1]:
# importing the necessary libraries
import pandas as pd
import glob
from sentence_transformers import SentenceTransformer, util

class ranking_products:
    """Score each product's pooled review text against a free-text query.

    One CSV file per product is read from ``DATA_GLOB``. The string values of
    the ``REVIEW_COLUMNS`` are concatenated into one document per product,
    embedded with the sentence-transformer model, and compared with the query
    embedding by cosine similarity.
    """

    # Pattern matching the per-product review CSV files.
    DATA_GLOB = r"C:\\Users\\maganti\\OneDrive - UNT System\\Documents\\Ding Sir Project\\Data\\SoftwareAdvice\\SoftwareAdviceCSVFiles\\*"
    # Review columns pooled into each product's document, in concatenation order.
    REVIEW_COLUMNS = ('Review_title', 'Pros', 'Review User Response', 'Cons')

    def __init__(self):
        # initializing the SentenceTransformer model to generate embeddings
        self.model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

    def ranking(self, query: str):
        """Return one cosine-similarity score per product file.

        Args:
            query: Free-text description of what the user is looking for.

        Returns:
            A list with one single-element tensor per file matched by
            ``DATA_GLOB``, in the order ``glob.glob`` returned the files
            (callers read each value with ``.item()``). The list is empty
            when no file matches.

        Raises:
            KeyError: If a product file lacks one of ``REVIEW_COLUMNS``.
        """
        products_review_content = []
        # iterating through each product file and combining multiple reviews of a product
        # NOTE(review): the file order is whatever glob.glob yields; it is left
        # unsorted because later cells pair these scores with a directory
        # listing by position -- confirm before changing the order.
        for csv_path in glob.glob(self.DATA_GLOB):
            data = pd.read_csv(csv_path)
            # Only string cells are pooled; anything else (e.g. the NaN floats
            # pandas produces for empty cells) is skipped.
            snippets = [
                ele
                for col in self.REVIEW_COLUMNS
                for ele in data[col].values
                if isinstance(ele, str)
            ]
            # Every snippet keeps its single leading space so the text handed
            # to the model is exactly what the "+=" loop used to build.
            products_review_content.append("".join(" " + snippet for snippet in snippets))

        # Nothing matched the pattern: no scores, and no need to embed the query.
        if not products_review_content:
            return []

        # NOTE(review): all-MiniLM-L6-v2 is documented to truncate long inputs,
        # so columns late in REVIEW_COLUMNS may not influence the score of a
        # product with many reviews -- confirm this is acceptable.
        # generating embeddings for each product reviews
        products_embeddings = [
            self.model.encode(review, convert_to_tensor=True)
            for review in products_review_content
        ]

        # The query is identical for every product, so embed it once instead
        # of once per product.
        query_embedding = self.model.encode(query, convert_to_tensor=True)

        # generating scores for each product comparing the given query
        return [
            util.pytorch_cos_sim(embedding, query_embedding)
            for embedding in products_embeddings
        ]

In [2]:
# Build the ranker; its __init__ loads the sentence-transformer model.
ranks = ranking_products()

# passing query from a user to identify the best product suitable for a team
# (the result is a list of similarity tensors, one per product CSV file)
product_ranking_scores = ranks.ranking("Accounting Manager Technology Very Small team size 2-5")

In [5]:
# Spot check: similarity score of the first product file, unwrapped from its tensor.
product_ranking_scores[0].item()

0.33471354842185974

# Product Ranking based on Review Title, Pros, Review User Response

In [7]:
import pandas as pd
import glob
from sentence_transformers import SentenceTransformer, util

class ranking_products_1:
    """Score each product's pooled review text (Cons excluded) against a query.

    One CSV file per product is read from ``DATA_GLOB``. The string values of
    the ``REVIEW_COLUMNS`` are concatenated into one document per product,
    embedded with the sentence-transformer model, and compared with the query
    embedding by cosine similarity.
    """

    # Pattern matching the per-product review CSV files.
    DATA_GLOB = r"C:\\Users\\maganti\\OneDrive - UNT System\\Documents\\Ding Sir Project\\Data\\SoftwareAdvice\\SoftwareAdviceCSVFiles\\*"
    # Review columns pooled into each product's document, in concatenation order.
    REVIEW_COLUMNS = ('Review_title', 'Pros', 'Review User Response')

    def __init__(self):
        # initializing the SentenceTransformer model to generate embeddings
        self.model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

    def ranking(self, query: str):
        """Return one cosine-similarity score per product file.

        Args:
            query: Free-text description of what the user is looking for.

        Returns:
            A list with one single-element tensor per file matched by
            ``DATA_GLOB``, in the order ``glob.glob`` returned the files
            (callers read each value with ``.item()``). The list is empty
            when no file matches.

        Raises:
            KeyError: If a product file lacks one of ``REVIEW_COLUMNS``.
        """
        products_review_content = []
        # iterating through each product file and combining multiple reviews of a product
        # NOTE(review): the file order is whatever glob.glob yields; it is left
        # unsorted because later cells pair these scores with a directory
        # listing by position -- confirm before changing the order.
        for csv_path in glob.glob(self.DATA_GLOB):
            data = pd.read_csv(csv_path)
            # Only string cells are pooled; anything else (e.g. the NaN floats
            # pandas produces for empty cells) is skipped.
            snippets = [
                ele
                for col in self.REVIEW_COLUMNS
                for ele in data[col].values
                if isinstance(ele, str)
            ]
            # Every snippet keeps its single leading space so the text handed
            # to the model is exactly what the "+=" loop used to build.
            products_review_content.append("".join(" " + snippet for snippet in snippets))

        # Nothing matched the pattern: no scores, and no need to embed the query.
        if not products_review_content:
            return []

        # NOTE(review): all-MiniLM-L6-v2 is documented to truncate long inputs,
        # so columns late in REVIEW_COLUMNS may not influence the score of a
        # product with many reviews -- confirm this is acceptable.
        # generating embeddings for each product reviews
        products_embeddings = [
            self.model.encode(review, convert_to_tensor=True)
            for review in products_review_content
        ]

        # The query is identical for every product, so embed it once instead
        # of once per product.
        query_embedding = self.model.encode(query, convert_to_tensor=True)

        # generating scores for each product comparing the given query
        return [
            util.pytorch_cos_sim(embedding, query_embedding)
            for embedding in products_embeddings
        ]

In [None]:
# Same query as before, scored against Review_title + Pros + Review User Response only.
ranks_1 = ranking_products_1()
product_ranking_scores_1 = ranks_1.ranking("Accounting Manager Technology Very Small team size 2-5")

In [39]:
# Spot check: similarity score of the first product file, unwrapped from its tensor.
product_ranking_scores_1[0].item()

0.33471354842185974

# Product Ranking based on Cons

In [40]:
import pandas as pd
import glob
from sentence_transformers import SentenceTransformer, util

class ranking_products_2:
    """Score each product's pooled "Cons" text against a free-text query.

    One CSV file per product is read from ``DATA_GLOB``. The string values of
    the ``REVIEW_COLUMNS`` are concatenated into one document per product,
    embedded with the sentence-transformer model, and compared with the query
    embedding by cosine similarity.
    """

    # Pattern matching the per-product review CSV files.
    DATA_GLOB = r"C:\\Users\\maganti\\OneDrive - UNT System\\Documents\\Ding Sir Project\\Data\\SoftwareAdvice\\SoftwareAdviceCSVFiles\\*"
    # Review columns pooled into each product's document, in concatenation order.
    REVIEW_COLUMNS = ('Cons',)

    def __init__(self):
        # initializing the SentenceTransformer model to generate embeddings
        self.model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

    def ranking(self, query: str):
        """Return one cosine-similarity score per product file.

        Args:
            query: Free-text description of what the user is looking for.

        Returns:
            A list with one single-element tensor per file matched by
            ``DATA_GLOB``, in the order ``glob.glob`` returned the files
            (callers read each value with ``.item()``). The list is empty
            when no file matches.

        Raises:
            KeyError: If a product file lacks one of ``REVIEW_COLUMNS``.
        """
        products_review_content = []
        # iterating through each product file and combining multiple reviews of a product
        # NOTE(review): the file order is whatever glob.glob yields; it is left
        # unsorted because later cells pair these scores with a directory
        # listing by position -- confirm before changing the order.
        for csv_path in glob.glob(self.DATA_GLOB):
            data = pd.read_csv(csv_path)
            # Only string cells are pooled; anything else (e.g. the NaN floats
            # pandas produces for empty cells) is skipped.
            snippets = [
                ele
                for col in self.REVIEW_COLUMNS
                for ele in data[col].values
                if isinstance(ele, str)
            ]
            # Every snippet keeps its single leading space so the text handed
            # to the model is exactly what the "+=" loop used to build.
            products_review_content.append("".join(" " + snippet for snippet in snippets))

        # Nothing matched the pattern: no scores, and no need to embed the query.
        if not products_review_content:
            return []

        # NOTE(review): all-MiniLM-L6-v2 is documented to truncate long inputs,
        # so later reviews may not influence the score of a product with many
        # reviews -- confirm this is acceptable.
        # generating embeddings for each product reviews
        products_embeddings = [
            self.model.encode(review, convert_to_tensor=True)
            for review in products_review_content
        ]

        # The query is identical for every product, so embed it once instead
        # of once per product.
        query_embedding = self.model.encode(query, convert_to_tensor=True)

        # generating scores for each product comparing the given query
        return [
            util.pytorch_cos_sim(embedding, query_embedding)
            for embedding in products_embeddings
        ]

In [None]:
# Same query as before, scored against the Cons column only.
ranks_2 = ranking_products_2()
product_ranking_scores_2 = ranks_2.ranking("Accounting Manager Technology Very Small team size 2-5")

In [42]:
# Spot check: similarity score of the first product file, unwrapped from its tensor.
product_ranking_scores_2[0].item()

0.1520843505859375

# Generating CSV file including all scores

In [None]:
import pandas as pd
import os

import glob

# listing all the products obtained from SoftwareAdvice website
# The files are listed with the same glob pattern the ranking classes read, so
# names and scores pair up one-to-one by position. (glob's "*" skips dot-files,
# which os.listdir would include and the old "[1:]" slice tried to drop.)
products = [os.path.basename(path) for path in glob.glob(r"C:\\Users\\maganti\\OneDrive - UNT System\\Documents\\Ding Sir Project\\Data\\SoftwareAdvice\\SoftwareAdviceCSVFiles\\*")]
# splitext strips only the final extension, so product names containing dots stay intact.
product_names = [os.path.splitext(product)[0] for product in products]
df = pd.DataFrame({'Product' : product_names,
                   'ReviewTitle+Pros+Cons+ReviewUserResponse-Score' : [tensor.item() for tensor in product_ranking_scores],
                   'ReviewTitle+Pros+ReviewUserResponse-Score' : [tensor.item() for tensor in product_ranking_scores_1],
                   'Cons-Score' : [tensor.item() for tensor in product_ranking_scores_2]})
df

In [81]:
# creating a csv file of above dataframe
# (written to the current working directory; index=False leaves out the row index column)
df.to_csv("Similarity_scores_of_products.csv", index = False)

# Descending order of products based on ReviewTitle+Pros+Cons+ReviewUserResponse-Score only

In [None]:
import pandas as pd
import os

import glob

# The files are listed with the same glob pattern the ranking classes read, so
# names and scores pair up one-to-one by position. (glob's "*" skips dot-files,
# which os.listdir would include and the old "[1:]" slice tried to drop.)
products = [os.path.basename(path) for path in glob.glob(r"C:\\Users\\maganti\\OneDrive - UNT System\\Documents\\Ding Sir Project\\Data\\SoftwareAdvice\\SoftwareAdviceCSVFiles\\*")]
# splitext strips only the final extension, so product names containing dots stay intact.
product_names = [os.path.splitext(product)[0] for product in products]
df1 = pd.DataFrame({'Product' : product_names,
                   'ReviewTitle+Pros+Cons+ReviewUserResponse-Score' : [tensor.item() for tensor in product_ranking_scores]})
# sort_values returns a new frame rather than sorting in place; rebind df1 so
# the CSV later written from it really is in descending score order.
df1 = df1.sort_values(by = 'ReviewTitle+Pros+Cons+ReviewUserResponse-Score', ascending = False)
df1

In [82]:
# creating a csv file of above dataframe
# (rows are written in whatever order df1 currently holds; index=False leaves out the row index column)
df1.to_csv("Order_of_products_with_ReviewTitle+Pros+Cons+ReviewUserResponse-Score.csv", index = False)

# Descending order of products based on ReviewTitle+Pros+ReviewUserResponse-Score only

In [None]:
import pandas as pd
import os

import glob

# The files are listed with the same glob pattern the ranking classes read, so
# names and scores pair up one-to-one by position. (glob's "*" skips dot-files,
# which os.listdir would include and the old "[1:]" slice tried to drop.)
products = [os.path.basename(path) for path in glob.glob(r"C:\\Users\\maganti\\OneDrive - UNT System\\Documents\\Ding Sir Project\\Data\\SoftwareAdvice\\SoftwareAdviceCSVFiles\\*")]
# splitext strips only the final extension, so product names containing dots stay intact.
product_names = [os.path.splitext(product)[0] for product in products]
df2 = pd.DataFrame({'Product' : product_names,
                   'ReviewTitle+Pros+ReviewUserResponse-Score' : [tensor.item() for tensor in product_ranking_scores_1]})
# sort_values returns a new frame rather than sorting in place; rebind df2 so
# the CSV later written from it really is in descending score order.
df2 = df2.sort_values(by = 'ReviewTitle+Pros+ReviewUserResponse-Score', ascending = False)
df2

In [83]:
# creating a csv file of above dataframe
# (rows are written in whatever order df2 currently holds; index=False leaves out the row index column)
df2.to_csv("Order_of_products_with_ReviewTitle+Pros+ReviewUserResponse-Score.csv", index = False)

# Descending order of products based on Cons-Score only

In [None]:
import pandas as pd
import os

import glob

# The files are listed with the same glob pattern the ranking classes read, so
# names and scores pair up one-to-one by position. (glob's "*" skips dot-files,
# which os.listdir would include and the old "[1:]" slice tried to drop.)
products = [os.path.basename(path) for path in glob.glob(r"C:\\Users\\maganti\\OneDrive - UNT System\\Documents\\Ding Sir Project\\Data\\SoftwareAdvice\\SoftwareAdviceCSVFiles\\*")]
# splitext strips only the final extension, so product names containing dots stay intact.
product_names = [os.path.splitext(product)[0] for product in products]
df3 = pd.DataFrame({'Product' : product_names,
                   'Cons-Score' : [tensor.item() for tensor in product_ranking_scores_2]})
# sort_values returns a new frame rather than sorting in place; rebind df3 so
# the CSV later written from it really is in descending score order.
df3 = df3.sort_values(by = 'Cons-Score', ascending = False)
# Only the display is limited to the top 15 rows; df3 keeps every product.
df3.head(15)

In [84]:
# creating a csv file of above dataframe
# (rows are written in whatever order df3 currently holds; index=False leaves out the row index column)
df3.to_csv("Order_of_products_with_Cons-Score.csv", index = False)