In [5]:
!pip install neo4j

Collecting neo4j
  Using cached neo4j-5.28.1-py3-none-any.whl.metadata (5.9 kB)
Using cached neo4j-5.28.1-py3-none-any.whl (312 kB)
Installing collected packages: neo4j
Successfully installed neo4j-5.28.1


In [7]:
import os

import pandas as pd
from neo4j import GraphDatabase
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [9]:
# Load the product dataset from a CSV file located relative to the current working directory.
# NOTE(review): the preview output shows an `Unnamed: 0` column, which suggests the CSV was
# written with its index included — confirm whether that column is needed.
df = pd.read_csv("filtered_amazon_data.csv")

In [13]:
# Preview the first two rows to check which columns were loaded.
df.head(2)

Unnamed: 0,product_id,product_name,category,about_product,review_content,about_product_cleaned,review_content_cleaned
0,B07JW9H4J1,Wayona Nylon Braided USB to Lightning Fast Cha...,Computers&Accessories|Accessories&Peripherals|...,High Compatibility : Compatible With iPhone 12...,Looks durable Charging is fine tooNo complains...,high compatibility compatible iphone 12 1...,looks durable charging fine toono complai...
1,B098NS6PVG,Ambrane Unbreakable 60W / 3A Fast Charging 1.5...,Computers&Accessories|Accessories&Peripherals|...,"Compatible with all Type C enabled devices, be...",I ordered this cable to connect my phone to An...,compatible type c enabled devices android...,ordered cable connect phone android auto ...


In [15]:
# Drop rows without a cleaned description (they cannot be vectorized), then renumber the
# index 0..n-1. The similarity matrix built from this frame is addressed by row position,
# so the index labels must match positions after rows have been removed.
df = df.dropna(subset=["about_product_cleaned"]).reset_index(drop=True)

In [17]:
# TF-IDF Vectorization
# Build the vectorizer with scikit-learn's built-in English stop-word list.
vectorizer = TfidfVectorizer(stop_words="english")
# Learn the vocabulary from the cleaned descriptions and transform them in one step;
# the result has one row per row of df.
tfidf_matrix = vectorizer.fit_transform(df["about_product_cleaned"])


In [19]:
# Compute cosine similarity between every pair of rows of the TF-IDF matrix.
# The result is a square matrix addressed by row position.
cosine_sim = cosine_similarity(tfidf_matrix)

In [21]:
# Create Product ID to Index Mapping
# Values are row *positions* (0..len(df)-1), because cosine_sim is addressed by position
# and df's index labels are not guaranteed to be contiguous.
product_indices = pd.Series(range(len(df)), index=df["product_id"])
# Series.drop_duplicates() would compare the *values* (all unique here) and remove nothing.
# Filter on the index instead so every product_id maps to exactly one row (its first one).
product_indices = product_indices[~product_indices.index.duplicated(keep="first")]


In [39]:
# Define a Recommendation Function
def get_similar_products(product_id, top_n=5):
    """Return up to ``top_n`` product IDs whose descriptions are most similar to ``product_id``.

    Similarity is read from the module-level ``cosine_sim`` matrix, whose rows and
    columns follow the row order of the module-level ``df``.

    Args:
        product_id: ID to look up in ``df["product_id"]``.
        top_n: Maximum number of recommendations to return.

    Returns:
        A list of distinct product IDs ordered from most to least similar. The
        queried product itself is never included. An empty list is returned when
        ``product_id`` is unknown or ``top_n`` is not positive.
    """
    all_ids = df["product_id"].tolist()

    # Work with row positions, not index labels: cosine_sim is positional.
    # A product_id may occur on several rows; the first occurrence is used as the
    # reference row so that exactly one row of the matrix is ranked.
    positions = [pos for pos, pid in enumerate(all_ids) if pid == product_id]
    if not positions or top_n <= 0:
        return []

    scores = cosine_sim[positions[0]]
    # sorted() is stable, so equally similar rows keep their original order.
    ranked = sorted(range(len(scores)), key=lambda pos: scores[pos], reverse=True)

    # Skip the queried product by ID rather than assuming it is the top hit, and
    # skip IDs already collected so duplicate rows do not use up result slots.
    seen = {product_id}
    top_products = []
    for pos in ranked:
        candidate = all_ids[pos]
        if candidate in seen:
            continue
        seen.add(candidate)
        top_products.append(candidate)
        if len(top_products) == top_n:
            break
    return top_products

In [41]:
# Sanity check: the similarity matrix should be square, with one row/column per row of df.
print(cosine_sim.shape) 

(1465, 1465)


In [47]:
# Test Recommendation Function
# The result is kept in `recommended_products` because a later cell looks up details for these IDs.
recommended_products=get_similar_products("B07JW9H4J1")
print("Recommended Products:",recommended_products)

Recommended Products: ['B098NS6PVG', 'B096MSW6CT']


In [51]:
def get_product_details(product_id):
    """Look up the name and cleaned description for ``product_id`` in ``df``.

    Returns a list with one ``{"product_name", "about_product_cleaned"}`` dict per
    matching row, or the string ``"Product Not Found"`` when no row matches.
    """
    matches = df.loc[df["product_id"] == product_id]
    if len(matches.index) == 0:
        return "Product Not Found"
    wanted_columns = ["product_name", "about_product_cleaned"]
    details = matches[wanted_columns]
    return details.to_dict(orient="records")

# Show the name and cleaned description of every recommended product.
for prod_id in recommended_products:
    print(get_product_details(prod_id))


[{'product_name': 'Ambrane Unbreakable 60W / 3A Fast Charging 1.5m Braided Type C Cable for Smartphones, Tablets, Laptops & other Type C devices, PD Technology, 480Mbps Data Sync, Quick Charge 3.0 (RCT15A, Black)', 'about_product_cleaned': 'compatible  type  c  enabled  devices  android  smartphone  mi  samsung  oppo  vivo  realme  oneplus  etc  tablet  laptop  macbook  chromebook  etc  supports  quick  charging  2  0  3  0  unbreakable  made  special  braided  outer  rugged  interior  bindings  ultra  durable  cable  affected  daily  rough  usage  ideal  length  ideal  length  1  5  meters  neither  short  like  typical  1meter  cable  long  like  2meters  cable  supports  maximum  3a  fast  charging  480  mbps  data  transfer  speed  6  months  manufacturer  warranty  date  purchase'}, {'product_name': 'Ambrane Unbreakable 60W / 3A Fast Charging 1.5m Braided Type C Cable for Smartphones, Tablets, Laptops & other Type C devices, PD Technology, 480Mbps Data Sync, Quick Charge 3.0 (RCT1

In [76]:
class Neo4jConnection:
    """Small wrapper around a Neo4j driver that runs each query in its own session.

    Instances can be used as context managers (``with Neo4jConnection(...) as db:``)
    so the driver is closed when the block exits.
    """

    def __init__(self,url,user,password):
        """Create a driver for ``url`` using basic authentication (``user``, ``password``)."""
        self.driver= GraphDatabase.driver(url,auth=(user,password))

    def __enter__(self):
        """Return the connection itself for use inside a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the driver on leaving a ``with`` block; exceptions are not suppressed."""
        self.close()
        return False

    def close(self):
        """Close the underlying driver."""
        self.driver.close()

    def run_query(self,query,parameter=None):
        """Run ``query`` in a fresh session and return its records as a list.

        Args:
            query: Cypher statement to execute.
            parameter: Optional dict of query parameters; may be omitted for
                statements that take none.

        Returns:
            A list of the records produced by the query. The list is built while
            the session is still open, because the result cannot be consumed
            after the session has been closed.
        """
        with self.driver.session() as session:
            return list(session.run(query,parameters=parameter))
# Connection settings are read from the environment so that no secret is stored in the notebook.
# NEO4J_PASSWORD is required; NEO4J_URI and NEO4J_USER fall back to the values used so far.
# NOTE(review): the password that used to be hard-coded in this cell has been exposed
# wherever this notebook was shared and should be rotated.
db = Neo4jConnection(
    os.environ.get("NEO4J_URI", "neo4j+s://3205dd99.databases.neo4j.io:7687"),
    os.environ.get("NEO4J_USER", "neo4j"),
    os.environ["NEO4J_PASSWORD"],
)

In [82]:
def get_similar_products_neo4j(product_id,top_n=5):
    """Recommend products that are connected to ``product_id`` through a shared Review node.

    The query follows REVIEWS relationships from a Review node to both the given
    product and another product, counts the matches per other product, and
    returns the IDs of the ``top_n`` highest-scoring products.
    """
    cypher = """
    MATCH (p:Product {product_id: $product_id})<-[:REVIEWS]-(r:Review)-[:REVIEWS]->(other:Product)
    RETURN other.product_id AS recommended_product, COUNT(*) AS score
    ORDER BY score DESC 
    LIMIT $top_n
    """
    params = {"product_id": product_id, "top_n": top_n}
    recommended = []
    for row in db.run_query(cypher, params):
        recommended.append(row["recommended_product"])
    return recommended
           
print(get_similar_products_neo4j("B07JW9H4J1"))
# The driver is deliberately not closed here: the cells below keep issuing queries
# through `db`. Call db.close() once, after the last query in the notebook.

['B07JW1Y6XV', 'B07JGDB5M1', 'B07LGT55SJ', 'B07JH1C41D', 'B07JH1CBGW']


In [84]:
def store_recommendations_neo4j():
    """Persist content-based recommendations as SIMILAR_TO relationships in Neo4j.

    For every distinct product ID in ``df``, the products returned by
    ``get_similar_products`` are linked from that product with a
    ``(:Product)-[:SIMILAR_TO]->(:Product)`` relationship. MERGE is used, so
    running the function again does not create duplicate relationships.
    Products that have no matching Product node in the graph are skipped by the
    MATCH clauses.
    """
    # Built once: the statement is the same for every product. UNWIND lets a single
    # round-trip link one product to all of its recommendations.
    query = """
    MATCH (p1:Product {product_id: $product_id})
    UNWIND $similar_ids AS similar_id
    MATCH (p2:Product {product_id: similar_id})
    MERGE (p1)-[:SIMILAR_TO]->(p2)
    """

    # A product_id can appear on several rows of df; process each ID only once.
    for product_id in df["product_id"].drop_duplicates():
        # dict.fromkeys() drops repeated IDs while keeping their order; a product is
        # never linked to itself.
        similar_ids = [
            sim_product
            for sim_product in dict.fromkeys(get_similar_products(product_id))
            if sim_product != product_id
        ]
        if not similar_ids:
            continue
        db.run_query(query, {"product_id": product_id, "similar_ids": similar_ids})

# Writes one SIMILAR_TO relationship per recommendation; this issues many queries against the database.
store_recommendations_neo4j()  # Run this once to store recommendations in Neo4j


  with self.driver.session() as session:


In [86]:
def get_recommendations_from_neo4j(product_id, top_n=5):
    """Read stored recommendations for ``product_id`` from Neo4j.

    Follows outgoing SIMILAR_TO relationships from the product and returns the
    IDs of at most ``top_n`` linked products. The query has no ORDER BY clause,
    so it does not specify which links are returned when there are more than
    ``top_n``.
    """
    cypher = """
    MATCH (p:Product {product_id: $product_id})-[:SIMILAR_TO]->(rec:Product)
    RETURN rec.product_id AS recommended_product
    LIMIT $top_n
    """
    params = {"product_id": product_id, "top_n": top_n}
    recommended = []
    for row in db.run_query(cypher, params):
        recommended.append(row["recommended_product"])
    return recommended

# Test Neo4j-Based Recommendation
# Reads back the SIMILAR_TO links stored for this product.
print(get_recommendations_from_neo4j("B07JW9H4J1"))


  with self.driver.session() as session:


['B098NS6PVG', 'B096MSW6CT']
