### IMPORTS AND CONN ESTABLISHMENT

In [20]:
import os
import csv
import pandas as pd
from openai import OpenAI
from neo4j import GraphDatabase
# import genai
from dotenv import load_dotenv
load_dotenv()
import tiktoken
import yaml
# from utils.embeddings_utils import get_embedding

In [2]:
class Neo4jConnection:
    """Small convenience wrapper around a Neo4j driver.

    The driver is created eagerly in the constructor. Failures while creating
    the driver or while running a query are printed rather than raised, so
    callers must be prepared for `query()` to return None.
    """

    def __init__(self, uri, user, pwd):
        """Store the credentials and try to open a driver for `uri`."""
        self.__uri, self.__user, self.__pwd = uri, user, pwd
        self.__driver = None
        try:
            self.__driver = GraphDatabase.driver(uri, auth=(user, pwd))
        except Exception as err:
            # Best-effort: the instance stays usable for close(), but query() will assert.
            print("Failed to create the driver:", err)

    def close(self):
        """Close the underlying driver if one was created."""
        if self.__driver is None:
            return
        self.__driver.close()

    def query(self, query, parameters=None, db=None):
        """Run `query` and return its records as a list.

        Args:
            query: Cypher text to execute.
            parameters: optional mapping of query parameters.
            db: optional database name; when None the driver's default session is used.

        Returns:
            A list of result records, or None if opening the session or
            running the query raised (the error is printed).
        """
        assert self.__driver is not None, "Driver not initialized!"
        # Only pass `database` when the caller asked for a specific one.
        session_options = {} if db is None else {"database": db}
        session = None
        records = None
        try:
            session = self.__driver.session(**session_options)
            records = list(session.run(query, parameters))
        except Exception as err:
            print("Query failed:", err)
        finally:
            # Always release the session, even when the query failed.
            if session is not None:
                session.close()
        return records

In [3]:
# Load service credentials from the YAML config one directory above this notebook.
with open("../config.yaml", "r") as file:
    config = yaml.safe_load(file)

# Unpack the settings the rest of the notebook reads.
openai_api_key = config["openai"]["api_key"]
neo4j_uri, neo4j_user, neo4j_password = (
    config["neo4j"][setting] for setting in ("uri", "user", "password")
)


In [4]:
## Connection handles
from py2neo import Graph, Node, Relationship, NodeMatcher

# Wrapper-based connection; the count query is a quick smoke test of the driver.
conn = Neo4jConnection(uri=neo4j_uri, user=neo4j_user, pwd=neo4j_password)
conn.query("MATCH (n) RETURN COUNT(n)")

# py2neo handle on the same database, built from the same credentials.
graph = Graph(neo4j_uri, auth=(neo4j_user, neo4j_password))

### Sanity check: show the number of nodes currently in the database
graph.run("MATCH (n) return count(n)")

count(n)
46206


### CREATING EMBEDDINGS

In [9]:
# Embedding settings.
# NOTE(review): none of these three names is read by the cells visible in this
# notebook; the embedding calls pass 'text-embedding-ada-002' literally. Confirm
# whether these values are still needed or should replace that literal.
embedding_model = "text-embedding-3-small"
embedding_encoding = "cl100k_base"
max_tokens = 8000 

In [10]:
def get_plots(limit=None):
    """Fetch every ARTICLE node that has a `detail_desc`.

    Connects with the credentials loaded from config.yaml (`neo4j_uri`,
    `neo4j_user`, `neo4j_password`) so this function talks to the same
    database as the rest of the notebook.

    Args:
        limit: optional maximum number of records to return. None (the
            default) returns all matching articles.

    Returns:
        The list of records produced by the query, each exposing the keys
        `id`, `title`, `product_group_name`, `product_type_name` and
        `detail_desc`.
    """
    query = """MATCH (m:ARTICLE) WHERE m.detail_desc IS NOT NULL
    RETURN m.article_id AS id,m.title as title, m.product_group_name AS product_group_name, m.product_type_name as product_type_name, m.detail_desc as detail_desc """
    parameters = {}
    if limit is not None:
        # Pass the limit as a query parameter rather than formatting it into the Cypher text.
        query += "LIMIT $limit"
        parameters["limit"] = int(limit)

    # The context manager closes the driver even if the connectivity check or the query raises.
    with GraphDatabase.driver(neo4j_uri, auth=(neo4j_user, neo4j_password)) as driver:
        driver.verify_connectivity()
        records, _summary, _keys = driver.execute_query(query, parameters_=parameters)
    return records

In [17]:
def generate_embeddings(file_name, limit=None):
    """Embed each article description and write the vectors to a CSV file.

    Args:
        file_name: path of the CSV to (over)write. It gets two columns,
            `title` and `embedding`.
        limit: forwarded to get_plots().

    One embeddings request is made per article, using the
    'text-embedding-ada-002' model.
    """
    fieldnames = ['title', 'embedding']
    # Fetch the articles before touching the output file, so a failed graph
    # query does not truncate an existing CSV.
    movies = get_plots(limit=limit)
    llm = OpenAI(api_key=openai_api_key)

    # `with` guarantees the file is flushed and closed even if an API call fails mid-run.
    with open(file_name, 'w', encoding='utf8', newline='') as csvfile_out:
        output_plot = csv.DictWriter(csvfile_out, fieldnames=fieldnames)
        output_plot.writeheader()
        for movie in movies:
            desc = str(movie['detail_desc'])
            response = llm.embeddings.create(
                input=desc,
                model='text-embedding-ada-002')
            # The embedding list is stringified by the csv writer.
            output_plot.writerow({
                'title': movie['title'],
                'embedding': response.data[0].embedding
            })


generate_embeddings('detail-plot-embeddings.csv')

In [None]:
# Reload the CSV written by generate_embeddings() (columns: 'title', 'embedding').
df = pd.read_csv("detail-plot-embeddings.csv")

#### Adding embeddings back to the database

In [36]:
# Copy each stored embedding onto the ARTICLE node with the same title.
# NOTE(review): matching on title assumes titles identify a single ARTICLE; confirm,
# or key on article_id instead.
for index, row in df.iterrows():
    # pandas represents an empty CSV cell as NaN, never None, so test with pd.notna.
    if pd.notna(row['embedding']):
        graph.run("""
        MATCH (a:ARTICLE {title: $title})
        SET a.embedding = apoc.convert.fromJsonList($embedding)
        """, title=row['title'], embedding=row['embedding'])

In [56]:
# query = """MATCH (n:ARTICLE)
# RETURN n.article_id AS ArticleID, apoc.meta.type(n.article_id) AS Type
# LIMIT 10;"""

# conn.query(query)

In [47]:
# result = graph.run("MATCH (a:ARTICLE) RETURN a.article_id AS article_id, a.embedding AS embedding").data()

# # Prepare embeddings for similarity search
# article_ids = [row['article_id'] for row in result]
# embeddings = [row['embedding'] for row in result]


In [46]:
# from sklearn.metrics.pairwise import cosine_similarity
# import numpy as np

# # Compute similarity scores
# similarities = cosine_similarity([Ques_embedding], embeddings)

# # Rank articles by similarity
# sorted_indices = np.argsort(-similarities[0])  # Descending order
# top_articles = [(article_ids[i], similarities[0][i]) for i in sorted_indices[:5]]  # Top 5
# print("Top Recommendations:", top_articles)


### CREATING VECTOR INDEX
##### to perform vector search 

In [37]:
# Create (if missing) a cosine-similarity vector index named 'moviePlots1' over the
# `embedding` property of ARTICLE nodes.
# NOTE(review): `vector.dimensions` presumably has to equal the length of the vectors
# stored in `embedding` — confirm 1536 against the embedding model in use.
query_vectorIndex = """CREATE VECTOR INDEX moviePlots1 IF NOT EXISTS
FOR (m:ARTICLE)
ON m.embedding
OPTIONS {indexConfig: {
 `vector.dimensions`: 1536,
 `vector.similarity_function`: 'cosine'
}}"""
conn.query(query_vectorIndex)

[]

#### Check all the existing vector indexes

In [5]:
# List only the VECTOR indexes, with their state and population percentage;
# the result of graph.run() is displayed as the cell output.
query_checkout_vectors = """SHOW INDEXES  YIELD id, name, type, state, populationPercent WHERE type = "VECTOR" """
graph.run(query_checkout_vectors)

id,name,type,state,populationPercent
2,moviePlots1,VECTOR,ONLINE,100.0
3,plot,VECTOR,ONLINE,100.0


### User Query

In [65]:
# Free-text question that gets embedded and used for the vector search below.
# It is blank here, so set it before running the following cells.
user_query = ""

In [7]:
from openai import OpenAI
# Initialize the OpenAI client with the key loaded from config.yaml.
# Pass the variable, not the literal string "openai_api_key".
client = OpenAI(api_key=openai_api_key)
def get_embedding(text, model="text-embedding-ada-002"):
    """Return the embedding vector for `text`.

    Args:
        text: the string to embed. Newlines are replaced with spaces before
            the request is sent.
        model: name of the OpenAI embedding model to use.

    Returns:
        The embedding of the first (and only) input, as returned by the
        module-level `client`.

    Raises:
        ValueError: if `text` is empty or whitespace-only. There is nothing
            to embed, and the embeddings endpoint does not accept an empty
            string.
    """
    # Replace newline characters with spaces
    cleaned = text.replace("\n", " ")
    if not cleaned.strip():
        raise ValueError(
            "get_embedding() needs non-empty text; set the query before requesting an embedding."
        )
    # Call OpenAI API to get embeddings
    return client.embeddings.create(input=[cleaned], model=model).data[0].embedding
# Embed the current value of `user_query`; the result is what the vector search consumes.
text_input = user_query
# Get embeddings for the text input
Ques_embedding = get_embedding(text_input, model='text-embedding-ada-002')

# Now 'Ques_embedding' contains the embedding for the text input
# print(Ques_embedding)


In [66]:
### Retrieval Using Neo4j

In [62]:
# NOTE(review): `openaikey` and `OPENAI_ENDPOINT` are not read anywhere in the
# visible notebook — confirm whether they can be dropped.
openaikey = 'openai_api_key'
OPENAI_ENDPOINT = 'https://api.openai.com/v1/'
# Ask the vector index for the 35 nearest nodes to the query embedding.
# NOTE(review): this searches the index named 'plot', while the index created
# earlier in this notebook is 'moviePlots1' — confirm which one is intended.
query_string = """
    CALL db.index.vector.queryNodes(
        'plot', 
        35, 
        $Ques_embedding
        ) YIELD node AS movie, score
    RETURN  movie.article_id, movie.title, movie.detail_desc, score
"""

result = conn.query(query_string, {'Ques_embedding': Ques_embedding})
# Neo4jConnection.query() prints the error and returns None when the query fails.
# Stop here instead of silently building an empty DataFrame from None.
if result is None:
    raise RuntimeError("Vector search failed; see the 'Query failed' message above.")

# One column per value returned by the query, in the same order.
column_names = ['article_id', 'title', 'detail_desc', 'similarity_score']
answer = pd.DataFrame(result, columns=column_names)

answer.head(20)

Unnamed: 0,article_id,title,detail_desc,similarity_score
0,873596002,Mallow hood,Divided Collection Cropped zip-through hoodie ...,0.91629
1,509893003,Chuck Hood,Men H&M Sport Sleeveless sports top in jersey ...,0.915604
2,505196001,Pearl Hoodie,"Divided Selected Jumper in a fine, airy knit w...",0.915405
3,657126001,Iceland hood,Kids Sports Sports jacket in fleece with a hoo...,0.915009
4,779136001,Croydon unitard,Divided Collection Sleeveless unitard in jerse...,0.914917
5,649323001,LOVE Top,Divided Projects Cropped top in jersey with a ...,0.914734
6,561479001,Chicago boxy ls,Ladies H&M Sport Fleece sports top with a funn...,0.914719
7,885803005,Cortina Fleece Jacket,Ladies H&M Sport Hooded jacket in soft fleece ...,0.914642
8,885803002,Cortina Fleece Jacket (C),Ladies H&M Sport Hooded jacket in soft fleece ...,0.914642
9,669091004,ROBIN HOOD.,Men Underwear Zip-through hoodie in sweatshirt...,0.914474


### Prep to prompt

In [63]:
### trial 01


import pandas as pd

def prepare_context_from_df(retrieved_items_df):
    """Render retrieved items as a numbered text list for use in a prompt.

    Args:
        retrieved_items_df: DataFrame with at least the columns `title` and
            `detail_desc`.

    Returns:
        A string with one line per row, in row order, formatted as
        "<n>. <title>: <detail_desc>" and terminated by a newline. Numbering
        starts at 1 and follows row position, so it is correct even when the
        frame's index does not start at 0 (e.g. after slicing or filtering).
        An empty frame yields an empty string.
    """
    titles = retrieved_items_df['title']
    descriptions = retrieved_items_df['detail_desc']
    return "".join(
        f"{position}. {title}: {detail_desc}\n"
        for position, (title, detail_desc) in enumerate(zip(titles, descriptions), start=1)
    )

# Build the prompt context from the first three rows of the retrieval results in `answer`.


context = prepare_context_from_df(answer.head(3))
print(context)



1. Mallow hood: Divided Collection Cropped zip-through hoodie in sweatshirt fabric with a jersey-lined hood and zip down the front. Boxy style with low-dropped shoulders, long wide sleeves and ribbing at the cuffs and hem. Soft brushed inside.
2. Chuck Hood: Men H&M Sport Sleeveless sports top in jersey with a mesh-lined hood with a drawstring and collar, and a rounded hem.
3. Pearl Hoodie: Divided Selected Jumper in a fine, airy knit with a double-layered wrapover hood and kangaroo pocket.



### Generation Phase

In [64]:
def generate_answer(user_query, context, model="gpt-3.5-turbo", max_tokens=200):
    """Ask the chat model for a pairing recommendation grounded in `context`.

    Args:
        user_query: the shopper's question, inserted verbatim into the prompt.
        context: text describing the retrieved items, inserted verbatim into the prompt.
        model: chat model to use. Defaults to "gpt-3.5-turbo"; use 'gpt-4'
            for more advanced capabilities.
        max_tokens: upper bound on the length of the generated reply.

    Returns:
        The text of the first choice with surrounding whitespace removed, or
        an empty string when the response carries no text content.
    """
    messages = [
        {"role": "system", "content": "You are a helpful assistant providing product recommendations."},
        {"role": "user", "content": f"User Query: {user_query}\nContext:\n{context}\nProvide a recommendation for what can be paired with the user's described item."}
    ]
    # Generate response
    response = client.chat.completions.create(
        model=model,
        messages=messages,
        max_tokens=max_tokens
    )

    # Extract the content; it can be None, so avoid calling .strip() on None.
    content = response.choices[0].message.content
    return (content or "").strip()

# Example usage
# NOTE(review): `context` was built from a search that embedded the earlier value of
# `user_query`, not the question assigned here — confirm both should use the same text.
user_query = "What can I pair with it?"
# Use a separate name so the `answer` DataFrame holding the retrieval results is not
# overwritten by a string; the context cell can then be re-run after this one.
generated_answer = generate_answer(user_query, context)
print("Generated Answer:\n", generated_answer)


Generated Answer:
 For the Mallow Hood Cropped zip-through hoodie, you can pair it with high-waisted leggings or sweatpants for a casual and sporty look. You can also layer it over a plain white t-shirt or tank top for added comfort.

Alternatively, you can pair the Chuck Hood Sleeveless sports top with basketball shorts or track pants for a gym-ready outfit. Adding a pair of sneakers will complete the athletic look.

For the Pearl Hoodie Jumper, you can style it with high-waisted jeans or denim shorts for a relaxed and cozy ensemble. To elevate the outfit, you can accessorize with a beanie or baseball cap and ankle boots.

These suggestions aim to complement the different styles of hoodies you mentioned and provide versatile pairing options for various occasions.
