In [None]:
%pip install langchain neo4j sentence-transformers llama-index openai
%pip install pandas numpy
%pip install requests

In [1]:
%pip install -qU langchain-ollama

Note: you may need to restart the kernel to use updated packages.


In [41]:
# Import necessary libraries 
import pandas as pd
import numpy as np
from neo4j import GraphDatabase
from sentence_transformers import SentenceTransformer
from langchain import LLMChain, PromptTemplate
from langchain.llms import Ollama

from langchain_ollama import ChatOllama

import requests

In [58]:
# Custom OpenAI API: using deepbricks API

# Configure Deepbricks API and GPT-4o-mini model
BASE_URL = "https://api.deepbricks.ai/v1/chat/completions"
API_KEY = "sk-WnstQIiLBZkwq17zqjYIOhMIOJUVNfbWCxlvOjmPXZnCDVPA"
MODEL_NAME = "gpt-4o-mini"
temperature = 0.7
max_tokens = 150

class CustomOpenAI:
    def __init__(self, base_url, api_key, model):
        self.base_url = base_url
        self.headers = {
            'Authorization': f'Bearer {api_key}',
            'Content-Type': 'application/json',
        }
        self.model = model

    def generate(self,messages, tem=temperature, max_t=max_tokens):
        payload = {
            'model': self.model,  # add model name
            'messages': messages,  # send messages as context
            'temperature': tem, # add temperature
            'max_tokens': max_t # add max tokens
        }

        response = requests.post(
            f'{self.base_url}',
            headers=self.headers,
            json=payload
        )

        if response.status_code == 200:
            return response.json()['choices'][0]['message']['content'].strip()
        else:
            raise Exception(f"API call failed: {response.status_code}, {response.text}")

# using Custom OpenAI API class
llm = CustomOpenAI(base_url=BASE_URL, api_key=API_KEY, model=MODEL_NAME)

In [25]:
# Custom Ollama Local API

# Configure Ollama Local API and  model
# https://github.com/ollama/ollama/blob/main/docs/openai.md
BASE_URL = "http://localhost:11434/api/generate"
API_KEY = "sk-WnstQIiLBZkwq17zqjYIOhMIOJUVNfbWCxlvOjmPXZnCDVPA"
MODEL_NAME = "llama3.1:latest"
temperature = 0.7
max_tokens = 150

class OllamaLLM:
    def __init__(self, model, base_url):
        self.model_name = model
        self.base_url = base_url

    def generate(self, prompt, temperature=0.7, max_tokens=150):
        url = f"{self.base_url}"
        payload = {
            "model": self.model_name,
            "prompt": prompt,
            "temperature": temperature,
            "max_tokens": max_tokens
        }
        headers = {
            "Content-Type": "application/json"
        }

        response = requests.post(url, json=payload, headers=headers)

        if response.status_code == 200:
            return response.json()["text"]
        else:
            raise Exception(f"Error in Ollama API: {response.status_code}, {response.text}")

# initialize Ollama LLMs
llm = OllamaLLM(model=MODEL_NAME,base_url=BASE_URL)

In [1]:
### LLM
from langchain_ollama import ChatOllama

local_llm = "llama3.2:latest"
base_url = "http://localhost:11434/api/generate"
llm = ChatOllama(model=local_llm, base_url, temperature=0.7,num_predict = 256)
llm_json_mode = ChatOllama(model=local_llm, base_url, temperature=0.7, format="json")


SyntaxError: positional argument follows keyword argument (3275993531.py, line 6)

In [10]:
# 读取 CSV 文件
data = pd.read_csv('test-data-2.csv')

# Replace NaN values with empty strings for Neo4j compatibility
data = data.replace({np.nan: ''})

# 连接到 Neo4j
driver = GraphDatabase.driver("neo4j://localhost:7687", auth=("neo4j", "test_password"))

# 创建服装节点以及它们之间的关系, 创建Neo4j中的图节点和关系
def create_clothing_graph(tx, row):
    query = (
        """
        MERGE (c:Clothing {id: $id, name: $name, brand: $brand, type: $type, group: $group, details: $details, 
                           price: $price, currency: $currency, color: $color, size: $size, 
                           style: $style, pattern: $pattern, material: $material, occasion: $occasion})
        MERGE (b:Brand {name: $brand})
        MERGE (t:Type {name: $type})
        MERGE (b)-[:MAKES]->(c)
        MERGE (c)-[:BELONGS_TO]->(t)
        """
    )
    tx.run(query, id=row['id'], name=row['name'], brand=row['brand'], type=row['type'], group=row['group'], 
           details=row['details'], price=row['price'], currency=row['currency'], color=row['color'], 
           size=row['size'], style=row['style'], pattern=row['pattern'], material=row['material'], 
           occasion=row['occasion'])

# 将数据导入 Neo4j 并建立图
with driver.session() as session:
    data.apply(lambda row: session.execute_write(create_clothing_graph, row), axis=1)

# 删除全文索引
def delete_fulltext_index():
    with driver.session() as session:
        session.run("DROP INDEX clothingIndex IF EXISTS")

# 创建全文索引
def create_fulltext_index():
    with driver.session() as session:
        session.run("""
        CREATE FULLTEXT INDEX clothingIndex
        FOR (c:Clothing)
        ON EACH [c.id, c.name, c.brand, c.details]
        """)

# 在服装节点创建后调用删除索引和创建索引函数
delete_fulltext_index()  # 先删除索引（如果存在）
create_fulltext_index()  # 然后创建新的索引


In [11]:
# 加载嵌入模型
model = SentenceTransformer('all-MiniLM-L6-v2')

# 生成每个服装的描述嵌入向量
data['description'] = data[['name', 'brand', 'type', 'details']].apply(lambda x: ' '.join(x), axis=1)
embeddings = model.encode(data['description'].tolist())

# 存储嵌入到 Neo4j
def store_embedding(tx, name, embedding):
    query = "MATCH (c:Clothing {name: $name}) SET c.embedding = $embedding"
    tx.run(query, name=name, embedding=embedding.tolist())

with driver.session() as session:
    for i, row in data.iterrows():
        session.write_transaction(store_embedding, row['name'], embeddings[i])

  session.write_transaction(store_embedding, row['name'], embeddings[i])


In [12]:
# # 定义一个函数，从 Neo4j 中加载所有服装的嵌入向量
# def load_all_embeddings(tx):
#     result = tx.run("MATCH (c:Clothing) RETURN c.name, c.embedding")
#     # 遍历查询结果并打印每一条记录
#     for record in result:
#         print(f"Name: {record['c.name']}, Embedding: {record['c.embedding']}")
#     return {record['c.name']: np.array(record['c.embedding']) for record in result}
    
# 定义一个函数进行全文检索
def retrieve_products_fulltext(query):
    with driver.session() as session:
        # 使用全文索引在 Clothing 节点中查找匹配的节点
        result = session.run("""
        CALL db.index.fulltext.queryNodes('clothingIndex', $query)
        YIELD node, score
        RETURN node.id as id, node.name AS name, node.brand AS brand, node.details AS details, score
        ORDER BY score DESC LIMIT 10
        """, {"query": query})
        
        # 打印每一个匹配的结果
        products = []
        for record in result:
            
            print(f"ID: {record['id']}, Name: {record['name']}, Brand: {record['brand']}, Details: {record['details']}, Score: {record['score']}")
            
            products.append({
                "id": record["id"],  # 包含 ID
                "name": record["name"],
                "brand": record["brand"],
                "details": record["details"],
                "score": record["score"]
            })

    return products

In [30]:
# 定义提示模板
template = """
User is searching for clothing. The input query is: "{input_query}".
The following product data matches the query: 
{product_data}.
Generate a response to suggest clothing.
"""
prompt = PromptTemplate(input_variables=["input_query", "product_data"], template=template)

# 自定义生成响应函数
def generate_response(openai_model, input_query, product_data):
    combined_prompt = """
User is searching for clothing. The input query is: "{}".
The following product data matches the query:
{}.
Generate a response to suggest clothing.
""".format(input_query, product_data)

    # messages = [
    #     {"role": "system", "content": "You are a helpful fashion assistant"},
    #     {"role": "user", "content": combined_prompt}
    # ]

    # Ollama API expects a single string for the prompt
    # OllamaPrompt = f"You are a helpful fashion assistant.\n{combined_prompt}"

    response = openai_model.generate(combined_prompt)  # 修改为传递messages
    # 输出响应内容以检查是否为有效的 JSON
    print("Raw API Response:", response)

    # response = openai_model.generate(messages)  # 修改为传递messages

    try:
        # 尝试将响应解析为 JSON
        response_json = response.json()
        return response_json["text"]
    except ValueError as e:
        # 如果解析失败，捕获异常并打印原始响应内容
        print(f"Error decoding JSON: {e}. Raw response: {response.text}")
        return response.text  # 如果不是 JSON，则返回原始文本
    
    return response

In [31]:
# 主题关键词
# TODO: Add more keywords to improve topic detection accuracy
on_topic_keywords = ['jeans', 'dress', 'shirt', 'pants', 'clothing', 'size', 'color', 'fashion', 'outfit', 'trousers', 'jacket']

# TODO: 
off_topic_keywords = ['weather', 'news', 'movie', 'sports', 'politics', 'celebrity', 'tv show', 'music', 'event']

def detect_topic(user_input):
    if any(word in user_input.lower() for word in on_topic_keywords):
        return "on_topic"
    elif any(word in user_input.lower() for word in off_topic_keywords):
        return "off_topic"
    else:
        return "neutral"

In [32]:
def search_and_generate_response(user_query):
    # 从 Neo4j 使用全文索引检索匹配的服装
    similar_products = retrieve_products_fulltext(user_query)  # 使用用户查询进行全文搜索
    print("=====================================================")
    print(f"Similar products: {similar_products}")

    # 格式化产品数据以供提示输入
    # product_data = ", ".join([f"Name: {p['name']} ({p['brand']}, {p['details']})" for p in similar_products])
    product_data = ", ".join([f"{p['name']} (ID: {p['id']}, Brand: {p['brand']}, Details: {p['details']})" for p in similar_products])
    
    # 打印格式化的产品数据以调试
    print("=====================================================")
    print(f"Formatted product data: {product_data}")

    # 使用自定义 OpenAI 类生成响应
    # response = generate_response(llm, user_query, product_data)

    response = generate_response(llm_json_mode, user_query, product_data)
    
    
    # 返回生成的响应和产品数据（JSON 格式）
    return {
        "response": response,
        "products": similar_products  # 这里返回完整的产品信息
    }

# 示例用法
user_input = "actually, I'm looking for a white shirt with brand huili"

if detect_topic(user_input) == "on_topic":
    result = search_and_generate_response(user_input)
    print("=====================================================")
    print(result)
elif detect_topic(user_input) == "off_topic":
    print({"response": "It seems your question is off-topic. Do you want to search for clothing?"})

ID: 55, Name: Casual Shirt, Brand: HuiLi, Details: Cotton shirt with button-down collar, Score: 3.6248295307159424
ID: 35, Name: Casual T-shirt, Brand: HuiLi, Details: Cotton t-shirt with round neck, Score: 3.4255385398864746
ID: 45, Name: Graphic T-shirt, Brand: HuiLi, Details: Cotton t-shirt with graphic print, Score: 3.4255385398864746
ID: 51, Name: Polka Dot Dress, Brand: HuiLi, Details: A-line dress with polka dots, Score: 1.547006368637085
ID: 6, Name: Z1975 FLARED HIGH-WAIST JEANS, Brand: HuiLi, Details: HIGH-RISE - FLAREFaded high-rise jeans with a five-pocket design. Flared hems. Front zip fly and metal top button fastening., Score: 1.0164390802383423
ID: 12, Name: HIGH-RISE SKINNY SCULPT TRF JEANS, Brand: HuiLi, Details: HIGH WAIST - SKINNY - ANKLE LENGTHHigh-waist super stretch jeans with a five-pocket design. Front zip fly and metal top button fastening., Score: 0.9937466382980347
ID: 11, Name: Z1975 FLARED HIGH-WAIST JEANS, Brand: HuiLi, Details: HIGH-WAIST - FLAREFaded hi

ValueError: Received unsupported message type for Ollama.