# Neo4j Generative AI Workshop

## Setup

In [1]:
%%capture
%pip install sentence_transformers langchain openai tiktoken python-dotenv gradio graphdatascience

In [2]:
from graphdatascience import GraphDataScience
from dotenv import load_dotenv
import os
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from langchain.embeddings import OpenAIEmbeddings, BedrockEmbeddings, SentenceTransformerEmbeddings

### Setup Environment Variables

In [3]:
# Load workshop settings from work.env; override=True lets the file win over
# variables that are already set in the process environment.
load_dotenv('work.env', override=True)

# Neo4j
NEO4J_URI = os.getenv('NEO4J_URI', 'bolt://localhost:7687')
NEO4J_USERNAME = os.getenv('NEO4J_USERNAME', 'neo4j')
NEO4J_PASSWORD = os.getenv('NEO4J_PASSWORD', 'password')
# Parse the flag explicitly instead of eval()-ing an environment string:
# eval would execute arbitrary text and raised NameError on values such as "yes".
AURA_DS = os.getenv('AURA_DS', 'false').strip().lower() in ('true', '1', 'yes')

# LLM
EMBEDDING_MODEL = os.getenv('EMBEDDING_MODEL', 'openai')
LLM = os.getenv('LLM', 'gpt-3.5')

## Knowledge Graph Building

### Get Source Data

In [None]:
# Read the department CSV from the workshop bucket and display the resulting frame.
department_df = pd.read_csv('https://storage.googleapis.com/neo4j-workshop-data/genai-hm/department.csv')
department_df

Unnamed: 0,departmentNo,departmentName,sectionNo,sectionName
0,1676,Jersey Basic,16,Womens Everyday Basics
1,1339,Clean Lingerie,61,Womens Lingerie
2,3608,Tights basic,62,"Womens Nightwear, Socks & Tigh"
3,5883,Jersey Basic,26,Men Underwear
4,2032,Jersey,8,Mama
...,...,...,...,...
271,7857,Kids Boy Exclusive,46,Kids Boy
272,7510,Woven,28,Men Edition
273,3420,Small Accessories Extended,66,Womens Small accessories
274,8090,Promotion/Other/Offer,29,Men Other


In [None]:
# Read the product CSV (one row per productCode, including the free-text detailDesc) and display it.
product_df = pd.read_csv('https://storage.googleapis.com/neo4j-workshop-data/genai-hm/product.csv')
product_df

Unnamed: 0,productCode,prodName,productTypeNo,productTypeName,productGroupName,garmentGroupNo,garmentGroupName,detailDesc
0,108775,Strap top,253,Vest top,Garment Upper body,1002,Jersey Basic,Jersey top with narrow shoulder straps.
1,110065,OP T-shirt (Idro),306,Bra,Underwear,1017,"Under-, Nightwear","Microfibre T-shirt bra with underwired, moulde..."
2,111565,20 den 1p Stockings,304,Underwear Tights,Socks & Tights,1021,Socks and Tights,"Semi shiny nylon stockings with a wide, reinfo..."
3,111586,Shape Up 30 den 1p Tights,273,Leggings/Tights,Garment Lower body,1021,Socks and Tights,Tights with built-in support to lift the botto...
4,111593,Support 40 den 1p Tights,304,Underwear Tights,Socks & Tights,1021,Socks and Tights,"Semi shiny tights that shape the tummy, thighs..."
...,...,...,...,...,...,...,...,...
12053,939927,Dolphin,265,Dress,Garment Full body,1013,Dresses Ladies,Short dress in an airy weave with a small stan...
12054,942187,ED Sasha tee,255,T-shirt,Garment Upper body,1005,Jersey Fancy,"Oversized, straight-cut T-shirt in a soft moda..."
12055,946282,Linnea dress,265,Dress,Garment Full body,1013,Dresses Ladies,Short dress in lace with flounces down the fro...
12056,947599,ED Duno 2p.,254,Top,Garment Upper body,1005,Jersey Fancy,"Long-sleeved tops in soft, organic cotton jers..."


In [None]:
# Read the article CSV; each row carries an articleId plus the productCode and departmentNo it links to.
article_df = pd.read_csv('https://storage.googleapis.com/neo4j-workshop-data/genai-hm/article.csv')
article_df

Unnamed: 0,articleId,productCode,departmentNo,prodName,productTypeName,graphicalAppearanceNo,graphicalAppearanceName,colourGroupCode,colourGroupName
0,108775015,108775,1676,Strap top,Vest top,1010016,Solid,9,Black
1,108775044,108775,1676,Strap top,Vest top,1010016,Solid,10,White
2,110065001,110065,1339,OP T-shirt (Idro),Bra,1010016,Solid,9,Black
3,110065002,110065,1339,OP T-shirt (Idro),Bra,1010016,Solid,10,White
4,111565001,111565,3608,20 den 1p Stockings,Underwear Tights,1010016,Solid,9,Black
...,...,...,...,...,...,...,...,...,...
21591,939927001,939927,1322,Dolphin,Dress,1010013,Other pattern,9,Black
21592,942187001,942187,1919,ED Sasha tee,T-shirt,1010016,Solid,9,Black
21593,946282001,946282,1322,Linnea dress,Dress,1010021,Lace,9,Black
21594,947599001,947599,1919,ED Duno 2p.,Top,1010016,Solid,9,Black


In [None]:
# Read the customer CSV and display it.
customer_df = pd.read_csv('https://storage.googleapis.com/neo4j-workshop-data/genai-hm/customer.csv')
customer_df

Unnamed: 0,customerId,fn,active,clubMemberStatus,fashionNewsFrequency,age,postalCode
0,0003e867a930d0d6842f923d6ba7c9b77aba33fe2a0fbf...,1.0,1.0,ACTIVE,Regularly,33.0,d647e4ede3d0eb4ce0750440a110350b5f4c758165d89d...
1,00140d87c629b961e410e1d143084146c6fe71df40fe3d...,,,ACTIVE,NONE,24.0,d686e242886674f5bed783e6ceb2c52fe89f2c39996bbf...
2,00264b7d4cd6498292e8a355b699c2d07725d123f04867...,1.0,1.0,ACTIVE,Regularly,53.0,2c29ae653a9282cce4151bd87643c907644e09541abc28...
3,005c6d3bb66c86aab606814cd9995a12f99b3a44b58c72...,,,PRE-CREATE,NONE,,177b4a2258a85a2247daaa7cdffba96a74c741ea8a6605...
4,006684ff58368b611db31b1ca782a87cad496e69835e42...,,,ACTIVE,NONE,32.0,4296834187b1ffb908c0aa276b29a4b1af87cad557fb40...
...,...,...,...,...,...,...,...
1995,feac9822f51efc778acc044776b4b34e8e0a86615bf983...,,,ACTIVE,NONE,48.0,8cecc780f67ff32def9c8e8dff5f454bce26a7cbd4c860...
1996,fef793ec3a7d62d782824517355d74ded50964dce33009...,,,ACTIVE,NONE,46.0,5799a39cffe701ebdb12181348bf10f9e23abcc3868c43...
1997,ff2b58ad3e83f2e3499b3eda6ea99993b3bca10d8ceee4...,,,ACTIVE,NONE,35.0,2c29ae653a9282cce4151bd87643c907644e09541abc28...
1998,ffb925b11e1bb2e375d22a02d67907994eb8cb92ec2e7d...,,,ACTIVE,NONE,34.0,ebdd8c5c893683c3cf52c011d4e35024e46d183c95f0fa...


In [None]:
# Read the transaction CSV; each row references a customerId and an articleId.
# No date parsing is requested here, so tDat is loaded as plain text.
transaction_df = pd.read_csv('https://storage.googleapis.com/neo4j-workshop-data/genai-hm/transaction.csv')
transaction_df

Unnamed: 0,tDat,customerId,articleId,price,salesChannelId,txId
0,2018-09-20,080756754aef493b2b36f592eae744f2b9787dc55b635b...,662888002,0.033881,2,1559
1,2018-09-20,080756754aef493b2b36f592eae744f2b9787dc55b635b...,662888001,0.033881,2,1560
2,2018-09-20,080756754aef493b2b36f592eae744f2b9787dc55b635b...,651244002,0.013542,2,1561
3,2018-09-20,080756754aef493b2b36f592eae744f2b9787dc55b635b...,651244001,0.006763,2,1562
4,2018-09-20,0843d9fb6e4f3befa53ff3a8447b902b9f75bfa955a0f9...,633152003,0.030492,1,1588
...,...,...,...,...,...,...
48059,2020-09-22,b6be55f233772b5fc4a1ebedf36542fb3e1b6c15c23c7e...,921266007,0.016932,2,31779124
48060,2020-09-22,b6be55f233772b5fc4a1ebedf36542fb3e1b6c15c23c7e...,812530004,0.010153,2,31779125
48061,2020-09-22,b6be55f233772b5fc4a1ebedf36542fb3e1b6c15c23c7e...,942187001,0.016932,2,31779126
48062,2020-09-22,b6be55f233772b5fc4a1ebedf36542fb3e1b6c15c23c7e...,866731001,0.025407,2,31779127


### Connect to Neo4j

In [None]:
# NOTE(review): the NEO4J_* constants were already read from 'work.env' in the setup cell,
# so loading '.env' here only changes os.environ, not those constants - confirm this is intended.
load_dotenv('.env', override=True)

# Use Neo4j URI and credentials according to our setup
gds = GraphDataScience(
    NEO4J_URI,
    auth=(NEO4J_USERNAME, NEO4J_PASSWORD),
    aura_ds=AURA_DS)

# Necessary if you enabled Arrow on the db - this is true for AuraDS
gds.set_database("neo4j")

### Create Constraints

In [None]:
# one uniqueness constraint for each node label
# IF NOT EXISTS keeps this cell safe to re-run; each constraint covers the key
# property that the node MERGE statements below match on.
gds.run_cypher('CREATE CONSTRAINT unique_department_no IF NOT EXISTS FOR (n:Department) REQUIRE n.departmentNo IS UNIQUE')
gds.run_cypher('CREATE CONSTRAINT unique_product_code IF NOT EXISTS FOR (n:Product) REQUIRE n.productCode IS UNIQUE')
gds.run_cypher('CREATE CONSTRAINT unique_article_id IF NOT EXISTS FOR (n:Article) REQUIRE n.articleId IS UNIQUE')
gds.run_cypher('CREATE CONSTRAINT unique_customer_id IF NOT EXISTS FOR (n:Customer) REQUIRE n.customerId IS UNIQUE')

### Helper Functions

In [None]:
from typing import Tuple, Union
from numpy.typing import ArrayLike


def make_map(x):
    """Normalise a key/label specification into a tuple.

    A plain string is duplicated into ``(x, x)`` so that the same name is used
    on both sides of a mapping; a tuple is returned unchanged.

    Args:
        x: Either a string or a tuple.

    Returns:
        ``(x, x)`` for a string, or ``x`` itself for a tuple.

    Raises:
        TypeError: If ``x`` is neither a string nor a tuple.
    """
    # isinstance (rather than an exact type comparison) also accepts subclasses,
    # e.g. numpy string scalars or named tuples.
    if isinstance(x, str):
        return x, x
    if isinstance(x, tuple):
        return x
    raise TypeError("Entry must be of type string or tuple")


def make_set_clause(prop_names: ArrayLike, element_name='n', item_name='rec'):
    """Build a Cypher ``SET`` clause that copies properties from a record.

    Args:
        prop_names: Property names to copy.
        element_name: Cypher variable receiving the properties.
        item_name: Cypher variable holding the source record.

    Returns:
        A string of the form ``SET n.a = rec.a, n.b = rec.b``.
    """
    assignments = ', '.join(
        f'{element_name}.{name} = {item_name}.{name}' for name in prop_names
    )
    return 'SET ' + assignments


def make_node_merge_query(node_key_name: str, node_label: str, cols: ArrayLike):
    """Build the Cypher query that merges a batch of nodes from ``$recs``.

    Args:
        node_key_name: Property used as the MERGE key.
        node_label: Label of the nodes to merge.
        cols: All column names of the records; every column other than the
            key is written with a SET clause.

    Returns:
        The query text, ending with ``RETURN count(n) AS nodeLoadedCount``.
    """
    query_lines = [
        'UNWIND $recs AS rec',
        f'MERGE(n:{node_label} {{{node_key_name}: rec.{node_key_name}}})',
    ]
    non_key_cols = [col for col in cols if col != node_key_name]
    # Only emit a SET clause when there is something besides the key to write
    if non_key_cols:
        query_lines.append(make_set_clause(non_key_cols))
    query_lines.append('RETURN count(n) AS nodeLoadedCount')
    return '\n'.join(query_lines)


def make_rel_merge_query(source_target_labels: Union[Tuple[str, str], str],
                         source_node_key: Union[Tuple[str, str], str],
                         target_node_key: Union[Tuple[str, str], str],
                         rel_type: str,
                         cols: ArrayLike,
                         rel_key: str = None):
    """Build the Cypher query that merges a batch of relationships from ``$recs``.

    Each of the first three arguments may be a single string (used for both
    positions) or a tuple, as normalised by ``make_map``.

    Args:
        source_target_labels: ``(source label, target label)``.
        source_node_key: ``(node property, record field)`` matching the source node.
        target_node_key: ``(node property, record field)`` matching the target node.
        rel_type: Relationship type to merge.
        cols: All column names of the records; columns other than ``rel_key`` and
            the two record fields above are written as relationship properties.
        rel_key: Optional property that is part of the MERGE pattern. When it is
            None the relationship is merged on its type and end nodes only.

    Returns:
        The query text, ending with ``RETURN count(r) AS relLoadedCount``.
    """
    labels = make_map(source_target_labels)
    source_keys = make_map(source_node_key)
    target_keys = make_map(target_node_key)

    if rel_key is None:
        merge_line = f'MERGE(s)-[r:{rel_type}]->(t)'
    else:
        merge_line = f'MERGE(s)-[r:{rel_type} {{{rel_key}: rec.{rel_key}}}]->(t)'

    # The leading whitespace on each line is part of the query text
    query = (
        '\tUNWIND $recs AS rec\n'
        f'    MATCH(s:{labels[0]} {{{source_keys[0]}: rec.{source_keys[1]}}})\n'
        f'    MATCH(t:{labels[1]} {{{target_keys[0]}: rec.{target_keys[1]}}})\n'
        '\t'
    ) + merge_line

    # Fields consumed by the MATCH/MERGE patterns are not repeated as properties
    consumed = (rel_key, source_keys[1], target_keys[1])
    rel_props = [col for col in cols if col not in consumed]
    if rel_props:
        query = query + '\n\t' + make_set_clause(rel_props, 'r')
    return query + '\n\tRETURN count(r) AS relLoadedCount'


def chunks(xs, n=10_000):
    """Split a sliceable sequence into consecutive pieces of at most ``n`` items.

    Args:
        xs: Any object supporting ``len()`` and slicing.
        n: Maximum piece size; values below 1 are treated as 1.

    Returns:
        A list of slices of ``xs`` in their original order.
    """
    step = max(1, n)
    pieces = []
    for start in range(0, len(xs), step):
        pieces.append(xs[start:start + step])
    return pieces


def load_nodes(gds: GraphDataScience, node_df: pd.DataFrame, node_key_col: str, node_label: str, chunk_size=10_000):
    """Merge the rows of a DataFrame into the database as nodes, in batches.

    Args:
        gds: Client used to run the Cypher statements.
        node_df: One row per node; every column becomes a node property.
        node_key_col: Column used as the MERGE key.
        node_label: Label given to the nodes.
        chunk_size: Number of rows sent per query.

    Progress is printed after each batch; nothing is returned.
    """
    rows = node_df.to_dict('records')
    print(f'======  loading {node_label} nodes  ======')
    row_count = len(rows)
    print(f'staging {row_count:,} records')
    merge_query = make_node_merge_query(node_key_col, node_label, node_df.columns.copy())
    loaded_so_far = 0
    for batch in chunks(rows, chunk_size):
        result = gds.run_cypher(merge_query, params={'recs': batch})
        # The query returns a single row holding the count for this batch
        loaded_so_far += result.iloc[0, 0]
        print(f'Loaded {loaded_so_far:,} of {row_count:,} nodes')


def load_rels(gds: GraphDataScience,
              rel_df: pd.DataFrame,
              source_target_labels: Union[Tuple[str, str], str],
              source_node_key: Union[Tuple[str, str], str],
              target_node_key: Union[Tuple[str, str], str],
              rel_type: str,
              rel_key: str = None,
              chunk_size=10_000):
    """Merge the rows of a DataFrame into the database as relationships, in batches.

    Args:
        gds: Client used to run the Cypher statements.
        rel_df: One row per relationship, holding the source/target key fields
            and any relationship properties.
        source_target_labels: Source and target node labels (string or pair).
        source_node_key: Source node key (string or ``(node property, record field)``).
        target_node_key: Target node key (string or ``(node property, record field)``).
        rel_type: Relationship type to merge.
        rel_key: Optional property included in the MERGE pattern.
        chunk_size: Number of rows sent per query.

    Progress is printed after each batch; nothing is returned.
    """
    rows = rel_df.to_dict('records')
    print(f'======  loading {rel_type} relationships  ======')
    row_count = len(rows)
    print(f'staging {row_count:,} records')
    merge_query = make_rel_merge_query(
        source_target_labels,
        source_node_key,
        target_node_key,
        rel_type,
        rel_df.columns.copy(),
        rel_key,
    )
    loaded_so_far = 0
    for batch in chunks(rows, chunk_size):
        result = gds.run_cypher(merge_query, params={'recs': batch})
        # The query returns a single row holding the count for this batch
        loaded_so_far += result.iloc[0, 0]
        print(f'Loaded {loaded_so_far:,} of {row_count:,} relationships')

### Load Nodes

In [None]:
%%time
# Merge one Department node per row, keyed on departmentNo.
load_nodes(gds, department_df, 'departmentNo', 'Department')

staging 276 records
Loaded 276 of 276 nodes
CPU times: user 6.86 ms, sys: 2.39 ms, total: 9.25 ms
Wall time: 2.44 s


In [None]:
%%time
# Merge one Product node per row, keyed on productCode.
load_nodes(gds, product_df, 'productCode', 'Product')

staging 12,058 records
Loaded 10,000 of 12,058 nodes
Loaded 12,058 of 12,058 nodes
CPU times: user 261 ms, sys: 32.3 ms, total: 293 ms
Wall time: 19.9 s


In [None]:
%%time
# productCode and departmentNo are dropped from the node properties; those links
# are loaded as relationships in the "Load Relationships" section instead.
load_nodes(gds, article_df.drop(columns=['productCode', 'departmentNo']), 'articleId', 'Article')

staging 21,596 records
Loaded 10,000 of 21,596 nodes
Loaded 20,000 of 21,596 nodes
Loaded 21,596 of 21,596 nodes
CPU times: user 388 ms, sys: 18.1 ms, total: 406 ms
Wall time: 18 s


In [None]:
%%time
# Merge one Customer node per row, keyed on customerId.
load_nodes(gds, customer_df, 'customerId', 'Customer')

staging 2,000 records
Loaded 2,000 of 2,000 nodes
CPU times: user 37.3 ms, sys: 3.29 ms, total: 40.6 ms
Wall time: 3.28 s


### Load Relationships

In [None]:
%%time
# (Article)-[:FROM_DEPARTMENT]->(Department), matched on articleId / departmentNo.
load_rels(gds, article_df[['articleId', 'departmentNo']], source_target_labels=('Article', 'Department'),
          source_node_key='articleId', target_node_key='departmentNo',
          rel_type='FROM_DEPARTMENT')

staging 21,596 records
Loaded 10,000 of 21,596 relationships
Loaded 20,000 of 21,596 relationships
Loaded 21,596 of 21,596 relationships
CPU times: user 136 ms, sys: 10 ms, total: 146 ms
Wall time: 14.3 s


In [None]:
%%time
# (Article)-[:VARIANT_OF]->(Product), matched on articleId / productCode.
load_rels(gds, article_df[['articleId', 'productCode']], source_target_labels=('Article', 'Product'),
          source_node_key='articleId',target_node_key='productCode',
          rel_type='VARIANT_OF')

staging 21,596 records
Loaded 10,000 of 21,596 relationships
Loaded 20,000 of 21,596 relationships
Loaded 21,596 of 21,596 relationships
CPU times: user 130 ms, sys: 14.3 ms, total: 145 ms
Wall time: 10.8 s


In [None]:
%%time
# (Customer)-[:PURCHASED]->(Article); the remaining transaction columns are set as
# relationship properties.
# NOTE(review): no rel_key is passed, so the MERGE is on (customer, article) only -
# repeat purchases of the same article would share one relationship whose properties
# are overwritten. Consider rel_key='txId' if each transaction should be kept; confirm intent.
load_rels(gds, transaction_df, source_target_labels=('Customer', 'Article'),
          source_node_key='customerId', target_node_key='articleId',
          rel_type='PURCHASED')

staging 48,064 records
Loaded 10,000 of 48,064 relationships
Loaded 20,000 of 48,064 relationships
Loaded 30,000 of 48,064 relationships
Loaded 40,000 of 48,064 relationships
Loaded 48,064 of 48,064 relationships
CPU times: user 748 ms, sys: 37.6 ms, total: 785 ms
Wall time: 30.5 s


#### Convert Transaction Dates

In [None]:
# tDat was loaded from the CSV as text; rewrite it on every PURCHASED relationship
# using Cypher's date() function.
gds.run_cypher('''
MATCH (:Customer)-[r:PURCHASED]->()
SET r.tDat = date(r.tDat)
''')

## Simple Vector Search
In this section we will build text embeddings for products and demonstrate how to leverage the Neo4j vector index for vector search.

### Creating Text Embedding

In [11]:
def load_embedding_model(embedding_model_name: str):
    """Create the embedding client selected by name.

    Args:
        embedding_model_name: ``"openai"`` or ``"aws"``; any other value selects
            the local sentence-transformers model.

    Returns:
        A ``(embeddings, dimension)`` pair, where ``dimension`` is the vector
        length this notebook pairs with that model (1536, 1536 or 384).
    """
    if embedding_model_name == "openai":
        return OpenAIEmbeddings(), 1536
    if embedding_model_name == "aws":
        return BedrockEmbeddings(), 1536
    # Fallback for every other name: local model cached under /embedding_model
    local_embeddings = SentenceTransformerEmbeddings(
        model_name="all-MiniLM-L6-v2", cache_folder="/embedding_model"
    )
    return local_embeddings, 384

In [12]:
# Instantiate the model chosen via the EMBEDDING_MODEL setting; `dimension` is reused
# later when the vector index is created.
embedding_model, dimension = load_embedding_model(EMBEDDING_MODEL)

In [None]:
# Keep the key plus the text fields that feed the embedding document, and skip
# products without a description.
product_emb_df = product_df[['productCode', 'prodName', 'productTypeName', 'garmentGroupName', 'detailDesc']]
# .copy() makes this an independent frame, so adding columns to it later does not
# raise pandas' SettingWithCopyWarning.
product_emb_df = product_emb_df[product_emb_df.detailDesc.notnull()].copy()

In [None]:
def create_doc(row):
    """Render one product row as the text document that gets embedded.

    Args:
        row: Object exposing ``prodName``, ``productTypeName``,
            ``garmentGroupName`` and ``detailDesc`` attributes.

    Returns:
        A multi-line string with one labelled line per field, starting and
        ending with a newline.
    """
    return (
        f'\nName: {row.prodName}'
        f'\nType: {row.productTypeName}'
        f'\nGroup: {row.garmentGroupName}'
        f'\nDescription: {row.detailDesc}\n'
    )

# Build the document text per product, then keep only productCode and doc.
# Note: this cell consumes the source text columns, so it cannot be re-run
# without first re-running the cell that creates product_emb_df.
product_emb_df['doc'] = product_emb_df.apply(create_doc, axis=1)
product_emb_df = product_emb_df.drop(columns=['prodName', 'productTypeName', 'garmentGroupName', 'detailDesc'])
product_emb_df

Unnamed: 0,productCode,doc
0,108775,\nName: Strap top\nType: Vest top\nGroup: Jers...
1,110065,\nName: OP T-shirt (Idro)\nType: Bra\nGroup: U...
2,111565,\nName: 20 den 1p Stockings\nType: Underwear T...
3,111586,\nName: Shape Up 30 den 1p Tights\nType: Leggi...
4,111593,\nName: Support 40 den 1p Tights\nType: Underw...
...,...,...
12053,939927,\nName: Dolphin\nType: Dress\nGroup: Dresses L...
12054,942187,\nName: ED Sasha tee\nType: T-shirt\nGroup: Je...
12055,946282,\nName: Linnea dress\nType: Dress\nGroup: Dres...
12056,947599,\nName: ED Duno 2p.\nType: Top\nGroup: Jersey ...


In [None]:
%%time

count = 0
embeddings = []
for docs in chunks(product_emb_df.doc, n=500):
    count += len(docs)
    print(f'Embedded {count} of {product_emb_df.shape[0]}')
    embeddings.extend(embedding_model.embed_documents(docs))

Embedded 500 of 12021
Embedded 1000 of 12021
Embedded 1500 of 12021
Embedded 2000 of 12021
Embedded 2500 of 12021
Embedded 3000 of 12021
Embedded 3500 of 12021
Embedded 4000 of 12021
Embedded 4500 of 12021
Embedded 5000 of 12021


Retrying langchain.embeddings.openai.embed_with_retry.<locals>._embed_with_retry in 4.0 seconds as it raised ServiceUnavailableError: The server is overloaded or not ready yet..


Embedded 5500 of 12021
Embedded 6000 of 12021
Embedded 6500 of 12021
Embedded 7000 of 12021
Embedded 7500 of 12021
Embedded 8000 of 12021
Embedded 8500 of 12021
Embedded 9000 of 12021
Embedded 9500 of 12021
Embedded 10000 of 12021
Embedded 10500 of 12021
Embedded 11000 of 12021
Embedded 11500 of 12021
Embedded 12000 of 12021
Embedded 12021 of 12021
CPU times: user 3.28 s, sys: 378 ms, total: 3.66 s
Wall time: 46.2 s


In [None]:
# `embeddings` was built by walking product_emb_df.doc in order, so this positional
# assignment lines each vector up with its product row.
product_emb_df['textEmbedding'] = embeddings

In [None]:
# Display-only: show the query the generic node loader would generate for these columns.
# The next section writes the vectors with a dedicated procedure call instead.
print(make_node_merge_query('productCode', 'Product', product_emb_df.columns.copy()))

UNWIND $recs AS rec
MERGE(n:Product {productCode: rec.productCode})
SET n.doc = rec.doc, n.textEmbedding = rec.textEmbedding
RETURN count(n) AS nodeLoadedCount


#### Create Vector Property

In [None]:
# Write each product's embedding through db.create.setNodeVectorProperty,
# in batches of 100 records per query.
records = product_emb_df[['productCode', 'textEmbedding']].to_dict('records')
print(f'======  loading Product text embeddings ======')
total = len(records)
print(f'staging {total:,} records')
cumulative_count = 0
for recs in chunks(records, n=100):
    res = gds.run_cypher('''
    UNWIND $recs AS rec
    MATCH(n:Product {productCode: rec.productCode})
    CALL db.create.setNodeVectorProperty(n, "textEmbedding", rec.textEmbedding)
    RETURN count(n) AS propertySetCount
    ''', params={'recs': recs})
    # The query returns a single row holding the count for this batch
    cumulative_count += res.iloc[0, 0]
    print(f'Set {cumulative_count:,} of {total:,} text embeddings')

staging 12,021 records
Set 100 of 12,021 text embeddings
Set 200 of 12,021 text embeddings
Set 300 of 12,021 text embeddings
Set 400 of 12,021 text embeddings
Set 500 of 12,021 text embeddings
Set 600 of 12,021 text embeddings
Set 700 of 12,021 text embeddings
Set 800 of 12,021 text embeddings
Set 900 of 12,021 text embeddings
Set 1,000 of 12,021 text embeddings
Set 1,100 of 12,021 text embeddings
Set 1,200 of 12,021 text embeddings
Set 1,300 of 12,021 text embeddings
Set 1,400 of 12,021 text embeddings
Set 1,500 of 12,021 text embeddings
Set 1,600 of 12,021 text embeddings
Set 1,700 of 12,021 text embeddings
Set 1,800 of 12,021 text embeddings
Set 1,900 of 12,021 text embeddings
Set 2,000 of 12,021 text embeddings
Set 2,100 of 12,021 text embeddings
Set 2,200 of 12,021 text embeddings
Set 2,300 of 12,021 text embeddings
Set 2,400 of 12,021 text embeddings
Set 2,500 of 12,021 text embeddings
Set 2,600 of 12,021 text embeddings
Set 2,700 of 12,021 text embeddings
Set 2,800 of 12,021 tex

#### Vector Index

In [None]:
%%time

gds.run_cypher(f'CALL db.index.vector.createNodeIndex("product-text-embeddings", "Product", "textEmbedding", {dimension}, "cosine")')

# wait for full index creation (timeout after 300 seconds)
gds.run_cypher('CALL db.awaitIndex("product-text-embeddings", 300)')

CPU times: user 8.44 ms, sys: 2.7 ms, total: 11.1 ms
Wall time: 1min 27s


#### Vector Search
An Example with Product Search

In [None]:
# Free-text query to search for. Alternative prompt to try:
#search_prompt = 'denim jeans, loose fit, high-waist'
search_prompt = 'winter sweater with zipper'

In [None]:
# Embed the prompt with the same model used for the product documents, then show
# the vector's length and its first ten components.
query_vector = embedding_model.embed_query(search_prompt)
print(f'query vector length: {len(query_vector)}')
print(f'query vector sample: {query_vector[:10]}')

query vector length: 1536
query vector sample: [-0.015721399620501487, -0.01600166252793351, -0.0007557081544117396, -0.020672705397198326, -0.025810854974828365, 0.013906364579883364, -0.009435508570318827, -0.010049416864799713, 0.0030361784703818165, -0.019391505397297122]


In [None]:
# Ask the vector index for the 10 products nearest to the query vector and
# return their descriptive fields together with the similarity score.
gds.run_cypher('''
CALL db.index.vector.queryNodes("product-text-embeddings", 10, $queryVector)
YIELD node AS product, score
RETURN product.prodName AS prodName,
    product.productTypeName AS productTypeName,
    product.garmentGroupName AS garmentGroupName,
    product.detailDesc AS detailDesc,
    score
''', params={'queryVector': query_vector})

Unnamed: 0,prodName,productTypeName,garmentGroupName,detailDesc,score
0,Baby body,Bodysuit,Jersey Basic,"Body in soft jersey with a wider neckline, 3/4...",0.914686
1,Brit Baby Tee,T-shirt,Jersey Basic,"Short, fitted top in ribbed cotton jersey with...",0.903118
2,SLIM FIT: BASIC,Trousers,Woven/Jersey/Knitted mix Baby,"5-pocket jeans in soft, washed stretch denim w...",0.899984
3,Vickan baby tee,T-shirt,Jersey Basic,"Short, fitted T-shirt in cotton jersey with a ...",0.898215
4,CLOUDY bathrobe,Top,Woven/Jersey/Knitted mix Baby,Baby Exclusive. Dressing gown in soft organic ...,0.896351
5,Tucky towel,Bodysuit,Accessories,Soft cotton towel with a hood. Patterned velou...,0.894173
6,TYGA trouser,Trousers,Woven/Jersey/Knitted mix Baby,Baby Exclusive. Pull-on trousers in soft organ...,0.893967
7,Klimt basic skinny,Trousers,Woven/Jersey/Knitted mix Baby,5-pocket jeans in washed stretch denim with an...,0.893927
8,Bambini,Blouse,Blouses,V-neck blouse in a viscose crêpe weave with da...,0.893908
9,CHILLY LS T-shirt,T-shirt,Woven/Jersey/Knitted mix Baby,Baby Exclusive. Long-sleeved Henley top in sof...,0.893597


We can also do this with LangChain, which is the recommended approach going forward.  To do this we use the Neo4jVector class and call the method that sets it up from an existing index in the graph.

In [4]:
##TODO: This is the right transition point for langchain...
##Do that here and make the rest of the below use it instead of gds.run_cypher

## Semantic Search with Context
Using Explicit Relationships in Enterprise Data


Above we saw how you can use the vector index to find semantically similar products for user searches. But the graph is rich with other information. Let's leverage our knowledge graph to make this better.

An important piece of information expressed in this graph, but not directly in the documents, is customer purchasing behavior.  We can use a Cypher query to make recommendations without relying on any document content. This is similar to collaborative filtering, but generalized to purchase history (not necessarily rating-based).

#### Example Purchase History

Consider the below customer

In [None]:
# Example customer; CUSTOMER_ID is reused by the queries in the following cells.
CUSTOMER_ID = "daae10780ecd14990ea190a1e9917da33fe96cd8cfa5e80b67b4600171aa77e0"
# List the products behind the articles this customer purchased, most frequent first.
gds.run_cypher('''
    MATCH(c:Customer {customerId: $customerId})-[:PURCHASED]->(:Article)
    -[:VARIANT_OF]->(p:Product)
    RETURN p.productCode AS productCode,
        p.prodName AS prodName,
        p.productTypeName AS productTypeName,
        p.garmentGroupName AS garmentGroupName,
        p.detailDesc AS detailDesc,
        count(*) AS purchaseCount
    ORDER BY purchaseCount DESC
''', params={'customerId': CUSTOMER_ID})

Unnamed: 0,productCode,prodName,productTypeName,garmentGroupName,detailDesc,purchaseCount
0,569974,DONT USE ROLAND HOOD,Hoodie,Jersey Basic,Top in sweatshirt fabric with a lined drawstri...,2
1,557247,Petar Sweater,Sweater,Jersey Basic,Oversized top in sturdy sweatshirt fabric with...,2
2,753724,Rosemary,Dress,Dresses Ladies,Short dress in woven fabric with 3/4-length sl...,1
3,733027,Tove,Top,Jersey Fancy,Short top in soft cotton jersey with a round n...,1
4,713577,Malte r-neck,Sweater,Knitwear,"Jumper in soft, patterned, fine-knit cotton wi...",1
5,687016,DORIS CREW,Sweater,Jersey Fancy,Top in sweatshirt fabric with a motif on the f...,1
6,731142,Lead Superskinny,Trousers,Trousers,Chinos in stretch twill with a zip fly and but...,1
7,688537,Simple as that Cheeky Tanga,Swimwear bottom,Swimwear,Fully lined bikini bottoms with a mid waist an...,1
8,606711,Rylee flatform,Heeled sandals,Shoes,"Sandals with imitation suede straps, an elasti...",1
9,642498,Bubble Bum Bandeau,Bikini top,Swimwear,Fully lined bandeau bikini top with padded cup...,1


#### Graph Patterns For Retrieval Query

In [None]:
# This is the example pattern we can use to predict likely customer preferences based on collaborative behavior:
# customer -> purchased articles <- other customers -> their purchased articles -> products.
# `score` is the number of such paths reaching each product.
# Note: the pattern does not exclude products the customer has already purchased.
# TODO: Should we be filtering this somehow for best practice?
gds.run_cypher('''
    MATCH(c:Customer {customerId: $customerId})-[:PURCHASED]->(:Article)
    <-[:PURCHASED]-(:Customer)-[:PURCHASED]->(:Article)
    -[:VARIANT_OF]->(p:Product)
    RETURN p.productCode AS productCode,
        p.prodName AS prodName,
        p.productTypeName AS productTypeName,
        p.garmentGroupName AS garmentGroupName,
        p.detailDesc AS detailDesc,
        count(*) AS score
    ORDER BY score DESC LIMIT 10
''', params={'customerId': CUSTOMER_ID})

Unnamed: 0,productCode,prodName,productTypeName,garmentGroupName,detailDesc,score
0,685816,RONNY REG RN T-SHIRT,T-shirt,Jersey Basic,Round-necked T-shirt in soft cotton jersey.,28
1,599580,Timeless Midrise Brief,Swimwear bottom,Swimwear,Fully lined bikini bottoms with a mid waist an...,20
2,684209,Simple as That Triangle Top,Bikini top,Swimwear,"Lined, non-wired, triangle bikini top with a w...",17
3,685813,PETAR SWEATSHIRT,Sweater,Jersey Basic,Top in soft sweatshirt fabric. Slightly looser...,14
4,688537,Simple as that Cheeky Tanga,Swimwear bottom,Swimwear,Fully lined bikini bottoms with a mid waist an...,13
5,778064,Claudine t-shirt,T-shirt,Jersey Basic,Fitted top in soft organic cotton jersey with ...,12
6,685814,RICHIE HOOD,Hoodie,Jersey Basic,Hoodie in sweatshirt fabric made from a cotton...,10
7,557247,Petar Sweater,Sweater,Jersey Basic,Oversized top in sturdy sweatshirt fabric with...,9
8,448509,Perrie Slim Mom Denim TRS,Trousers,Trousers,"5-pocket, ankle-length jeans in washed, sturdy...",9
9,806388,Therese tee,T-shirt,Jersey Basic,Wide T-shirt in soft cotton jersey with a ribb...,8


In [None]:
# Personalized search: take the 100 products nearest to the query vector, then rank
# them by how strongly they connect to this customer's purchase history (articles the
# customer bought that were also bought by purchasers of the product).
# OPTIONAL MATCH keeps products with no such connection, with a purchaseHistoryScore of 0.
gds.run_cypher('''
CALL db.index.vector.queryNodes("product-text-embeddings", 100, $queryVector)
YIELD node AS product, score AS searchScore
    OPTIONAL MATCH(product)<-[:VARIANT_OF]-(:Article)<-[:PURCHASED]-(:Customer)
    -[:PURCHASED]->(a:Article)<-[:PURCHASED]-(:Customer {customerId: $customerId})
RETURN product.prodName AS prodName,
    product.detailDesc AS detailDesc,
    searchScore,
    count(a) AS purchaseHistoryScore
ORDER BY purchaseHistoryScore DESC LIMIT 10
''', params={'queryVector': embedding_model.embed_query("Oversized Sweater"), 'customerId': CUSTOMER_ID})

Unnamed: 0,prodName,detailDesc,searchScore,purchaseHistoryScore
0,Petar Sweater,Oversized top in sturdy sweatshirt fabric with...,0.93894,9
1,Queen Sweater,Top in lightweight sweatshirt fabric with ribb...,0.927906,7
2,Jess oversize LS,Oversized top in soft jersey made from a cotto...,0.930035,3
3,SISTER OL,Off-the-shoulder top in sweatshirt fabric with...,0.927986,1
4,Salt Sweater,Long top in soft sweatshirt fabric with a ribb...,0.929883,1
5,Papaya Hood,Oversized jacket in soft sweatshirt fabric wit...,0.928753,1
6,Ridge,Oversized jumper in a soft knit with a deep V-...,0.930296,1
7,Dolly hood,Oversized top in soft sweatshirt fabric with a...,0.934544,1
8,Allen Sweater,Top in sweatshirt fabric made from a cotton bl...,0.931057,1
9,Annie Oversized Hood,Oversized top in sweatshirt fabric with a line...,0.933388,1


## KG Powered Inference for AI

We saw before how we could use graph pattern matching to personalize search and make it more relevant.

TODO: We also saw how we could use similar tools to power semantic search and analytics on entities connected to documents

Graph pattern matching is very powerful and can work well in a lot of scenarios.

In addition to this, we also have Graph Data Science, which allows us to enrich the current knowledge graph with machine learning that can
1. Provide additional information to improve the relevancy of search results at scale
2. Provide additional inferences to GenAI

We will show an example of how this works using Node Embedding and K-Nearest Neighbor algorithms

TODO: Motivation around GML approach here....how it can scale, relationship to KGC/link prediction.  This is a simple unsupervised approach.  You can expand on this and refine it more.

### Embedding and KNN

In [None]:
# Adjust pandas display limits (row count, column width, total width) for the
# result tables shown in the rest of this section.
pd.set_option('display.max_rows', 12)
pd.set_option('display.max_colwidth', 500)
pd.set_option('display.width', 0)

In [None]:
def clear_all_graphs():
    """Drop every graph currently listed by `gds.graph.list()`.

    The graph names are read from the `graphName` column of the listing, and each
    graph is fetched by name and dropped in turn. Returns None.
    """
    for catalog_name in gds.graph.list().graphName.tolist():
        gds.graph.get(catalog_name).drop()

#### Clear Past Analysis (If rerunning this Notebook)

In [None]:
# Drop any graph projections left over from a previous run so the name 'proj' can be reused.
clear_all_graphs()

In [None]:
# Remove CUSTOMERS_ALSO_PURCHASED relationships written by a previous run.
# The delete is executed in batches of 1000 rows (CALL { ... } IN TRANSACTIONS).
gds.run_cypher('''
    MATCH(:Article)-[r:CUSTOMERS_ALSO_PURCHASED]->()
    CALL {
        WITH r
        DELETE r
    } IN TRANSACTIONS OF 1000 ROWS
    ''')

#### Apply GDS FastRP Node Embeddings and K-Nearest Neighbor (KNN) Similarity

First, apply a graph projection to structure the portion of the graph we need in an optimized in-memory format for graph ML.

In [None]:
%%time
# graph projection named 'proj': Customer, Article and Product nodes connected by
# PURCHASED and VARIANT_OF relationships, both projected as UNDIRECTED.
# `g` is the handle used by the algorithm calls below; the second return value is discarded.
g, _ = gds.graph.project('proj',['Customer', 'Article', 'Product'],
                         {'PURCHASED':{'orientation':'UNDIRECTED'}, 'VARIANT_OF':{'orientation':'UNDIRECTED'}},
                         readConcurrency=4)

CPU times: user 2.2 ms, sys: 1.31 ms, total: 3.51 ms
Wall time: 431 ms


Next, we will generate node embeddings for similarity calculation.  In this case, we will use FastRP (Fast Random Projection) which is a fast, scalable, and robust embedding algorithm. FastRP calculates embeddings using probabilistic sampling and linear algebra.

In [None]:
%%time
# embeddings (writing back Article embeddings in case we want to introspect later)
# FastRP adds a 128-dimensional `embedding` property to the nodes of the in-memory
# projection (with a fixed randomSeed); only the Article embeddings are then written
# back to the database.
gds.fastRP.mutate(g, mutateProperty='embedding', embeddingDimension=128, randomSeed=7474, concurrency=4)
gds.graph.writeNodeProperties(g, ['embedding'], ['Article'])

CPU times: user 4.06 ms, sys: 1.65 ms, total: 5.71 ms
Wall time: 4.27 s


writeMillis                 1358
graphName                   proj
nodeProperties       [embedding]
propertiesWritten          21596
Name: 0, dtype: object

This is what the node embeddings look like:

In [None]:
# Peek at three Article nodes and the embedding written back in the previous step.
gds.run_cypher('MATCH(n:Article) RETURN n.articleId, n.embedding LIMIT 3')

Unnamed: 0,n.articleId,n.embedding
0,108775015,"[0.07331004738807678, 0.17926183342933655, -0.07301197201013565, -0.049210064113140106, 0.16503135859966278, -0.15523408353328705, -0.10822394490242004, -0.02722267620265484, 0.014662966132164001, 0.180541530251503, -0.08186649531126022, 0.23589280247688293, -0.1366392821073532, 0.11961854994297028, -0.02922603115439415, 0.04286094754934311, 0.04672681540250778, 0.086881123483181, -0.08363135904073715, -0.05791114270687103, -0.07598546147346497, 0.009629392065107822, -0.014021783135831356, 0..."
1,108775044,"[0.07349064946174622, 0.0967743992805481, -0.038152799010276794, -0.07673473656177521, 0.22684209048748016, -0.0060753063298761845, 0.16888433694839478, -0.11225447058677673, 0.09668536484241486, 0.03443354740738869, -0.0913434624671936, 0.14213915169239044, -0.1980677992105484, 0.15377040207386017, -0.1279836893081665, -0.03599829971790314, -0.0967002660036087, 0.14265014231204987, 0.03261440247297287, -0.11126242578029633, 0.06963685154914856, 0.2129615694284439, 0.006723809987306595, 0.12..."
2,110065001,"[-0.044018954038619995, 0.08427691459655762, -0.02065543830394745, 0.09351789951324463, -0.14247480034828186, -0.07779307663440704, -0.08055605739355087, 0.05282197892665863, 0.10809334367513657, -0.10579649358987808, -0.09285050630569458, 0.005664631258696318, 0.012895558029413223, -0.24933193624019623, -0.042424216866493225, 0.18484896421432495, -0.1699368953704834, -0.012699112296104431, 0.08738791197538376, -0.0476275309920311, 0.012392558157444, 0.024528082460165024, 0.080563485622406, ..."


Finally, we can do our similarity inference with K-Nearest Neighbor (KNN) and write back to the graph.
A similarity cutoff (for example 0.75) can be passed to KNN to keep only the stronger matches; in the call below it is left commented out, which extends the result size for exploration.  We can apply a cutoff at query time if needed.

In [None]:
%%time
# KNN
_ = gds.knn.write(g, nodeProperties=['embedding'], nodeLabels=['Article'],
              writeRelationshipType='CUSTOMERS_ALSO_PURCHASED', writeProperty='score',
             # sampleRate=1.0,maxIterations=1000,similarityCutoff=0.75,
              concurrency=4)
_

Knn:   0%|          | 0/100 [00:00<?, ?%/s]

CPU times: user 1.07 s, sys: 220 ms, total: 1.29 s
Wall time: 1min 30s


ranIterations                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          91
didConverge                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   

In [None]:
# clear graph projection once done: the KNN results were written to the database above,
# so the in-memory projection is no longer needed
g.drop()

graphName                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               proj
database                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   

### Recommendations Based on Search Prompt

In [None]:
# Plain semantic search: the 10 nearest products in the "product-text-embeddings" vector
# index for $queryVector, highest score first.
# NOTE(review): `query_vector` is not defined in this section — presumably the embedding of
# `search_prompt` computed in an earlier cell; confirm.
search_res_df = gds.run_cypher('''
CALL db.index.vector.queryNodes("product-text-embeddings", 10, $queryVector)
YIELD node AS product, score
RETURN product.prodName AS prodName,
    product.productTypeName AS productTypeName,
    product.garmentGroupName AS garmentGroupName,
    product.detailDesc AS detailDesc,
    score
    ORDER BY score DESC
''', params={'queryVector': query_vector})

In [None]:
# Recommendations derived from the search hits: take the 5 nearest products from the vector
# index, go to their articles, follow CUSTOMERS_ALSO_PURCHASED to other articles, and
# aggregate per recommended product.
# The returned `score` is sum(search_score) * sum(s.score); the top 10 rows are kept.
# NOTE(review): rows are ordered by aggScore, not by the displayed `score` — confirm this
# is the intended ranking key.
recommendation_res_df =gds.run_cypher('''
CALL db.index.vector.queryNodes("product-text-embeddings", 5, $queryVector)
YIELD node AS search_res_product, score AS search_score
MATCH(search_res_product)<-[:VARIANT_OF]-(a0)-[s:CUSTOMERS_ALSO_PURCHASED]->(a)-[:VARIANT_OF]-(p)
WITH p.prodName AS prodName,
    p.productTypeName AS productTypeName,
    p.garmentGroupName AS garmentGroupName,
    p.detailDesc AS detailDesc,
    sum(s.score) AS aggScore,
    sum(search_score) AS search_score
RETURN prodName, productTypeName, garmentGroupName, search_score*aggScore AS score, detailDesc
    ORDER BY aggScore DESC LIMIT 10
''', params={'queryVector': query_vector})

In [None]:
# Show the prompt, the plain search hits, and the graph-based recommendations together.
print(f'User Search Prompt: "{search_prompt}"\n\n')
print('Search Results:\n')
display(search_res_df)
print('\nUser May Also Be Interested In:\n')
display(recommendation_res_df)

User Search Prompt: "winter sweater with zipper"


Search Results:



Unnamed: 0,prodName,productTypeName,garmentGroupName,detailDesc,score
0,Catfish Zip,Sweater,Knitwear,"Soft, textured-knit jumper with a ribbed stand-up collar, zip at the top, dropped shoulders and ribbing at the cuffs and hem.",0.933434
1,Yolo Zip L/S,Sweater,Knitwear,"Jumper in a soft, rib knit with a stand-up collar, a visible zip at the front and long sleeves.",0.931779
2,BANANA HALF ZIP SWEATER,Sweater,Knitwear,"Jumper in a soft rib knit with a high stand-up collar, zip at the top, long raglan sleeves with elbow patches, and ribbing at the cuffs and hem.",0.930935
3,Ben zip hoodie,Sweater,Knitwear,"Fine-knit jacket in a soft viscose blend with a drawstring hood, zip and front pockets. Gently dropped shoulders and ribbing at the cuffs and hem.",0.930924
4,Raven Half Zip Sweater,Sweater,Jersey Fancy,"Sports top in stretch, fast-drying functional fabric with a stand-up collar, zip at the front with a chin guard, and a yoke at the back. Low dropped shoulders and long sleeves with thumbholes at the cuffs. Rounded and slightly longer at the back.",0.93011
5,Southern Sweater,Sweater,Knitwear,"Jumper in a soft rib knit with pointelle details, low dropped shoulders and long sleeves. Double ribbed trim around the neckline, and ribbing at the cuffs and hem.",0.929089
6,Zorro half-zip sweater,Sweater,Jersey Basic,"Short top in sweatshirt fabric with a high, ribbed stand-up collar with a zip at the front. Dropped shoulders, long sleeves and ribbing at the cuffs and hem. Soft brushed inside.",0.928748
7,BAY BLOCK STRIPE ZIP-UP,Sweater,Knitwear,"Jumper in a soft, fine-knit modal and cotton blend with a ribbed stand-up collar, zip at the top and long sleeves.",0.928719
8,Yolo Zip LS,Sweater,Knitwear,"Fitted jumper in a soft, rib knit with a turtle neck, visible zip at the top and long sleeves.",0.92771
9,Håkan half zip knit,Sweater,Knitwear,"Jumper in a soft cotton knit with a high, ribbed stand-up collar and zip at the top. Long sleeves, and ribbing at the cuffs and hem.",0.927483



User May Also Be Interested In:



Unnamed: 0,prodName,productTypeName,garmentGroupName,score,detailDesc
0,Niffler Trousers,Trousers,Woven/Jersey/Knitted mix Baby,3.248279,"Pull-on trousers in washed, stretch twill with an elasticated, drawstring waist, front pockets, a fake back pocket and tapered legs."
1,Sunspot Seamless Crop Top,Vest top,Jersey Fancy,2.949513,"Short, fitted sports top with a racer back and elasticated hem. The sports top is designed with the minimum number of seams for a more comfortable fit and increased mobility."
2,STINA 3p boxer,Underwear bottom,"Under-, Nightwear",2.818649,Boxer briefs in cotton jersey with an elasticated waist and lined gusset.
3,CC COSMO dress BG,Dress,Jersey Basic,0.814822,"Dress in soft sweatshirt fabric with an embroidered motif at the top, long sleeves, twisted seams at the front and side pockets. Ribbing around the neckline, cuffs and hem and short slits at the hem. Slightly longer at the back. Soft brushed inside. The dress is made partly from recycled cotton."
4,Cappucino Brazilian High Waist,Underwear bottom,"Under-, Nightwear",0.814637,"Brazilian briefs in lace and mesh with a mid waist, lined gusset, wide sides and high cut at the back."
5,ED Duno 2p.,Top,Jersey Fancy,0.81436,"Long-sleeved tops in soft, organic cotton jersey with a slightly wider neckline."
6,Cannes Crew,Sweater,"Under-, Nightwear",0.813946,"Round-necked jumper in a soft, fine-knit viscose blend with long sleeves, short slits in the sides and ribbing around the neckline, cuffs and hem. Slightly longer at the back."
7,Berry utility denim Trs,Trousers,Trousers,0.813191,"Ankle-length jeans in sturdy cotton denim with a high waist, zip fly and button, front and back pockets, leg pockets with a flap and concealed press-stud and tapered legs with a tab and metal buckle at the ankles. The cotton content of the jeans is partly recycled."
8,LOGG Amaretto blazer,Blazer,Outdoor,0.812981,"Jacket woven in a linen blend with narrow notch lapels, a button at the front and welt front pockets with a flap. Lined."
9,JANINE SWEATER NEON,Sweater,Jersey Fancy,0.812765,"Top in soft sweatshirt fabric with a text print motif, dropped shoulders, long sleeves, ribbing around the neckline and cuffs and a drawstring at the hem. Soft brushed inside."


### Personalized Recommendation

In [None]:
# Example customer whose purchase history drives the personalized queries below.
CUSTOMER_ID = "daae10780ecd14990ea190a1e9917da33fe96cd8cfa5e80b67b4600171aa77e0"

#### Example Purchase History

In [None]:
# Purchase history of the example customer, newest purchase first.
# Only $customerId is referenced by the query, so it is the only parameter passed.
gds.run_cypher('''
    MATCH(c:Customer {customerId:$customerId})-[r:PURCHASED]->(a)-[:VARIANT_OF]->(p:Product)
        RETURN a.articleId AS articleId,
        a.prodName AS prodName,
        r.tDat AS purchaseDate,
        a.productTypeName AS productTypeName,
        p.detailDesc AS detailDesc
        ORDER BY purchaseDate DESC
''', params = {'customerId': CUSTOMER_ID})

Unnamed: 0,articleId,prodName,purchaseDate,productTypeName,detailDesc
0,753724004,Rosemary,2019-08-05,Dress,"Short dress in woven fabric with 3/4-length sleeves with an opening and ties at the cuffs, and a gently rounded hem. Unlined."
1,733027002,Tove,2019-08-05,Top,"Short top in soft cotton jersey with a round neckline, short sleeves and a seam at the hem with a decorative knot detail at the front."
2,713577001,Malte r-neck,2019-06-27,Sweater,"Jumper in soft, patterned, fine-knit cotton with ribbing around the neckline, cuffs and hem."
3,731142001,Lead Superskinny,2019-06-27,Trousers,"Chinos in stretch twill with a zip fly and button, side pockets, welt back pockets and skinny legs."
4,687016004,DORIS CREW,2019-06-22,Sweater,"Top in sweatshirt fabric with a motif on the front and ribbing around the neckline, cuffs and hem. Soft brushed inside."
...,...,...,...,...,...
20,620425001,Karin headband,2018-10-12,Hairband,Wide hairband in cotton jersey with a twisted detail.
21,662328001,Survivor,2018-10-12,Blouse,"Straight-cut blouse in a crêpe weave with a collar, concealed buttons down the front and fake flap front pockets. Yoke with a pleat at the back, long sleeves with pleats and buttoned cuffs, and a straight cut hem with slits in the sides."
22,682848003,Skinny RW Ankle Milo Zip,2018-10-12,Trousers,"5-pocket, ankle-length jeans in washed stretch denim with hard-worn details, a regular waist, zip fly and button, and skinny legs with a zip at the hems. The jeans are made partly from recycled cotton."
23,691072002,JEKYL SWEATSHIRT,2018-10-12,Sweater,"Top in sweatshirt fabric with long raglan sleeves and ribbing around the neckline, cuffs and hem. Soft brushed inside. Regular fit."


#### Personalized Product Recommendations

In [None]:
# Personalized recommendations: start from the articles the customer purchased, follow
# CUSTOMERS_ALSO_PURCHASED to other articles and on to their products, and sum the
# relationship scores per product (aggRecScore). Each product is then scored against the
# search vector with cosine similarity on its text embedding, and the 10 products with the
# highest searchScore are returned.
personalized_res_df = gds.run_cypher('''
    MATCH(c:Customer {customerId:$customerId})-[r:PURCHASED]->(a0)
    WITH a0
    MATCH(a0)-[s:CUSTOMERS_ALSO_PURCHASED]->(a)-[:VARIANT_OF]->(p:Product)
    WITH p, sum(s.score) AS aggRecScore
    WITH p, aggRecScore, gds.similarity.cosine($queryVector, p.textEmbedding) AS cosineSimilarity
    RETURN p.productCode AS productCode,
        aggRecScore,
        cosineSimilarity as searchScore,
        p.productTypeName AS productType,
        p.prodName AS name,
        p.detailDesc AS description
        ORDER BY searchScore DESC LIMIT 10
''', params = {'customerId':CUSTOMER_ID, 'queryVector': query_vector})

In [None]:
# Show the prompt, the plain search hits, and the customer-specific recommendations together.
print(f'User Search Prompt: "{search_prompt}"\n\n')
print('Search Results:\n')
display(search_res_df)
print('\nUser May Also Be Interested In:\n')
display(personalized_res_df)

User Search Prompt: "winter sweater with zipper"


Search Results:



Unnamed: 0,prodName,productTypeName,garmentGroupName,detailDesc,score
0,Catfish Zip,Sweater,Knitwear,"Soft, textured-knit jumper with a ribbed stand-up collar, zip at the top, dropped shoulders and ribbing at the cuffs and hem.",0.933434
1,Yolo Zip L/S,Sweater,Knitwear,"Jumper in a soft, rib knit with a stand-up collar, a visible zip at the front and long sleeves.",0.931779
2,BANANA HALF ZIP SWEATER,Sweater,Knitwear,"Jumper in a soft rib knit with a high stand-up collar, zip at the top, long raglan sleeves with elbow patches, and ribbing at the cuffs and hem.",0.930935
3,Ben zip hoodie,Sweater,Knitwear,"Fine-knit jacket in a soft viscose blend with a drawstring hood, zip and front pockets. Gently dropped shoulders and ribbing at the cuffs and hem.",0.930924
4,Raven Half Zip Sweater,Sweater,Jersey Fancy,"Sports top in stretch, fast-drying functional fabric with a stand-up collar, zip at the front with a chin guard, and a yoke at the back. Low dropped shoulders and long sleeves with thumbholes at the cuffs. Rounded and slightly longer at the back.",0.93011
5,Southern Sweater,Sweater,Knitwear,"Jumper in a soft rib knit with pointelle details, low dropped shoulders and long sleeves. Double ribbed trim around the neckline, and ribbing at the cuffs and hem.",0.929089
6,Zorro half-zip sweater,Sweater,Jersey Basic,"Short top in sweatshirt fabric with a high, ribbed stand-up collar with a zip at the front. Dropped shoulders, long sleeves and ribbing at the cuffs and hem. Soft brushed inside.",0.928748
7,BAY BLOCK STRIPE ZIP-UP,Sweater,Knitwear,"Jumper in a soft, fine-knit modal and cotton blend with a ribbed stand-up collar, zip at the top and long sleeves.",0.928719
8,Yolo Zip LS,Sweater,Knitwear,"Fitted jumper in a soft, rib knit with a turtle neck, visible zip at the top and long sleeves.",0.92771
9,Håkan half zip knit,Sweater,Knitwear,"Jumper in a soft cotton knit with a high, ribbed stand-up collar and zip at the top. Long sleeves, and ribbing at the cuffs and hem.",0.927483



User May Also Be Interested In:



Unnamed: 0,productCode,aggRecScore,searchScore,productType,name,description
0,640755,0.741509,0.850858,Sweater,Allen Sweater,"Top in sweatshirt fabric made from a cotton blend with a stand-up collar with a zip at the front and a kangaroo pocket. Dropped shoulders, long sleeves and ribbing at the cuffs and hem. Soft brushed inside."
1,697980,0.751843,0.839161,Sweater,Nicky,"Jumper in a soft, fine knit with ribbing around the neckline, cuffs and hem."
2,934053,0.768552,0.838258,Sweater,Chain,"Jumper in a soft knit containing some wool. Ribbed neckline decorated with a metal chain at the front, long, wide sleeves and ribbing at the cuffs and hem. The polyester content of the jumper is recycled."
3,935858,1.554463,0.834631,Sweater,Piper sweatshirt,"Top in soft sweatshirt fabric with a rounded, frill-trimmed collar in woven fabric, an opening with a button at the back of the neck, dropped shoulders and long, wide sleeves with ribbing at the cuffs. Soft brushed inside. The polyester content of the sweatshirt is recycled."
4,656401,10.335908,0.834584,Sweater,PASTRY SWEATER,"Jumper in soft, textured-knit cotton with long raglan sleeves and ribbing around the neckline, cuffs and hem."
5,557247,8.560783,0.83333,Sweater,Petar Sweater,"Oversized top in sturdy sweatshirt fabric with dropped shoulders and ribbing around the neckline, cuffs and hem. Soft brushed inside."
6,714826,0.747204,0.832716,Sweater,Shaun,"Jumper in a soft knit containing some wool with ribbing at the top, a wide V-neck front and back, long raglan sleeves and ribbing around the neckline, cuffs and hem."
7,674826,0.709781,0.828895,Sweater,Fine knit,"Jumper in fine-knit slub cotton with dropped shoulders, long sleeves and roll edges around the neckline, cuffs and hem."
8,851339,0.770838,0.828868,Sweater,Papaya Hood,"Oversized jacket in soft sweatshirt fabric with a small embroidered text detail on the front. Jersey-lined, drawstring hood, a zip down the front, front pockets, ribbing at the cuffs and wide ribbing at the hem. Soft brushed inside."
9,244267,6.236464,0.825758,Sweater,Silver lake,Purl-knit jumper in a cotton blend with a slightly wider neckline and 3/4-length sleeves.


## LLM For Generating Grounded Content

Let's use an LLM to automatically generate content for targeted marketing campaigns grounded with our knowledge graph using the above tools.
Here is a quick example for generating promotional emails, but you can create all sorts of content with this!

In [7]:
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.vectorstores.neo4j_vector import Neo4jVector
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate, ChatPromptTemplate
from langchain.chat_models import ChatOpenAI, BedrockChat
from langchain.schema import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough


def load_llm(llm_name: str):
    """Return a chat model for the given configuration name.

    Args:
        llm_name: "gpt-4", "gpt-3.5" or "claudev2". Any other value is treated
            like "gpt-3.5".

    Returns:
        A `ChatOpenAI` instance for the GPT models or a `BedrockChat` instance for
        "claudev2", created with temperature 0 and streaming enabled.
    """
    if llm_name == "claudev2":
        print("LLM: ClaudeV2")
        bedrock_options = {"temperature": 0.0, "max_tokens_to_sample": 1024}
        return BedrockChat(
            model_id="anthropic.claude-v2",
            model_kwargs=bedrock_options,
            streaming=True,
        )

    # Everything else is served by OpenAI; GPT-3.5 is the default model.
    if llm_name == "gpt-4":
        print("LLM: Using GPT-4")
        openai_model = "gpt-4"
    else:
        print("LLM: Using GPT-3.5")
        openai_model = "gpt-3.5-turbo"
    return ChatOpenAI(temperature=0, model_name=openai_model, streaming=True)


# Chat model selected by the LLM setting read from the environment in the setup section.
llm = load_llm(LLM)

LLM: Using GPT-3.5


In [9]:
def kg_personalized_search_generator(customer_id):
    """Build a vector store over 'product-text-embeddings' that re-ranks hits for one customer.

    The retrieval query takes each product returned by the vector index and optionally
    matches CUSTOMERS_ALSO_PURCHASED relationships leading from articles the customer
    purchased to articles of that product. The summed relationship score (purchaseScore)
    boosts the vector score as (1.0 + purchaseScore) * searchScore, and the five
    best-scoring products are returned as text blocks with a `source` URL in the metadata.

    Args:
        customer_id: value of `Customer.customerId` to personalize for.

    Returns:
        The `Neo4jVector` store created by `Neo4jVector.from_existing_index`.
    """
    # NOTE(review): customer_id is interpolated directly into the Cypher text, so only
    # trusted identifiers should be passed (a quote in the value would alter the query).
    return Neo4jVector.from_existing_index(
        embedding=embedding_model,
        url=NEO4J_URI,
        username=NEO4J_USERNAME,
        password=NEO4J_PASSWORD,
        index_name='product-text-embeddings',
        retrieval_query=f"""
    WITH node as product, score as searchScore
    OPTIONAL MATCH(product)<-[:VARIANT_OF]-(:Article)<-[r:CUSTOMERS_ALSO_PURCHASED]-(:Article)
    <-[:PURCHASED]-(:Customer {{customerId: '{customer_id}'}})
    WITH  product, searchScore, sum(r.score) AS purchaseScore
    RETURN '##Product:\n' +
        'prodName: ' + product.prodName + '\n' +
        'productTypeName: ' + product.productTypeName + '\n' +
        'garmentGroupName: ' + product.garmentGroupName + '\n' +
        'detailDesc: ' + product.detailDesc + '\n' +
        'url: ' + 'https://representative-domain/product/' + product.productCode
        AS text, (1.0 + purchaseScore)*searchScore AS score, {{source: 'https://representative-domain/product/' + product.productCode}} AS metadata
    ORDER BY score DESC LIMIT 5
    """
    )

In [13]:
# Vector store for "customers also purchased" recommendations: for each product hit from
# the 'product-text-embeddings' index, follow CUSTOMERS_ALSO_PURCHASED from its articles to
# other articles and their products (recProduct), sum the relationship scores (recScore),
# and rank by (1.0 + recScore) * searchScore, keeping the top 5.
# The text block labels each field with its own property (productTypeName and
# garmentGroupName previously repeated prodName).
kg_rec = Neo4jVector.from_existing_index(
    embedding=embedding_model,
    url=NEO4J_URI,
    username=NEO4J_USERNAME,
    password=NEO4J_PASSWORD,
    index_name='product-text-embeddings',
    retrieval_query=f"""
    WITH node as product, score as searchScore
    MATCH(product)<-[:VARIANT_OF]-(:Article)-[r:CUSTOMERS_ALSO_PURCHASED]->(:Article)-[:VARIANT_OF]-(recProduct)
    WITH  recProduct, searchScore, sum(r.score) AS recScore
    RETURN '##Product:\n' +
        'prodName: ' + recProduct.prodName + '\n' +
        'productTypeName: ' + recProduct.productTypeName + '\n' +
        'garmentGroupName: ' + recProduct.garmentGroupName + '\n' +
        'detailDesc: ' + recProduct.detailDesc + '\n' +
        'url: ' + 'https://representative-domain/product/' + recProduct.productCode
        AS text, (1.0 + recScore)*searchScore AS score, {{source: 'https://representative-domain/product/' + recProduct.productCode}} AS metadata
    ORDER BY score DESC LIMIT 5
    """
)

In [14]:
# Customer the generated email is personalized for.
CUSTOMER_ID = "daae10780ecd14990ea190a1e9917da33fe96cd8cfa5e80b67b4600171aa77e0"

# System prompt: asks the LLM to write a promotional email that mentions only the products
# injected into {searchProds} and {recProds}; {customerName} and {timeOfYear} are template
# variables as well.
general_system_template = '''
You are a personal assistant named Sally for a fashion, home, and beauty company called HRM.
write an email to {customerName}, one of your customers, to promote and summarize products relevant for them given the current season / time of year: {timeOfYear} .
Please only mention the Products listed below. Do not come up with or add any new products to the list.
Each product description comes with a "url" field. make sure to link to the url with descriptive name text for each product so the customer can easily find them.

---
# Relevant Products:
{searchProds}

# Customer May Also Be Interested In:
{recProds}
---
'''
# The human message is just the search prompt.
general_user_template = "{searchPrompt}"
messages = [
    SystemMessagePromptTemplate.from_template(general_system_template),
    HumanMessagePromptTemplate.from_template(general_user_template),
]
prompt = ChatPromptTemplate.from_messages(messages)

def format_docs(docs):
    """Join the `page_content` of each document into one string, separated by a blank line."""
    contents = (doc.page_content for doc in docs)
    return "\n\n".join(contents)


# Email-generation chain: the invoke() input (the search prompt) is sent to both retrievers
# and passed through unchanged as {searchPrompt}; the retrieved documents are flattened to
# text with format_docs and placed into the prompt, which is sent to the LLM and parsed to a
# plain string.
# The number of vector-index candidates is passed via `search_kwargs`, which is how
# `as_retriever` forwards options to the search call; a bare `k=` keyword is not the
# documented way to set it. The personalized retriever asks for 100 candidates; its
# retrieval query then re-ranks them and keeps the best 5.
chain = ({'searchProds': kg_personalized_search_generator(CUSTOMER_ID).as_retriever(search_kwargs={'k': 100}) | format_docs,
          'recProds': kg_rec.as_retriever(search_kwargs={'k': 5}) | format_docs,
          'customerName': lambda x:'Alex Smith',
          'timeOfYear': lambda x:"Nov, 2023",
          "searchPrompt": RunnablePassthrough()}
         | prompt
         | llm
         | StrOutputParser())

In [None]:
# Generate a promotional email for the search prompt "Oversized Sweaters".
print(chain.invoke("Oversized Sweaters"))

Dear Alex Smith,

I hope this email finds you well. As the weather gets colder, it's the perfect time to cozy up in some oversized sweaters. I wanted to share with you some of our latest and most popular options for this season.

1. Betsy Oversized: This oversized, V-neck jumper is made from a soft, loose knit containing some wool and alpaca wool. It features dropped shoulders, long, wide sleeves, wide ribbing around the neckline, cuffs, and hem, and slits on the sides. You can find it [here](https://representative-domain/product/842001).

2. Japp Oversize Sweater: This relaxed-fit top in sweatshirt fabric is perfect for a casual and comfortable look. It has a ribbed turtle neck, dropped shoulders, long, wide sleeves, and ribbing at the cuffs and hem. The longer back adds a stylish touch. You can find it [here](https://representative-domain/product/817392).

3. HUBBY Oversized Sweater: Made from sturdy sweatshirt fabric, this oversized top features a drawstring hood, low dropped should

In [None]:
# Same chain with a different search prompt.
print(chain.invoke("western boots"))

Dear Alex Smith,

I hope this email finds you well. As the weather gets cooler and the holiday season approaches, I wanted to share some of our latest products that I think you will love, especially our western boots collection.

1. Brush Western Boot: These ankle boots in imitation leather feature covered elastication at the sides and decorative tabs and buckles. With a comfortable heel height of approximately 4 cm, they are perfect for both casual and dressy occasions. You can find them [here](https://representative-domain/product/806766).

2. Lindsay Western Boot: Made from imitation suede, these ankle boots have a zip on one side and a loop at the back for easy wear. The fabric linings, insoles, and rubber soles ensure comfort and durability. The heel measures 7.5 cm, adding a stylish touch to any outfit. Check them out [here](https://representative-domain/product/673580).

3. Wilma Western Boot SPEED: If you prefer pointed toes and elastic gores, these ankle boots are a great choi

In [None]:
import gradio as gr
from langchain.memory import ConversationBufferMemory

# NOTE(review): `memory` is created here but nothing in this section passes it to the chain,
# so each request is answered without conversation history — confirm whether it is needed.
memory = ConversationBufferMemory(memory_key = "chat_history", return_messages = True)
# Name under which the chat handler below looks up the chain.
agent_chain = chain

def chat_response(input_text, history=None):
    """Answer one chat message by running it through `agent_chain`.

    Args:
        input_text: the user's message; used as the search prompt for the chain.
        history: previous chat turns as supplied by the chat UI. Accepted for
            signature compatibility but not used.

    Returns:
        Whatever `agent_chain.invoke(input_text)` returns.
    """
    # Errors from the chain are not caught here and propagate to the caller. Wrap this call
    # in `try/except Exception` if a friendly fallback message is preferred over the error.
    return agent_chain.invoke(input_text)

# Chat UI around chat_response, with three example prompts offered to the user.
interface = gr.ChatInterface(fn = chat_response,
                             title = "Message Writer",
                             description = "powered by Neo4j",
                             theme = "soft",
                             chatbot = gr.Chatbot(height=500),
                             undo_btn = None,
                             clear_btn = "\U0001F5D1 Clear chat",
                             examples = ["winter sweaters with zipper?",
                                         "Oversized Sweaters", "Cowboy Boots"])

# share=True serves the app on a temporary public URL, so anyone with the link can use it.
# debug=True keeps this cell running to show errors and logs.
interface.launch(share=True, debug=True)

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
Running on public URL: https://a7dbd916736ca91ce6.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


In [None]:
#TODO
# Example with just recommendations....no recent search
# Put in email generator in Gradio app for demo

## Wrap Up