### Script
- **Input:** real-time querying on ConceptNet5 number-batch in the Neo4j Graph database.
- **Output:** Micro-tags(concepts) generated for listing amenities and stored in a redis like memcache locally.
              
- **Note:** 
    - This is a workaround to minimize the response time in a real-time conversation. 
    - The mem cache can be synchronized in a distributed system on data-event triggers.
    - The micro-tags(related concepts) will be stored as dictionary values with the actual
      amenity as keys and used for enhaned search for matched listings(item attributes) in the Neo4j Graph DB. 


In [69]:
!pip install py2neo
from py2neo import Graph
import re
import pandas as pd

# use neo4j for real-time recommendations.
g = Graph("bolt://localhost:7687/neo4j", password = "test")



In [70]:
df = pd.read_csv('./Data/processing/Neo4j/Amenity.csv')
amenities = df['name'].unique()
len(amenities)

889

In [71]:
# dic = dict()

# def realTimeRecommendation(amenity,topK):		
# 	concepts=""

# 	query = """
# 							CALL ga.nlp.ml.word2vec.nn($wid, $k, 'en-ConceptNet5') YIELD word, distance RETURN word AS list;
# 							"""
# 	for row in g.run(query, wid=amenity, k=topK):
# 		processed=re.sub('[^a-zA-Z0-9]+', ' ', row[0])
# 		concepts+=str(processed)+","
	
# 	print(amenity+" concepts cached")
# 	amenity=str(amenity)
# 	dic[amenity] = dic.get(amenity, [])
# 	dic[amenity].append(concepts.rstrip(',')) 

# for amenity in amenities:
# 	try:
# 		amen = amenity.split(' ')
# 		for a in amen:
# 			realTimeRecommendation(str(a.lower()),10)
# 	except:
# 		print('no split character')
# 		realTimeRecommendation(str(amenity.lower()),10)

    

In [72]:
# import json
# with open('RASA_ConceptNet5/mem_cache_conceptNet5.json', 'w') as outfile:
#     json.dump(dic,outfile)

In [73]:
# outfile.close()

In [74]:
# data={}
# with open('RASA_ConceptNet5/mem_cache_conceptNet5.json') as json_file:
#     data = json.load(json_file)


# if 'refrigerator' in data:
#     print(data['refrigerator'])

## Fetch data for metrics - ESCI

In [75]:
import pandas as pd 
test = pd.read_csv('./Data/test/test_user_queries.csv',index_col=[0])
entity = test['entity'].unique()
len(entity)

2823

In [76]:
# 100 user queries 
#Query | #Entity | #Domain-graph-response | #Common-sense-Domain-graph-response| #Matched_concepts

top_100_entity = entity[1150:1195]
top_100_entity


array(['toddler', 'kitchen area', 'sitting room', 'highlights', 'adults',
       'channels', 'trepidation', 'day vacation', 'peter', 'arrange',
       'tours', 'teenagers', 'umbrellas', 'toys', 'kids', 'mooi',
       'hip issues', 'flights', 'knee', 'inlays', 'fans', 'feedback',
       'group', 'staircases', 'school teacher', 'similarities', 'femmes',
       'diseño', 'estética', 'calme', 'shopping area', 'birthday cake',
       'boutique shops', 'nogmaals', 'family', 'property', 'venue',
       'worm', 'clothing stores', 'coffee machine', 'huis', 'mooie',
       'side street', 'walks', 'landmarks'], dtype=object)

In [77]:
import numpy as np 
top_100_entity= ['kettle','heater','pets','emergency','first aid', 'cooktop','king bed','laundry',
       'clothing stores', 'coffee machine','shopping area','birthday cake','boutique shops',
       'toddler', 'kitchen area', 'sitting room','staircases',
       'refrigerator','washer','crib','dryer','fireplace',
       'hill view','cabin','safety','privacy','pool','indoor games',
       'patio','parking','umbrellas', 'toys', 'kids', 'sand pit','tram station','buses','public transport','drying clothes','river view','flower arts']

In [78]:
print("Connected to Neo4j")

Connected to Neo4j


In [79]:
#used to retrieve data from conceptNet5 in real-time.
def getConceptTags(word,topK):		
	collection = []
	word = word.replace(' ', '_')
	
	words_to_query=[]
	words_to_query.append(word)
	
	sp_word = word.split('_')
	for sp in sp_word:
		words_to_query.append(sp)

	query = """
				CALL ga.nlp.ml.word2vec.nn($wid, $k, 'en-ConceptNet5') YIELD word, distance RETURN word AS list;
			"""
	for word in words_to_query:
		print('word:',word)
		for row in g.run(query, wid=word, k=topK):
			processed=re.sub('[^a-zA-Z0-9]+', ' ', row[0])
			collection.append(processed)

	return collection

In [80]:
def run_test_amenity(prediction,topK,concept_count):

	if prediction:
		#replace with dynamic value.
		word=str(prediction)
		word=word.lower()

		print(word)

		user_query.append("find me listings that match entity, "+word)
		query_entity.append(word)
		
		query_string=""
		query_string="MATCH (r:Amenities)-[]-(l:Listing) WHERE "
		query_string+=" r.name=~'(?i).*"+word.lower()+".*'"
		query_string+=" RETURN l.url as url,r.name as description  LIMIT "+str(topK)+";"

		query = ""+query_string+""
		count=0
		for row in g.run(query, query_string=query_string,k=topK):
			# print(row)
			# print(str(row['url']),str(row['description']))
			domain.append(str(row['url']))
			domain_text.append(str(row['description']))
			count+=1
				
		if count==0:
			domain.append("No matched listings")
			domain_text.append("No matched listings")
			print("No matched listings")


	
	else:
		print("No matched listings")

		return []

user_query=[]
query_entity=[]
domain=[]
domain_text=[]
commonsense=[]
tags_matched=[]

for entity in top_100_entity:
    run_test_amenity(entity,1,10) #entity,topK,NoOfConcepts

df = pd.DataFrame(list(zip(user_query, query_entity,domain,domain_text)),
               columns =['user_query', 'query_entity','domain','text'])
test='./Data/test/'
df.to_csv(test+'test_results_domain_amenity.csv')
df.head(2)


kettle
heater
pets
emergency
No matched listings
first aid
cooktop
No matched listings
king bed
No matched listings
laundry
clothing stores
No matched listings
coffee machine
shopping area
No matched listings
birthday cake
No matched listings
boutique shops
No matched listings
toddler
No matched listings
kitchen area
No matched listings
sitting room
No matched listings
staircases
No matched listings
refrigerator
washer
crib
dryer
fireplace
hill view
No matched listings
cabin
No matched listings
safety
privacy
No matched listings
pool
indoor games
No matched listings
patio
parking
umbrellas
No matched listings
toys
kids
No matched listings
sand pit
No matched listings
tram station
No matched listings
buses
No matched listings
public transport
No matched listings
drying clothes
No matched listings
river view
No matched listings
flower arts
No matched listings


Unnamed: 0,user_query,query_entity,domain,text
0,"find me listings that match entity, kettle",kettle,https://www.airbnb.com/rooms/13929944,Hot water kettle
1,"find me listings that match entity, heater",heater,https://www.airbnb.com/rooms/51790429,Portable heater


In [81]:
def run_test_review(prediction,topK,concept_count):

	if prediction:
		#replace with dynamic value.
		word=str(prediction)
		word=word.lower()

		print(word)

		user_query.append("find me listings that match entity, "+word)
		query_entity.append(word)
		
		query_string=""
		query_string="MATCH (r:Review_Text)-[]-(l:Listing) WHERE "
		query_string+=" r.name=~'(?i).*"+word.lower()+".*'"
		query_string+=" RETURN l.url as url,r.name as description  LIMIT "+str(topK)+";"

		query = ""+query_string+""
		count=0
		for row in g.run(query, query_string=query_string,k=topK):
			# print(row)
			# print(str(row['url']),str(row['description']))
			domain.append(str(row['url']))
			domain_text.append(str(row['description']))
			count+=1
				
		if count==0:
			domain.append("No matched listings")
			domain_text.append("No matched listings")
			print("No matched listings")


	
	else:
		print("No matched listings")

		return []

user_query=[]
query_entity=[]
domain=[]
domain_text=[]
commonsense=[]
tags_matched=[]

for entity in top_100_entity:
    run_test_review(entity,1,10) #entity,topK,NoOfConcepts

df = pd.DataFrame(list(zip(user_query, query_entity,domain,domain_text)),
               columns =['user_query', 'query_entity','domain','text'])
test='./Data/test/'
df.to_csv(test+'test_results_domain_review.csv')
df.head(2)


kettle
heater
pets
emergency
first aid
cooktop
king bed
laundry
clothing stores
coffee machine
shopping area
birthday cake
boutique shops
toddler
kitchen area
sitting room
staircases
refrigerator
washer
crib
dryer
fireplace
hill view
No matched listings
cabin
safety
privacy
pool
indoor games
No matched listings
patio
parking
umbrellas
toys
kids
sand pit
No matched listings
tram station
buses
public transport
drying clothes
river view
flower arts
No matched listings


Unnamed: 0,user_query,query_entity,domain,text
0,"find me listings that match entity, kettle",kettle,https://www.airbnb.com/rooms/20168,"[""the location of sasha's b&b makes it ideal f..."
1,"find me listings that match entity, heater",heater,https://www.airbnb.com/rooms/2818,['daniel is really cool. the place was nice an...


In [82]:
def run_conceptNet5Amenity_test(prediction,topK,concept_count):

	if prediction:
		#replace with dynamic value.
		word=str(prediction)
		word=word.lower()

		print(word)


		user_query.append("find me listings that match entity, "+word)
		query_entity.append(word)
		
		print("fetching concepts related to:",word)
		collection = getConceptTags(word, concept_count)
		
		query_string=""

		query_string="MATCH (r:Amenities)-[]-(l:Listing) WHERE "
		tags=""
		for item in collection:
			query_string+="r.name=~'(?i).*"+item.lower()+".*' or "
			tags+=item.lower()+","
		query_string+=" r.name=~'(?i).*"+word.lower()+".*'"
		query_string+=" RETURN l.url as url,r.name as description  LIMIT "+str(topK)+";"
				
		query = ""+query_string+""
		count=0
		for row in g.run(query, query_string=query_string,k=topK):
			# print(row)
			print(str(row['url']))
			domain_text.append(str(row['description']))
			commonsense.append(str(row['url']))		
			tags_matched.append(tags.rstrip(','))
			count+=1
				
		if count==0:
			domain_text.append("no matched listings")
			commonsense.append("no matched listings")
			tags_matched.append("no matched concepts")
		else:
			print('Recommendation based on the following similar ConceptNet5(common-sense network) tags:\n')
			print(tags.rstrip(','))

	
	else:
		print("No matched listings")

		return []

user_query=[]
query_entity=[]
domain=[]
domain_text=[]
commonsense=[]
tags_matched=[]

for entity in top_100_entity:
    run_conceptNet5Amenity_test(entity,1,10) #entity,topK,NoOfConcepts

df = pd.DataFrame(list(zip(user_query, query_entity,commonsense,tags_matched)),
               columns =['user_query', 'query_entity','commonsense','text'])
test='./Data/test/'
df.to_csv(test+'test_results_commonsense_amenity.csv')
df.head(2)


kettle
fetching concepts related to: kettle
word: kettle
word: kettle
https://www.airbnb.com/rooms/13929944
Recommendation based on the following similar ConceptNet5(common-sense network) tags:

kettle,kettleful,electric kettle,heating water,kettles,iron pot,making tea,teakettle,chimney hook,kettle hole,kettle,kettleful,electric kettle,heating water,kettles,iron pot,making tea,teakettle,chimney hook,kettle hole
heater
fetching concepts related to: heater
word: heater
word: heater
https://www.airbnb.com/rooms/51790429
Recommendation based on the following similar ConceptNet5(common-sense network) tags:

heater,solar heater,heaters,oil heater,box iron,calefactor,gas heater,space heater,oilstove,heat room,heater,solar heater,heaters,oil heater,box iron,calefactor,gas heater,space heater,oilstove,heat room
pets
fetching concepts related to: pets
word: pets
word: pets
https://www.airbnb.com/rooms/42431273
Recommendation based on the following similar ConceptNet5(common-sense network) tags:


Unnamed: 0,user_query,query_entity,commonsense,text
0,"find me listings that match entity, kettle",kettle,https://www.airbnb.com/rooms/13929944,"kettle,kettleful,electric kettle,heating water..."
1,"find me listings that match entity, heater",heater,https://www.airbnb.com/rooms/51790429,"heater,solar heater,heaters,oil heater,box iro..."


In [83]:
def run_conceptNet5Review_test(prediction,topK,concept_count):

	if prediction:
		#replace with dynamic value.
		word=str(prediction)
		word=word.lower()

		print(word)


		user_query.append("find me listings that match entity, "+word)
		query_entity.append(word)
		
		print("fetching concepts related to:",word)
		collection = getConceptTags(word, concept_count)
		
		query_string=""

		query_string="MATCH (r:Review_Text)-[]-(l:Listing) WHERE "
		tags=""
		for item in collection:
			query_string+="r.name=~'(?i).*"+item.lower()+".*' or "
			tags+=item.lower()+","
		query_string+=" r.name=~'(?i).*"+word.lower()+".*'"
		query_string+=" RETURN l.url as url,r.name as description  LIMIT "+str(topK)+";"
				
		query = ""+query_string+""
		count=0
		for row in g.run(query, query_string=query_string,k=topK):
			# print(row)
			print(str(row['url']))
			domain_text.append(str(row['description']))
			commonsense.append(str(row['url']))		
			tags_matched.append(tags.rstrip(','))
			count+=1
				
		if count==0:
			domain_text.append("no matched listings")
			commonsense.append("no matched listings")
			tags_matched.append("no matched concepts")
		else:
			print('Recommendation based on the following similar ConceptNet5(common-sense network) tags:\n')
			print(tags.rstrip(','))

	
	else:
		print("No matched listings")

		return []

user_query=[]
query_entity=[]
domain=[]
domain_text=[]
commonsense=[]
tags_matched=[]

for entity in top_100_entity:
    run_conceptNet5Review_test(entity,1,10) #entity,topK,NoOfConcepts

df = pd.DataFrame(list(zip(user_query, query_entity,commonsense,tags_matched)),
               columns =['user_query', 'query_entity','commonsense','text'])
test='./Data/test/'
df.to_csv(test+'test_results_commonsense_review.csv')
df.head(2)


kettle
fetching concepts related to: kettle
word: kettle
word: kettle
https://www.airbnb.com/rooms/20168
Recommendation based on the following similar ConceptNet5(common-sense network) tags:

kettle,kettleful,electric kettle,heating water,kettles,iron pot,making tea,teakettle,chimney hook,kettle hole,kettle,kettleful,electric kettle,heating water,kettles,iron pot,making tea,teakettle,chimney hook,kettle hole
heater
fetching concepts related to: heater
word: heater
word: heater
https://www.airbnb.com/rooms/2818
Recommendation based on the following similar ConceptNet5(common-sense network) tags:

heater,solar heater,heaters,oil heater,box iron,calefactor,gas heater,space heater,oilstove,heat room,heater,solar heater,heaters,oil heater,box iron,calefactor,gas heater,space heater,oilstove,heat room
pets
fetching concepts related to: pets
word: pets
word: pets
https://www.airbnb.com/rooms/2818
Recommendation based on the following similar ConceptNet5(common-sense network) tags:

pets,kept 

Unnamed: 0,user_query,query_entity,commonsense,text
0,"find me listings that match entity, kettle",kettle,https://www.airbnb.com/rooms/20168,"kettle,kettleful,electric kettle,heating water..."
1,"find me listings that match entity, heater",heater,https://www.airbnb.com/rooms/2818,"heater,solar heater,heaters,oil heater,box iro..."
