### Script
- **Input:** Real-time in-DB recommendation using Neo4j Graph database.
- **Output:** Sample recommendations.

### Import libraries

In [3]:
!pip3 install py2neo

You should consider upgrading via the '/opt/anaconda3/bin/python -m pip install --upgrade pip' command.[0m[33m
[0m

In [4]:
from pprint import pprint
from py2neo import Graph, NodeMatcher
g = Graph("bolt://localhost:7687/neo4j", password = "test")


ConnectionUnavailable: Cannot open connection to ConnectionProfile('bolt://localhost:7687')

In [None]:
query="""
MATCH (a:Amenity) RETURN a.name LIMIT 3
"""
g.run(query).to_data_frame()

In [4]:
query="""
MATCH (a:Listing) WHERE a.price='$150.00' RETURN a.name,a.price,a.url LIMIT 3
"""
g.run(query).to_data_frame()

Unnamed: 0,a.name,a.price,a.url
0,571029,$150.00,https://www.airbnb.com/rooms/571029
1,729260,$150.00,https://www.airbnb.com/rooms/729260
2,733654,$150.00,https://www.airbnb.com/rooms/733654


### Content-based filtering

In [21]:
query = """
MATCH (u:User {name:$cid})-[:RATED]->(s:Listing)-[:HAS_AMENITY]->(c:Amenity)<-[:HAS_AMENITY]-(z:Listing)
WHERE NOT EXISTS ((u)-[:RATED]->(z))
WITH s, z, COUNT(c) AS intersection
MATCH (s)-[:HAS_AMENITY]->(sc:Amenity)
WITH s, z, intersection, COLLECT(sc.name) AS s1
MATCH (z)-[:HAS_AMENITY]->(zc:Amenity)
WITH s, z, s1, intersection, COLLECT(zc.name) AS s2
WITH s, z, intersection, s1+[x IN s2 WHERE NOT x IN s1] AS union, s1, s2
RETURN s.name as UserListing, z.name as Recommendate, s1 as UserListingAmenities, s2 as RecommendateListingAmenities, ((1.0*intersection)/SIZE(union)) AS jaccard ORDER BY jaccard DESC LIMIT $k;
"""
recos=[]
recoAmenity=[]
for row in g.run(query, cid = "8726758", k = 5).data():
    recos.append('https://www.airbnb.com/rooms/'+row['Recommendate'])
    recoAmenity.append(row['UserListingAmenities'])


In [22]:
recos

['https://www.airbnb.com/rooms/17968731',
 'https://www.airbnb.com/rooms/2211896',
 'https://www.airbnb.com/rooms/9104541',
 'https://www.airbnb.com/rooms/43978539',
 'https://www.airbnb.com/rooms/17968930']

### Collaborative Filtering
- https://www.datatheque.com/posts/collaborative-filtering/
- https://www.kernix.com/article/an-efficient-recommender-system-based-on-graph-database/

In [79]:
user_id="8726758"

from pprint import pprint
from py2neo import Graph, NodeMatcher
g = Graph("bolt://localhost:7687/neo4j", password = "test")

query = """
        // Get count of all distinct products that user 4789 has purchased and find other users who have purchased them
        MATCH (u1:User {name:$uid})-[x:RATED]->(m:Listing)<-[y:RATED]-(u2:User)
        WHERE u1 <> u2
        WITH u1, u2, COUNT(DISTINCT m) as intersection_count
        
        // Get count of all the distinct products that are unique to each user
        MATCH (u:User)-[:RATED]->(m:Listing)
        WHERE u in [u1, u2]
        WITH u1, u2, intersection_count, COUNT(DISTINCT m) as union_count
       
        // Compute Jaccard index
        WITH u1, u2, intersection_count, union_count, (intersection_count*1.0/union_count) as jaccard_index
        
        // Get top k neighbours based on Jaccard index
        ORDER BY jaccard_index DESC, u2.id
        WITH u1, COLLECT([u2.name, jaccard_index, intersection_count, union_count])[0..$k] as neighbours
        RETURN u1.name as user, neighbours
        """
neighbours = {}
for row in g.run(query, uid=user_id, k=5):
    neighbours[row[0]] = row[1]

neighbours_ids = [x[0] for x in neighbours[row[0]]]

query = """
        // Get top n recommendations for user from the selected neighbours
        MATCH (u1:User),
              (neighbour:User)-[:RATED]->(p:Listing)        // get all listings rated by neighbour
        WHERE u1.name = $uid
          AND neighbour.id in $neighbours
          AND not (u1)-[:RATED]->(p)                        // which u1 has not already bought
        
        WITH u1, p, COUNT(DISTINCT neighbour) as cnt                                // count times rated by neighbours
        ORDER BY u1.name, cnt DESC                                               // and sort by count desc
        RETURN u1.name as user, COLLECT([p.name,cnt])[0..$k] as recos  
        """

recos = {}
for row in g.run(query, uid=user_id, neighbours=neighbours_ids, k=5):
    recos[row[0]] = row[1]
    
recommended_ids = [x[0] for x in recos[row[0]]]   

query="""
MATCH (a:Listing) WHERE a.name in $name RETURN a.name,a.beds,a.bedrooms,a.bathrooms,a.picture_url,a.accomodates,a.review_scores_rating,a.price,a.url;
"""



In [93]:
g.run(query, name=recommended_ids).to_data_frame()


Unnamed: 0,a
0,"{'bedrooms': '4.0', 'review_scores_rating': 'v..."
1,"{'bedrooms': '1.0', 'review_scores_rating': 'v..."
2,"{'bedrooms': '2.0', 'review_scores_rating': 'v..."
3,"{'bedrooms': '2.0', 'review_scores_rating': 'v..."
4,"{'bedrooms': '1.0', 'review_scores_rating': 'v..."
