In [1]:
import json
config = json.load(open("./config.json"))
import pandas

In [2]:
from neo4j import GraphDatabase
class Neo4jConnection:
    
    def __init__(self, uri, user, pwd):
        self.__uri = uri
        self.__user = user
        self.__pwd = pwd
        self.__driver = None
        try:
            self.__driver = GraphDatabase.driver(self.__uri, auth=(self.__user, self.__pwd))
        except Exception as e:
            print("Failed to create the driver:", e)
        
    def close(self):
        if self.__driver is not None:
            self.__driver.close()
        
    def query(self, query, db=None):
        assert self.__driver is not None, "Driver not initialized!"
        session = None
        response = None
        try: 
            session = self.__driver.session(database=db) if db is not None else self.__driver.session() 
            response = list(session.run(query))
        except Exception as e:
            print("Query failed:", e)
        finally: 
            if session is not None:
                session.close()
#         return response
        return pandas.DataFrame([dict(_) for _ in response])

In [3]:
import os


In [4]:
conn = Neo4jConnection(uri=config['GDB_URL'], user=config['GDB_NAME'], pwd=config['GDB_PASS'])


# 先對特定的使用者來進行研究
＊　

### 推薦內容

In [10]:
store_recommendation_query="""
// Store Recommendation
match (u1:User)-[r1:BUY]->(store:MCC)<-[r2:BUY]-(u2:User)-[r3:BUY]->(rstore:MCC) 
where toInteger(u1.userId)/100 % 1000 =0 
with u1.userId as userId ,rstore.storeId as storeId ,
sum(toFloat(r1.buy_count)+toFloat(r2.buy_count)+toFloat(r3.buy_count)) as score 
return userId,storeId,score order by score desc
"""

In [42]:
predict_result = conn.query(store_recommendation_query)
predict_result[:200]

Unnamed: 0,userId,storeId,score
0,5816679071986200000,5311,167951.0
1,5816679071986200000,5812,140122.0
2,5816679071986200000,5541,116976.0
3,5816679071986200000,MRT5,98970.0
4,5816679071986200000,5399,92505.0
...,...,...,...
195,373209349652500000,6300,15564.0
196,8494650578933900000,4812,15176.0
197,8115143336766400000,5735,15134.0
198,8494650578933900000,5735,15128.0


In [12]:
predict_result['userId'].describe()

count                    2566
unique                      9
top       1374024424485700000
freq                      291
Name: userId, dtype: object

In [39]:
predict_frame_user = predict_result.groupby("userId")

In [41]:
predict_user2stores = predict_frame_user.agg({ 'storeId': lambda x: ' '.join(x)})
predict_user2stores = predict_user2stores.to_dict()['storeId']
predict_user2stores

{'1052133098591800000': '5411 5812 5541 5399 5331 MRT5 5499 5311 5814 5542 6300 4112 MRT1 7011 5816 4121 5735 5691 5712 4722 ETC1 5732 5942 4812 4814 7372 5811 5941 5965 5621 5912 9399 5309 7538 8398 7832 4011 5462 5977 4899 5999 5968 4816 5699 5815 8999 5045 5641 5661 8062 7399 8043 5995 7997 ICH5 5945 7230 7999 5947 4511 8211 5817 5963 4131 7311 8220 5722 7996 7994 5964 5921 5813 5651 5533 8651 8299 7922 7512 5734 5818 5571 7523 5943 4214 5251 MRT3 4784 3090 4111 5944 3078 5948 ICH1 5611 5122 5969 5200 8011 5441 5300 5970 2741 6011 7395 7991 5310 4900 8099 7992 4789 7298 5946 5192 5531 2842 5511 3084 7941 5960 5967 5631 5719 7012 7531 8641 5655 7333 8249 1520 7297 8244 7277 7278 7273 9311 7911 7032 7392 5451 5521 5714 7221 5962 5422 7993 5971 MRT9 4411 5139 7542 5047 3006 7361 5994 0763 7379 7299 5992 3099 4225 8661 ICH3 5211 3261 5949 MMA5 7998 5532 5940 3015 ETC3 5111 6513 5733 MRT4 5065 3640 3780 8049 MRT2 7841 0742 3079 3642 7210 7519 IPS1 5599 8699 5169 IPS5 5094 5993 3504 3577 

### TODO : Filter with Score and fetch top N Store list 

### 這個使用者的 Log

In [17]:
q = """
MATCH (u1:User)-[r]-(s)  
where toInteger(u1.userId)/100 % 1000 =0
return u1.userId as userId,s.storeId as storeId ,r.buy_count as buy_count
"""
result = conn.query(q)


In [24]:
result['userId'].describecribe()

count                      80
unique                      9
top       5816679071986200000
freq                       12
Name: userId, dtype: object

In [21]:
frame_user = result.groupby("userId")

In [33]:
user2stores= frame_user.agg({ 'storeId': lambda x: ' '.join(x)})
user2stores = user2stores.to_dict()['storeId']

In [37]:
# Actual Result
user2stores

{'1052133098591800000': '4816 5311 5968 5817 5045 4814 8999 7372 5734',
 '1374024424485700000': '5411 4814 5691 5200 5712 5311 5812 5541 5811 5814',
 '3174630615041900000': 'ETC1 MRT1 MRT5 5812 7999 5399',
 '373209349652500000': '5499 5411 4812',
 '5816679071986200000': '5399 5691 7997 5331 5714 4812 MRT5 5411 5251 5712 MRT1 9399',
 '5886367677285800000': '5399 5942 5499 5812 5941 5811 5712 5311 5411 MRT5 7230',
 '6103727475792800000': '9399 5499 5691 6300 MRT5 5309 5311 4112 5411',
 '8115143336766400000': '5699 8220 ETC1 4784 5999 9399 5311 5331 5541 6300 5812',
 '8494650578933900000': '4812 5411 5311 5812 5399 5941 5732 5811 5661'}

In [19]:
jaccard_acc = len(top_30.intersection(actual_result))/len(top_30.union(actual_result))

NameError: name 'top_30' is not defined