In [1]:
from graphdatascience import GraphDataScience
import pandas as pd
import configparser

### Neo4j Settings
The NEO4J_PROPERTIES_FILE is an ini configuration file containing the Neo4j connection properties that this notebook needs to connect to your Neo4j instance and load data. The ini file should be formatted as follows:

```
[NEO4J]
PASSWORD=<password>
USERNAME=<username, is 'neo4j' by default>
HOST=<host uri>
```

In [2]:
# Path to the ini file with the [NEO4J] section (HOST, USERNAME, PASSWORD).
# Edit this single constant to point at your own credentials file.
NEO4J_PROPERTIES_FILE = '/Users/zachblumenfeld/devtools/auth/aura-p2p-fd.ini'

config = configparser.RawConfigParser()
# read() silently skips files it cannot open and returns the list of files it
# actually parsed, so fail fast here instead of with an opaque KeyError below.
if not config.read(NEO4J_PROPERTIES_FILE):
    raise FileNotFoundError(
        f'Could not read Neo4j properties file: {NEO4J_PROPERTIES_FILE}'
    )
HOST = config['NEO4J']['HOST']
USERNAME = config['NEO4J']['USERNAME']
PASSWORD = config['NEO4J']['PASSWORD']

# Client handle used by every later cell to run Cypher and GDS procedures.
gds = GraphDataScience(HOST, auth=(USERNAME, PASSWORD), aura_ds=True)

### Clean Up from Last Time

In [3]:
# Delete the derived relationships left over from a previous run; the staging
# cell further down MERGEs both relationship types again.
for stale_rel_query in (
    'MATCH (:User)-[r:SHARED_IDS]->() DELETE r',
    'MATCH (:User)-[r:P2P_WITH_SHARED_CARD]->() DELETE r',
):
    _ = gds.run_cypher(stale_rel_query)

In [4]:
# Strip the FlaggedUser / FraudRiskUser labels assigned by a previous run.
for stale_label_query in (
    'MATCH (u:FlaggedUser) REMOVE u:FlaggedUser',
    'MATCH (u:FraudRiskUser) REMOVE u:FraudRiskUser',
):
    _ = gds.run_cypher(stale_label_query)

In [5]:
# Remove the listed properties from every node (the MATCH has no label filter),
# so no values from a previous run survive into this one.
stale_properties_query = '''
    MATCH (n)
    REMOVE n.wccIdPrep, n.wccId,
        n.sharedIdsDegree,
        n.predictedProbability,
        n.partOfCommunity,
        n.p2pSharedCardPageRank,
        n.p2pSharedCardDegree,
        n.p2pSentWeightedPageRank,
        n.p2pSentWeightedDegree,
        n.p2pSentPageRank,
        n.p2pSentDegree,
        n.p2pReversedSharedCardPageRank,
        n.p2pReversedSharedCardDegree,
        n.p2pReceivedWeightedPageRank,
        n.p2pReceivedWeightedDegree,
        n.p2pReceivedPageRank,
        n.p2pReceivedDegree,
        n.louvainCommunityId,
        n.ipDegree,
        n.fraudRiskRatio,
        n.fraudRiskDegree,
        n.fraudRisk,
        n.flaggedRatio,
        n.flaggedDegree,
        n.deviceDegree,
        n.degree,
        n.communitySize,
        n.cardDegree
'''
_ = gds.run_cypher(stale_properties_query)

In [7]:
# Drop every projected graph that gds.graph.list() currently reports, so that
# later projections start from a clean slate.
g_names = gds.graph.list()['graphName'].tolist()

for stale_graph_name in g_names:
    gds.graph.drop(gds.graph.get(stale_graph_name))

### Stage Graph

In [8]:
# Stage the graph: label flagged accounts, then create the two business-rule
# relationships between users.

# Flagged accounts: every User with fraudMoneyTransfer = 1 gets the FlaggedUser label.
flag_users_query = '''
    MATCH(u:User) WHERE u.fraudMoneyTransfer=1 SET u:FlaggedUser RETURN count(u);
'''
_ = gds.run_cypher(flag_users_query)

# P2P-with-shared-card rule: u1 has a P2P relationship to u2 and both have a
# HAS_CC relationship to at least one common node.
p2p_shared_card_query = '''
    MATCH (u1:User)-[r:P2P]->(u2)
    WITH u1, u2, count(r) AS cnt
    MATCH (u1)-[:HAS_CC]->(n)<-[:HAS_CC]-(u2)
    WITH u1, u2, count(DISTINCT n) AS cnt
    MERGE(u1)-[s:P2P_WITH_SHARED_CARD]->(u2)
    RETURN count(DISTINCT s) AS cnt;
'''
_ = gds.run_cypher(p2p_shared_card_query)

# Shared-ids rule: user pairs that share a HAS_CC/USED node with degree <= 10
# and have more than two distinct nodes in common across HAS_CC/USED/HAS_IP.
# id(u1) < id(u2) keeps each unordered pair once.
# NOTE(review): the clean-up cell removes n.degree from every node and nothing
# visible in this notebook writes it back before this query runs. If it is not
# repopulated elsewhere, n.degree is null here and the predicate filters out
# every row, so no SHARED_IDS relationships are created -- TODO confirm.
shared_ids_query = '''
    MATCH (u1:User)-[:HAS_CC|USED]->(n)<-[:HAS_CC|USED]-(u2)
    WHERE n.degree <= 10 AND id(u1) < id(u2)
    WITH u1, u2, count(DISTINCT n) as cnt
    MATCH (u1)-[:HAS_CC|USED|HAS_IP]->(m)<-[:HAS_CC|USED|HAS_IP]-(u2)
    WITH u1, u2, count(DISTINCT m) as cnt
    WHERE cnt > 2
    MERGE(u1)-[s:SHARED_IDS]->(u2)
    RETURN count(DISTINCT s);
'''
_ = gds.run_cypher(shared_ids_query)


In [9]:
# create labels for fraud risk users

# The SHARED_IDS relationships are already MERGEd by the shared-ids rule in the
# staging cell above; the identical query that used to be repeated here could
# only re-match existing relationships, so it has been dropped.

# Project the User nodes with both business-rule relationships as undirected
# edges, then write each user's weakly-connected-component id to the temporary
# wccIdPrep property.
g, _ = gds.graph.project('comm-projection', ['User'], {
    'SHARED_IDS': {'orientation': 'UNDIRECTED'},
    'P2P_WITH_SHARED_CARD': {'orientation': 'UNDIRECTED'}
})
try:
    _ = gds.wcc.write(g, writeProperty='wccIdPrep')
finally:
    # Always release the projection, even when WCC fails, so 'comm-projection'
    # does not linger in memory and block a re-run of this cell.
    g.drop()

# Step 1: users that are not FlaggedUser but whose wccIdPrep matches that of
# some FlaggedUser get the FraudRiskUser label and fraudRisk = 1.
mark_fraud_risk_query = '''
    MATCH (f:FlaggedUser)
    WITH collect(DISTINCT f.wccIdPrep) AS flaggedCommunities
    MATCH(u:User) WHERE (NOT u:FlaggedUser) AND (u.wccIdPrep IN flaggedCommunities )
    SET u:FraudRiskUser
    SET u.fraudRisk=1
    RETURN count(u);
'''
_ = gds.run_cypher(mark_fraud_risk_query)

# Step 2: every User without the FraudRiskUser label gets fraudRisk = 0.
# NOTE(review): step 1 excludes FlaggedUser nodes from FraudRiskUser, so they
# also receive fraudRisk = 0 here -- confirm that this is intended.
mark_no_risk_query = '''
    MATCH (u:User) WHERE NOT u:FraudRiskUser
    SET u.fraudRisk=0
    RETURN count(u);
'''
_ = gds.run_cypher(mark_no_risk_query)

# Step 3: the temporary wccIdPrep property is no longer needed; remove it.
drop_wcc_prep_query = '''
    MATCH (u:User) REMOVE u.wccIdPrep;
'''
_ = gds.run_cypher(drop_wcc_prep_query)