In [13]:
from graphdatascience import GraphDataScience
from getpass import getpass

# Connect to Neo4j

In [14]:
# Ask for the database password interactively so it is never stored in the notebook.
neo4j_password = getpass(prompt="Neo4j password")

In [15]:
# Endpoint of the target Neo4j instance and the account used to log in.
url = "neo4j+s://5f8297f1.databases.neo4j.io"
username = "neo4j"
# Shared client object; the helper defined below sends its Cypher through it.
# The password is the value collected by the getpass prompt.
# NOTE(review): if this endpoint is an AuraDS instance, check whether the
# client should be constructed with aura_ds=True — confirm against the docs.
gds = GraphDataScience(url, auth=(username, neo4j_password))

# Create new user descriptions that are similar to existing profiles, except that one property differs by a small change

In [35]:
def create_similar_profile(labels_to_change, props_to_change, profile_count):
    """Create near-duplicate copies of randomly chosen UserDescription nodes.

    The Cypher below writes to the database. For each sampled source
    profile (``u1``) it:

    1. creates a new ``UserDescription`` node (``u2``) whose ``userId`` is a
       fresh ``apoc.create.uuid()`` value;
    2. for every outgoing relationship of ``u1`` to a node whose *first*
       label is in ``labels_to_change``, copies that node's properties,
       reverses the order of the last two characters of each property named
       in ``props_to_change``, merges a node with the same label and the
       resulting properties (``apoc.merge.node``), and links ``u2`` to it
       with the same relationship type;
    3. re-creates every other outgoing relationship of ``u1`` (target's
       first label not in ``labels_to_change``) from ``u2`` to the same
       target node.

    Args:
        labels_to_change: List of node labels whose neighbours get a
            perturbed copy instead of being shared with the source profile.
        props_to_change: List of property keys whose values are perturbed
            on those neighbours.
        profile_count: Maximum number of source profiles to sample
            (used as the Cypher ``LIMIT``).

    Returns:
        Whatever ``gds.run_cypher`` returns for the query: distinct rows with
        the columns ``id1`` (source ``userId``) and ``id2`` (new ``userId``).

    NOTE(review): ``u2`` is created before the relationship ``MATCH``
    clauses run, so a sampled profile without a matching relationship still
    leaves a new, unconnected ``u2`` node behind while producing no output
    row. Also, when a value's last two characters are equal the swap leaves
    it unchanged. Confirm whether either case matters for this dataset.
    """
    # The query text is kept verbatim; only the Python wrapper around it differs.
    clone_query = """
        MATCH (u1:UserDescription)
        WITH u1 ORDER BY rand()
        LIMIT $profileCount
        CREATE (u2:UserDescription {userId:apoc.create.uuid()})
        WITH u1, u2
        MATCH (u1)-[r]->(i)
        WHERE labels(i)[0] IN $labelsToChange
        WITH u1, u2, labels(i)[0] AS labelToChange,
        type(r) AS typeToCopy, properties(i) AS props
        WITH u1, u2, labelToChange, typeToCopy, props, [key IN keys(props) WHERE key in $propsToChange | key] AS propsToChange
        WITH  u1, u2, labelToChange, typeToCopy, props, propsToChange, apoc.map.mget(props, propsToChange) AS valuesToChange
        WITH  u1, u2, labelToChange, typeToCopy, props, propsToChange, [val in valuesToChange | substring(val, 0, size(val)-2) + reverse(substring(val, size(val)-2))] AS changedValues
        WITH  u1, u2, labelToChange, typeToCopy, props, apoc.map.fromLists(propsToChange, changedValues) AS updateProps
        WITH u1, u2, labelToChange, typeToCopy, props, apoc.map.merge(props, updateProps) AS updateProps
        WITH u1, u2, labelToChange, typeToCopy, updateProps
        CALL apoc.merge.node([labelToChange], updateProps) YIELD node
        WITH u1, u2, typeToCopy, node AS n
        CALL apoc.create.relationship(u2, typeToCopy, null, n) YIELD rel
        WITH DISTINCT u1, u2
        MATCH (u1)-[r]->(i) WHERE NOT labels(i)[0] IN $labelsToChange
        CALL apoc.create.relationship(u2, type(r), null, i) YIELD rel
        RETURN DISTINCT u1.userId AS id1, u2.userId AS id2
        """
    # Cypher parameter names are camelCase; map the Python arguments onto them.
    query_params = dict(
        labelsToChange=labels_to_change,
        propsToChange=props_to_change,
        profileCount=profile_count,
    )
    return gds.run_cypher(clone_query, query_params)

In [21]:
# Up to 500 copied profiles whose phone number differs from the source profile's.
phone_diff = create_similar_profile(
    labels_to_change=["PhoneNumber"],
    props_to_change=["phoneNumber"],
    profile_count=500,
)

In [40]:
# Show the id pairs for the phone-number variants (id1 = source profile, id2 = generated copy).
phone_diff

Unnamed: 0,id1,id2
0,g1bvctbd-q9td-2id2-djin-jf3jr05tdiiq,0e64c7ef-c4de-4a5f-93d5-3fc96a16acad
1,3qs23vuq-fn0s-mlcg-axu7-thnvhi29pt2v,2cd6a007-1587-41f1-bcd2-037430094407
2,j8ccs70i-7wlw-adsv-tnny-8he7mxwfe9px,be8bd515-4b29-44fe-bb22-b260e2149a38
3,2l0ex1n1-xbxr-leck-lw83-x6hwsefz84bq,dd836e6a-8bd2-4140-a7d5-9d0da707e7d9
4,hgcfqqmc-pysk-xk41-wlee-57vbfwn6y1qo,4dce3fe4-7070-4012-8e91-3891f7b8f7b5
...,...,...
495,awrsp1bo-dwn6-ql2k-xxoa-m7t6eiwxw9nz,d451ea72-7c6e-463c-80c9-9c8cb192ada2
496,l5ks4wzf-p8ds-mdb2-b1s7-lam8v3o5imi1,ade62b2e-d3d5-4828-b1d9-c72a3a0bb0be
497,fvfetjvn-kswb-b6re-xkn6-y14yk3x3qnl2,d02c4f1e-9d8c-4a6c-ac57-50f7b391f59d
498,5ho0jmm7-ms3n-fgoo-s6nv-w2mw9yx0v7i6,17db1a4c-c438-4b23-992a-af524130ccdd


In [22]:
# Up to 500 copied profiles whose social security number differs from the source profile's.
ssn_diff = create_similar_profile(
    labels_to_change=["SocialSecurityNumber"],
    props_to_change=["socialSecurityNumber"],
    profile_count=500,
)

In [41]:
# Show the id pairs for the social-security-number variants (id1 = source profile, id2 = generated copy).
ssn_diff

Unnamed: 0,id1,id2
0,2uw9rrjv-7iaw-aiyb-61lg-sx0injz1uva6,be023eb2-5797-40e7-b51e-bb759b8f5599
1,cd0tgg2f-2wbx-03ly-jss6-pdt7ds10a3j8,d815c212-57fe-4e18-9baa-e386d9df49d0
2,oh71y3mn-orzi-c5sb-phv0-qivxikewi3sy,4faf596e-01bd-40ef-8f0e-9724fe9b9224
3,901m85ks-cw52-bwhl-f9q1-3flrj0qszd6r,4faefa76-3717-4511-81cf-592f2eb9e780
4,ikl1zioo-69vx-vbov-58sm-837ywimrmxoj,8bbd47cd-8951-4e44-9847-07a4bf97ee01
...,...,...
495,4frud64b-4gat-hsw6-lidu-leb7n3pbc8yi,39afd162-dc68-456c-bf35-b14453c3e668
496,ornzx1p5-b08o-or5k-vpl8-cz7nd5g9qe4y,5b005361-e941-4d41-a9d0-9206df222be8
497,l51p99yf-ds0s-vkdc-96uv-dgmt8b4gh81g,c5223bad-af23-4181-a173-90e00d01a654
498,94txgrlu-3avf-1oqy-imhu-465bvsuseyb0,30889bb6-8f46-45f1-b29c-f9fabe568495


In [23]:
# Up to 500 copied profiles whose email address differs from the source profile's.
email_diff = create_similar_profile(
    labels_to_change=["Email"],
    props_to_change=["email"],
    profile_count=500,
)

In [42]:
# Show the id pairs for the email variants (id1 = source profile, id2 = generated copy).
email_diff

Unnamed: 0,id1,id2
0,eg4iu59i-tjtq-l4it-37b4-s62phleja51b,ca3444b8-8619-4eef-a051-57e478cf36a0
1,jua2dk3l-dpcz-g5mp-nl5t-uni3cnha7n6z,a826f325-958b-4d35-ba42-1fcb855c455d
2,ji4ky83i-vo8c-ezpn-dijz-fq3fiq2eljnk,6c7eefc5-0a95-4fb7-8eb7-68ea5c2e9d3f
3,lfmr0vsx-3ir4-80gw-dehx-um6qbhttyihh,778652a5-6e6a-4391-88ab-9cd0a2216cb5
4,any9e9mb-2gu0-sbtp-x1v2-9vr0d8u64fm0,a40a1afb-8c33-45c8-b880-4e81ec2633d0
...,...,...
495,c3y7o1vv-t61y-rle8-7ph3-862wv9gcvrep,f9697dbf-2ef8-4ec2-a53a-3e3134b844c4
496,wbdt3img-f5ao-n2wb-tf5d-foma16zc7htx,7679920f-6c0a-407b-9e35-cd7f0200cc45
497,9vqin4zz-2vl1-co8m-aw7o-9qpz0jw7bnfw,b257a6f0-0865-4ddb-9591-e448fd64f786
498,s71bhrk0-khl7-4sun-wzmn-9nm5l511j411,d3edbed7-19cc-47d4-a236-6a19497dfa8a


In [24]:
# NOTE(review): this cell displays email_diff a second time; it looks like a
# leftover from an earlier run and can probably be removed.
email_diff

Unnamed: 0,id1,id2
0,eg4iu59i-tjtq-l4it-37b4-s62phleja51b,ca3444b8-8619-4eef-a051-57e478cf36a0
1,jua2dk3l-dpcz-g5mp-nl5t-uni3cnha7n6z,a826f325-958b-4d35-ba42-1fcb855c455d
2,ji4ky83i-vo8c-ezpn-dijz-fq3fiq2eljnk,6c7eefc5-0a95-4fb7-8eb7-68ea5c2e9d3f
3,lfmr0vsx-3ir4-80gw-dehx-um6qbhttyihh,778652a5-6e6a-4391-88ab-9cd0a2216cb5
4,any9e9mb-2gu0-sbtp-x1v2-9vr0d8u64fm0,a40a1afb-8c33-45c8-b880-4e81ec2633d0
...,...,...
495,c3y7o1vv-t61y-rle8-7ph3-862wv9gcvrep,f9697dbf-2ef8-4ec2-a53a-3e3134b844c4
496,wbdt3img-f5ao-n2wb-tf5d-foma16zc7htx,7679920f-6c0a-407b-9e35-cd7f0200cc45
497,9vqin4zz-2vl1-co8m-aw7o-9qpz0jw7bnfw,b257a6f0-0865-4ddb-9591-e448fd64f786
498,s71bhrk0-khl7-4sun-wzmn-9nm5l511j411,d3edbed7-19cc-47d4-a236-6a19497dfa8a


In [38]:
# Up to 500 copied profiles where both the phone number and the social
# security number differ from the source profile's.
phone_ssn_diff = create_similar_profile(
    labels_to_change=["PhoneNumber", "SocialSecurityNumber"],
    props_to_change=["phoneNumber", "socialSecurityNumber"],
    profile_count=500,
)

In [39]:
# Show the id pairs for the combined phone/SSN variants (id1 = source profile, id2 = generated copy).
phone_ssn_diff

Unnamed: 0,id1,id2
0,qj7y2inm-7gt6-cx2w-j3hq-c6qrugrsx52a,d9a8ee4a-f637-4a93-b3e2-cd7452ad161f
1,6508yz52-y3j4-b801-t49m-tuv0tftqxet3,298c046f-00a0-4b43-9e92-cd0ff00d81b0
2,71ib0nru-fpd4-6n0t-bdry-2b34x2d3kiy4,205c8338-02b8-4b6b-90c3-feae7d92e531
3,7ilfphby-787v-ajx6-jqro-p891kqspihr4,8c43c84f-4ec4-4a5f-9428-0d118b615e6e
4,8wezqabd-ufuf-1rfn-zxt8-uznlzro7e5hg,6219238d-6b20-4a79-a716-e2f8933eb4b0
...,...,...
495,kdit7fli-be6v-9c6n-lwix-px71r8bytkop,75da63f5-e9eb-41f5-9bb1-a75562713996
496,cwtv3b72-b6zb-om6j-jxjg-k4z2lftkf70f,31fa9bf2-0600-466f-a77e-a2b88da57e2c
497,rzvml8cd-ntb4-8nyq-9h1x-ntrbclzmuktm,fb132f37-ec48-427c-b363-d30de142c718
498,ne7go4b4-rr2x-9wuv-dfh2-4ziuut66hao9,9515a136-2fee-4243-9c67-da5bf9643675
