In [0]:
from graphdatascience import GraphDataScience
from getpass import getpass

In [0]:
# Prompt for the database password interactively so it is never stored in the notebook source.
neo4j_password = getpass("Neo4j password")

Neo4j password [REDACTED]

In [0]:
# Connection settings for the Neo4j instance; the password comes from the getpass prompt.
url = "neo4j+s://5f8297f1.databases.neo4j.io"
username = "neo4j"
# `gds` is the shared client that every later cell uses to run Cypher.
gds = GraphDataScience(url, auth=(username, neo4j_password))

In [0]:
def _quote_cypher_identifier(identifier):
    """Return `identifier` wrapped in backticks, with embedded backticks doubled."""
    return "`" + str(identifier).replace("`", "``") + "`"


def create_fulltext_index(index_name, label, property, analyzer):
    """Create a full-text index on one node property unless it already exists.

    Args:
        index_name: Name of the index; sent as the ``$indexName`` query parameter.
        label: Node label whose nodes are indexed.
        property: Node property to index. (The name shadows the ``property``
            builtin but is kept so existing keyword callers keep working.)
        analyzer: Full-text analyzer name; sent as the ``$analyzer`` parameter.

    Returns:
        None. The statement is executed through the module-level ``gds`` client.
    """
    # Labels and property keys cannot be passed as query parameters in this
    # statement, so they are interpolated into the text. Backtick-quoting them
    # keeps names with spaces, dashes or backticks from breaking or altering
    # the statement.
    quoted_label = _quote_cypher_identifier(label)
    quoted_property = _quote_cypher_identifier(property)
    query = f"""
        CREATE FULLTEXT INDEX $indexName IF NOT EXISTS FOR (n:{quoted_label}) ON EACH [n.{quoted_property}]
        OPTIONS {{
            indexConfig: {{
                `fulltext.analyzer`: $analyzer,
                `fulltext.eventually_consistent`: true
            }}
        }}"""
    gds.run_cypher(query, {"indexName": index_name, "analyzer": analyzer})

# One full-text index per identifying attribute:
# (index name, node label, indexed property, analyzer)
for index_args in (
    ("full_name_full_text", "PersonName", "fullName", "keyword"),
    ("phone_number_full_text", "PhoneNumber", "phoneNumber", "keyword"),
    ("ssn_full_text", "SocialSecurityNumber", "socialSecurityNumber", "keyword"),
    ("email_full_text", "Email", "email", "email"),
    ("dob_string_full_text", "DOB", "birthdateString", "keyword"),
):
    create_fulltext_index(*index_args)

# List the full-text indexes so the cell output shows them.
gds.run_cypher("show fulltext indexes")

Failed to write data to connection ResolvedIPv4Address(('34.28.32.244', 7687)) (ResolvedIPv4Address(('34.28.32.244', 7687)))
Failed to write data to connection IPv4Address(('5f8297f1.databases.neo4j.io', 7687)) (ResolvedIPv4Address(('34.28.32.244', 7687)))


Unnamed: 0,id,name,state,populationPercent,type,entityType,labelsOrTypes,properties,indexProvider,owningConstraint,lastRead,readCount
0,18,dob_string_full_text,ONLINE,100.0,FULLTEXT,NODE,[DOB],[birthdateString],fulltext-1.0,,,
1,17,email_full_text,ONLINE,100.0,FULLTEXT,NODE,[Email],[email],fulltext-1.0,,,0.0
2,14,full_name_full_text,ONLINE,100.0,FULLTEXT,NODE,[PersonName],[fullName],fulltext-1.0,,2024-06-08T20:14:55.050000000+00:00,30.0
3,15,phone_number_full_text,ONLINE,100.0,FULLTEXT,NODE,[PhoneNumber],[phoneNumber],fulltext-1.0,,,0.0
4,16,ssn_full_text,ONLINE,100.0,FULLTEXT,NODE,[SocialSecurityNumber],[socialSecurityNumber],fulltext-1.0,,,0.0


In [0]:
# Characters that carry special meaning in Lucene's classic query syntax.
_LUCENE_SPECIAL_CHARS = frozenset('\\+-!():^[]"{}~*?|&/')

# One entry per searchable clue:
# (name reported in the result, Cypher parameter, full-text index,
#  relationship from UserDescription, property on the matched node)
_PROFILE_CLUES = (
    ("dob", "dob", "dob_string_full_text", "HAS_DOB", "birthdateString"),
    ("email", "email", "email_full_text", "HAS_EMAIL", "email"),
    ("name", "fullName", "full_name_full_text", "HAS_NAME", "fullName"),
    ("phone", "phone", "phone_number_full_text", "HAS_PHONE", "phoneNumber"),
    ("ssn", "ssn", "ssn_full_text", "HAS_SOCIAL_SECURITY_NUMBER", "socialSecurityNumber"),
)

# Sub-query run for one clue: fuzzy index lookup, hop to the owning user, and
# tag the hit as "Exact" or "Fuzzy" by comparing with the raw input value.
_CLUE_BRANCH_TEMPLATE = """
            CALL db.index.fulltext.queryNodes("{index}", ${param}Query, {{limit:5}}) YIELD node, score
            MATCH (u:UserDescription)-[:{rel}]->(node)
            RETURN u, score,
            {{property: "{clue}", matchType: CASE WHEN node.{node_prop} = ${param} THEN "Exact" ELSE "Fuzzy" END}} AS matched
"""

# Aggregation over all clue hits; keeps only users with more than one hit.
_PROFILE_RESULT_CLAUSE = """
        WITH u, avg(score) AS avgScore, collect(matched) AS cluesMatched
        WHERE size(cluesMatched) > 1

        RETURN u.userId AS userId,
        [clue IN cluesMatched WHERE clue['matchType'] = 'Exact' | clue['property']] AS exactMatchedProperties,
        [clue IN cluesMatched WHERE clue['matchType'] = 'Fuzzy' | clue['property']] AS fuzzyMatchedProperties,
        avgScore,
        COLLECT{ MATCH (u)-[:HAS_DOB]->(n) RETURN n.birthdateString } AS birthdates,
        COLLECT{ MATCH (u)-[:HAS_EMAIL]->(n) RETURN n.email } AS emails,
        COLLECT{ MATCH (u)-[:HAS_NAME]->(n) RETURN n.fullName } AS fullNames,
        COLLECT{ MATCH (u)-[:HAS_PHONE]->(n) RETURN n.phoneNumber } AS phoneNumbers,
        COLLECT{ MATCH (u)-[:HAS_SOCIAL_SECURITY_NUMBER]->(n) RETURN n.socialSecurityNumber } AS socialSecurityNumbers

        ORDER BY size(cluesMatched) DESC, avgScore DESC
"""


def _escape_lucene(text):
    """Backslash-escape Lucene query-syntax characters so `text` is matched literally."""
    return "".join("\\" + ch if ch in _LUCENE_SPECIAL_CHARS else ch for ch in text)


def search_profile(dob, email, full_name, phone, ssn):
    """Look up user profiles that match several identifying clues, exactly or fuzzily.

    Every non-blank clue is searched in its full-text index with a Lucene fuzzy
    term query (trailing ``~``), taking at most 5 index hits per clue. Hits are
    joined to their ``UserDescription`` node. Users with more than one matched
    clue are returned, ordered by number of matched clues and then by average
    index score.

    Args:
        dob: Birth date string to search for.
        email: Email address to search for.
        full_name: Full name to search for.
        phone: Phone number to search for.
        ssn: Social security number to search for.
        Any clue may be ``None`` or blank, in which case it is left out of the search.

    Returns:
        The value returned by ``gds.run_cypher`` for the query, with columns
        ``userId``, ``exactMatchedProperties``, ``fuzzyMatchedProperties``,
        ``avgScore`` and the user's stored birthdates, emails, full names,
        phone numbers and social security numbers.

    Raises:
        ValueError: If every clue is ``None`` or blank.
    """
    supplied = {"dob": dob, "email": email, "fullName": full_name, "phone": phone, "ssn": ssn}

    params = {}
    branches = []
    for clue, param, index, rel, node_prop in _PROFILE_CLUES:
        raw_value = supplied[param]
        text = "" if raw_value is None else str(raw_value).strip()
        if not text:
            # A blank clue would reach the index as a bare "~" (or null); skip it.
            continue
        # The raw value drives the Exact/Fuzzy comparison; the escaped copy is
        # what the Lucene parser sees, so punctuation in the input cannot be
        # read as query operators.
        params[param] = raw_value
        params[param + "Query"] = _escape_lucene(text) + "~"
        branches.append(
            _CLUE_BRANCH_TEMPLATE.format(
                index=index, param=param, rel=rel, clue=clue, node_prop=node_prop
            )
        )

    if not branches:
        raise ValueError("search_profile requires at least one non-empty clue")

    query = (
        "\n        CALL {"
        + "\n            UNION ALL\n".join(branches)
        + "        }\n"
        + _PROFILE_RESULT_CLAUSE
    )
    result_df = gds.run_cypher(query, params)
    return result_df

In [0]:
# Baseline: every clue is identical to the stored profile, so all five properties match exactly.
search_profile("1930-08-28", "dolored.laborumb32@ex.com", "Belitd, Iutk Rincididunts", "268-023-9381", "996-32-0300")

Unnamed: 0,userId,exactMatchedProperties,fuzzyMatchedProperties,avgScore,birthdates,emails,fullNames,phoneNumbers,socialSecurityNumbers
0,4fq0zh1j-0y31-9i2u-vjp7-z9h9gr8e59n4,"[dob, email, name, phone, ssn]",[],8.145221,[1930-08-28],[dolored.laborumb32@ex.com],"[Belitd, Iutk Rincididunts]",[268-023-9381],[996-32-0300]


In [0]:
# Same person with small typos in dob, email, phone and ssn: only the name is exact, the rest match fuzzily.
search_profile("1933-08-28", "dolored.laborumb32@en.com", "Belitd, Iutk Rincididunts", "268-033-9381", "996-32-00300")

Unnamed: 0,userId,exactMatchedProperties,fuzzyMatchedProperties,avgScore,birthdates,emails,fullNames,phoneNumbers,socialSecurityNumbers
0,4fq0zh1j-0y31-9i2u-vjp7-z9h9gr8e59n4,[name],"[dob, email, phone, ssn]",7.718754,[1930-08-28],[dolored.laborumb32@ex.com],"[Belitd, Iutk Rincididunts]",[268-023-9381],[996-32-0300]


In [0]:
# Unrelated dob and email plus typos in phone and ssn: the profile is still found through name, phone and ssn.
search_profile("1993-07-14", "xyz@abc.com", "Belitd, Iutk Rincididunts", "268-033-9381", "996-32-00300")

Unnamed: 0,userId,exactMatchedProperties,fuzzyMatchedProperties,avgScore,birthdates,emails,fullNames,phoneNumbers,socialSecurityNumbers
0,4fq0zh1j-0y31-9i2u-vjp7-z9h9gr8e59n4,[name],"[phone, ssn]",9.005496,[1930-08-28],[dolored.laborumb32@ex.com],"[Belitd, Iutk Rincididunts]",[268-023-9381],[996-32-0300]
