# Upload the dataset on neo4j

how to connect to your database: https://neo4j.com/docs/getting-started/languages-guides/neo4j-python/

how to import data: https://neo4j.com/docs/python-manual/current/query-simple/

best practices: https://neo4j.com/developer-blog/neo4j-driver-best-practices/

In [3]:
!pip install neo4j



In [4]:
import csv
dir_path = "data_csv/"

with open(dir_path + 'customers.csv') as f:
    customers = list(csv.DictReader(f))

with open(dir_path + 'terminals.csv') as f:
    terminals = list(csv.DictReader(f))

with open(dir_path + 'transactions.csv') as f:
    transactions = list(csv.DictReader(f))


In [6]:
from neo4j import GraphDatabase
from datetime import datetime


# URI examples: "neo4j://localhost", "neo4j+s://xxx.databases.neo4j.io"
URI = "bolt://localhost:7687"
AUTH = ("neo4j", "12345678")

with GraphDatabase.driver(URI, auth=AUTH) as driver:

    driver.verify_connectivity()
    
    # CREATE CONSTRAINT ON (c:Customer) ASSERT c.CUSTOMER_ID IS UNIQUE
    # CREATE CONSTRAINT ON (t:Terminal) ASSERT t.TERMINAL_ID IS UNIQUE
    # CREATE CONSTRAINT ON ()-[r:MADE_TRANSACTION]->() ASSERT r.TRANSACTION_ID IS UNIQUE

    try:
        # Create some nodes
        # La query MERGE è utile in quanto verifica se il nodo esiste già, sulla base delle condizioni specificate,
        # e lo unisce se esiste o lo crea se non esiste.
        for customer in customers:
            records, summary, keys = driver.execute_query(
                #CREATE CONSTRAINT ON (c:Customer) ASSERT c.CUSTOMER_ID IS UNIQUE
                """
                MERGE (c:Customer {CUSTOMER_ID: toInteger($customer.CUSTOMER_ID), x_customer_id: toFloat($customer.x_customer_id),y_customer_id: toFloat($customer.y_customer_id), mean_amount: toFloat($customer.mean_amount), std_amount: toFloat($customer.std_amount), mean_nb_tx_per_day: toFloat($customer.mean_nb_tx_per_day), available_terminals: $customer.available_terminals, nb_terminals: toInteger($customer.nb_terminals)})
                """,
                customer=customer,
                database_="neo4j",
            )

        for terminal in terminals:
            records, summary, keys = driver.execute_query(
                #CREATE CONSTRAINT ON (t:Terminal) ASSERT t.TERMINAL_ID IS UNIQUE
                """
                MERGE (t:Terminal {TERMINAL_ID: toInteger($terminal.TERMINAL_ID), x_terminal_id: toFloat($terminal.x_terminal_id),y_terminal_id: toFloat($terminal.y_terminal_id)})
                """,
                terminal=terminal,
                database_="neo4j",
            )

        # Create some relationships
        for transaction in transactions:
            transaction['TX_DATETIME'] = datetime.strptime(transaction['TX_DATETIME'], "%Y-%m-%d %H:%M:%S")
            records, summary, keys = driver.execute_query(
                #CREATE CONSTRAINT ON ()-[r:MADE_TRANSACTION]->() ASSERT r.TRANSACTION_ID IS UNIQUE
                """
                MATCH (customer:Customer {CUSTOMER_ID: toInteger($transaction.CUSTOMER_ID)})
                MATCH (terminal:Terminal {TERMINAL_ID: toInteger($transaction.TERMINAL_ID)})
                MERGE (customer)-[:MADE_TRANSACTION {TRANSACTION_ID: toInteger($transaction.TRANSACTION_ID), TX_DATETIME: datetime($transaction.TX_DATETIME), TX_AMOUNT: toFloat($transaction.TX_AMOUNT), TX_TIME_SECONDS: toInteger($transaction.TX_TIME_SECONDS), TX_TIME_DAYS: toInteger($transaction.TX_TIME_DAYS), TX_FRAUD: toInteger($transaction.TX_FRAUD), TX_FRAUD_SCENARIO: toInteger($transaction.TX_FRAUD_SCENARIO)}]->(terminal)
                """,
                transaction=transaction,
                database_="neo4j",
            )

    except Exception as e:
        print(e)


# Queries

### Query 1

For each customer checks that the spending frequency and the spending amounts of the last
month is under the usual spending frequency and the spending amounts for the same
period.

In [8]:
from datetime import datetime, timedelta

from neo4j import GraphDatabase
URI = "bolt://localhost:7687"
AUTH = ("neo4j", "12345678")

now = datetime.now()
start_of_current_month = datetime(now.year, now.month, 1)
end_of_last_month = datetime(now.year, now.month, 1) - timedelta(days=1)
start_of_last_month = datetime(end_of_last_month.year, end_of_last_month.month, 1)

print(start_of_last_month)
print(start_of_current_month)

with GraphDatabase.driver(URI, auth=AUTH) as driver:
    try:
        parameters = {
            "start_of_last_month": start_of_last_month.strftime("%Y-%m-%dT%H:%M:%S"),
            "start_of_current_month": start_of_current_month.strftime("%Y-%m-%dT%H:%M:%S"),
        }
        records, summary, keys = driver.execute_query(
            """
            MATCH (c:Customer)-[t:MADE_TRANSACTION]->(:Terminal)
            WHERE t.TX_DATETIME >= datetime($parameters.start_of_last_month) AND t.TX_DATETIME < datetime($parameters.start_of_current_month)
            WITH c, COUNT(t) AS lastMonthFrequency, SUM(toFloat(t.TX_AMOUNT)) AS lastMonthAmount
            RETURN c.CUSTOMER_ID AS customerId,
                   CASE
                      WHEN lastMonthFrequency < c.mean_nb_tx_per_day
                      THEN "under the usual"
                      ELSE "over the usual"
                   END AS spending_frequency,
                   CASE
                      WHEN lastMonthAmount < c.mean_amount
                      THEN "under the usual"
                      ELSE "is over the usual"
                   END AS spending_amounts
            """,
            parameters=parameters,
            database="neo4j"
        )

        # Loop on result
        for record in records:
            print(record)

        print("La query ha restituito {records_count} record in {time} ms.".format(
            records_count=len(records),
            time=summary.result_consumed_after    #result_available_after
        ))

    except Exception as e:
        print(e)


2024-01-01 00:00:00
2024-02-01 00:00:00
<Record customerId=0 spending_frequency='over the usual' spending_amounts='is over the usual'>
<Record customerId=1 spending_frequency='over the usual' spending_amounts='is over the usual'>
<Record customerId=2 spending_frequency='over the usual' spending_amounts='is over the usual'>
<Record customerId=3 spending_frequency='over the usual' spending_amounts='is over the usual'>
<Record customerId=4 spending_frequency='over the usual' spending_amounts='is over the usual'>
<Record customerId=5 spending_frequency='over the usual' spending_amounts='is over the usual'>
<Record customerId=6 spending_frequency='over the usual' spending_amounts='is over the usual'>
<Record customerId=7 spending_frequency='over the usual' spending_amounts='is over the usual'>
<Record customerId=8 spending_frequency='over the usual' spending_amounts='is over the usual'>
<Record customerId=9 spending_frequency='over the usual' spending_amounts='is over the usual'>
<Record cu

### Query 2

For each terminal identify the possible fraudulent transactions. The fraudulent transactions
are those whose import is higher than 20% of the maximal import of the transactions
executed on the same terminal in the last month.

In [38]:
from neo4j import GraphDatabase

URI = "bolt://localhost:7687"
AUTH = ("neo4j", "12345678")

with GraphDatabase.driver(URI, auth=AUTH) as driver:
    try:
        records, summary, keys = driver.execute_query(
            """
            MATCH (:Customer)-[t:MADE_TRANSACTION]->(tm:Terminal)<-[fraud:MADE_TRANSACTION]-(:Customer)
            WHERE t.TX_DATETIME >= fraud.TX_DATETIME - duration({months: 1}) AND t.TX_DATETIME < fraud.TX_DATETIME
            WITH tm, fraud, MAX(t.TX_AMOUNT) AS max_amount
            WHERE fraud.TX_AMOUNT > max_amount*1.2
            RETURN tm.TERMINAL_ID as terminalId,
                   fraud.TRANSACTION_ID as transactionId,
                   fraud.TX_AMOUNT as tx_amount,
                   max_amount
            ORDER BY tm.TERMINAL_ID
            """,
            database="neo4j"
        )
        
        """ se vuoi che vengano raggrupparti per terminalID usa questo, meno leggibile però
        RETURN DISTINCT tm.TERMINAL_ID as terminalId,
               COLLECT(fraud.TRANSACTION_ID) as transactionsId,
               
                   //opzionali
                   COLLECT(fraud.TX_AMOUNT) as tx_amounts,
                   COLLECT(max_amount) as max_amount
        """

        # Loop on result
        for record in records:
            print(record)

        print("La query ha restituito {records_count} record in {time} ms.".format(
            records_count=len(records),
            time=summary.result_consumed_after    #result_available_after
        ))

    except Exception as e:
        print(e)

<Record terminalId=0 transactionId=96746 tx_amount=166.38 max_amount=136.86>
<Record terminalId=0 transactionId=2304 tx_amount=171.3 max_amount=83.75>
<Record terminalId=1 transactionId=13466 tx_amount=101.67 max_amount=34.94>
<Record terminalId=1 transactionId=38505 tx_amount=174.96 max_amount=126.83>
<Record terminalId=1 transactionId=4089 tx_amount=34.94 max_amount=9.96>
<Record terminalId=2 transactionId=46786 tx_amount=209.35 max_amount=163.38>
<Record terminalId=2 transactionId=4873 tx_amount=65.34 max_amount=38.07>
<Record terminalId=2 transactionId=17496 tx_amount=122.14 max_amount=81.18>
<Record terminalId=2 transactionId=31064 tx_amount=163.38 max_amount=122.14>
<Record terminalId=2 transactionId=85464 tx_amount=353.65000000000003 max_amount=231.29999999999998>
<Record terminalId=3 transactionId=13898 tx_amount=169.09 max_amount=115.34>
<Record terminalId=3 transactionId=905 tx_amount=72.45 max_amount=43.8>
<Record terminalId=3 transactionId=163857 tx_amount=456.1 max_amount=

### Query 3

Given a user u, determine the “co-customer-relationships CC of degree k”. A user u’ is a co-
customer of u if you can determine a chain “u1-t1-u2-t2-…tk-1-uk“ such that u1=u, uk=u’, and for
each 1<=I,j<=k, ui <> uj, and t1,..tk-1 are the terminals on which a transaction has been
executed. Therefore, CCk(u)={u’| a chain exists between u and u’ of degree k}. Please, note
that depending on the adopted model, the computation of CCk(u) could be quite
complicated. Consider therefore at least the computation of CC3(u) (i.e. the co-costumer
relationships of degree 3).

grado tre significa:
(u:Customer)-[:MADE_TRANSACTION]->(:Terminal)<-[:MADE_TRANSACTION]-(:Customer)-[:MADE_TRANSACTION]->(:Terminal)<-[:MADE_TRANSACTION]-(u3:Customer)

contiamo 4 relationship di tipo transaction
            
MATCH p=(c1:Customer {CUSTOMER_ID: toInteger($parameters.user_id)})-[:MADE_TRANSACTION*2]-(c2:Customer)
            WHERE c1 <> c2
            UNWIND nodes(p) as n
            WITH p, c2, count(DISTINCT n) as num
            RETURN DISTINCT c2.CUSTOMER_ID as customerId, min(num)
            LIMIT 1000
            
non so perchè ma senza limit carica all'infinito, anche se ritorna 40 tuple in ogni caso...

In [39]:
from neo4j import GraphDatabase
URI = "bolt://localhost:7687"
AUTH = ("neo4j", "12345678")

#given a user
user_id = "390"
with GraphDatabase.driver(URI, auth=AUTH) as driver:
    try:
        parameters = {
            "user_id": user_id,
        }
        records, summary, keys = driver.execute_query(
            """
            MATCH p=(c1:Customer {CUSTOMER_ID: toInteger($parameters.user_id)})-[:MADE_TRANSACTION*4]-(c2:Customer)
            WHERE c1 <> c2
            RETURN DISTINCT c2.CUSTOMER_ID as customerId
            LIMIT 1000
            """,
            parameters=parameters,
            database="neo4j"
        )
    

        # Loop on result
        for record in records:
            print(record)

        print("La query ha restituito {records_count} record in {time} ms.".format(
            records_count=len(records),
            time=summary.result_consumed_after    #result_available_after
        ))

    except Exception as e:
        print(e)

<Record customerId=11>
<Record customerId=624>
<Record customerId=128>
<Record customerId=907>
<Record customerId=521>
<Record customerId=461>
<Record customerId=379>
<Record customerId=340>
<Record customerId=336>
<Record customerId=63>
<Record customerId=167>
<Record customerId=927>
<Record customerId=9>
<Record customerId=679>
<Record customerId=154>
<Record customerId=34>
<Record customerId=479>
<Record customerId=134>
<Record customerId=959>
<Record customerId=657>
<Record customerId=926>
<Record customerId=208>
<Record customerId=633>
<Record customerId=894>
<Record customerId=885>
<Record customerId=844>
<Record customerId=858>
<Record customerId=536>
<Record customerId=902>
<Record customerId=677>
<Record customerId=547>
<Record customerId=64>
<Record customerId=476>
<Record customerId=846>
<Record customerId=72>
<Record customerId=0>
<Record customerId=764>
<Record customerId=341>
<Record customerId=362>
<Record customerId=919>
La query ha restituito 40 record in 1406 ms.


Extend the logical model that you have stored in the NOSQL database by introducing the
following information:

  i. Each transaction should be extended with:
      1. The period of the day {morning, afternoon, evening, night} in which the
         transaction has been executed.
      2. The kind of products that have been bought through the transaction {high-
         tech, food, clothing, consumable, other}
      3. The feeling of security expressed by the user. This is an integer value
         between 1 and 5 expressed by the user when conclude the transaction.
  The values can be chosen randomly.

In [40]:
from neo4j import GraphDatabase
import random

URI = "bolt://localhost:7687"
AUTH = ("neo4j", "12345678")

with GraphDatabase.driver(URI, auth=AUTH) as driver:
    try:
        # siccome le transazioni son troppe dobbiamo scomporre la query in batch più piccoli
        records, summary, keys = driver.execute_query(
            """
            MATCH (:Customer)-[t:MADE_TRANSACTION]->(:Terminal) 
            RETURN t.TRANSACTION_ID
            """,
            database_="neo4j"
        )
        
        transaction_ids = [record["t.TRANSACTION_ID"] for record in records]
        BATCH_SIZE = 1000
        
        for i in range(0, len(transaction_ids), BATCH_SIZE):
            batch = transaction_ids[i:i+BATCH_SIZE]
            records, summary, keys = driver.execute_query(
                """
                UNWIND $batch AS trans
                WITH collect(trans) AS transactionList
                MATCH (:Customer)-[t:MADE_TRANSACTION]->(:Terminal)  WHERE t.TRANSACTION_ID IN transactionList
                SET
                    t.period_of_day = 
                        CASE
                            WHEN datetime(t.TX_DATETIME).hour >= 6 AND datetime(t.TX_DATETIME).hour < 12 THEN "morning"
                            WHEN datetime(t.TX_DATETIME).hour >= 12 AND datetime(t.TX_DATETIME).hour < 18 THEN "afternoon"
                            WHEN datetime(t.TX_DATETIME).hour >= 18 AND datetime(t.TX_DATETIME).hour < 24 THEN "evening"
                            ELSE "night"
                        END,
                        
                    t.product_type = 
                        CASE
                          WHEN $rand < 0.2 THEN "high-tech"
                          WHEN $rand < 0.4 THEN "food"
                          WHEN $rand < 0.6 THEN "clothing"
                          WHEN $rand < 0.8 THEN "consumable"
                          ELSE "other"
                        END,
                        
                    t.security_feeling = toInteger(rand() * 5) + 1
                """,
                batch=batch,
                rand=random.random(),
                database_="neo4j"
            )

        print("La query ha restituito {records_count} record in {time} ms.".format(
            records_count=len(records),
            time=summary.result_consumed_after    #result_available_after
        ))

    except Exception as e:
        print(e)


La query ha restituito 0 record in 0 ms.


 ii.  Customers that make more than three transactions from the same terminal
      expressing a similar average feeling of security should be connected as
      “buying_friends”. Therefore also this kind of relationship should be explicitly stored
      in the NOSQL database and can be queried. Note, two average feelings of security
      are considered similar when their difference is lower than 1.
      
WITH c1, c2, AVG(t1.security_feeling) AS avg1, AVG(t2.security_feeling) AS avg2
            WHERE abs(avg1 - avg2) < 1
            RETURN c1.CUSTOMER_ID, c2.CUSTOMER_ID
            

MATCH p=(c1:Customer)-[:MADE_TRANSACTION]->(t:Terminal),
                  p2=(c2:Customer)-[:MADE_TRANSACTION]->(t:Terminal)
            WITH c1, c2, count(p) as count1, count(p2) as count2
            WHERE count1 > 3 AND count2 > 3
            RETURN c1.CUSTOMER_ID, c2.CUSTOMER_ID, count1, count2


In [55]:
from neo4j import GraphDatabase
URI = "bolt://localhost:7687"
AUTH = ("neo4j", "12345678")

with GraphDatabase.driver(URI, auth=AUTH) as driver:
    try:
        records, summary, keys = driver.execute_query(
            """
            MATCH p=(c1:Customer)-[t1:MADE_TRANSACTION]->(t:Terminal)<-[t2:MADE_TRANSACTION]-(c2:Customer)
            WHERE c1 <> c2
            WITH c1, c2, t, count(DISTINCT p) as num
            RETURN c1.CUSTOMER_ID, c2.CUSTOMER_ID, num, t.TERMINAL_ID
            """,
            database_="neo4j"
        )

        # Loop on result
        for record in records:
            print(record)

        print("La query ha restituito {records_count} record in {time} ms.".format(
            records_count=len(records),
            time=summary.result_consumed_after    #result_available_after
        ))

    except Exception as e:
        print(e)

IOPub data rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_data_rate_limit`.

Current values:
ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
ServerApp.rate_limit_window=3.0 (secs)




<Record c1.CUSTOMER_ID=858 c2.CUSTOMER_ID=885 num=330 t.TERMINAL_ID=824>
<Record c1.CUSTOMER_ID=844 c2.CUSTOMER_ID=885 num=300 t.TERMINAL_ID=824>
<Record c1.CUSTOMER_ID=677 c2.CUSTOMER_ID=9 num=645 t.TERMINAL_ID=824>
<Record c1.CUSTOMER_ID=633 c2.CUSTOMER_ID=9 num=405 t.TERMINAL_ID=824>
<Record c1.CUSTOMER_ID=927 c2.CUSTOMER_ID=9 num=240 t.TERMINAL_ID=824>
<Record c1.CUSTOMER_ID=902 c2.CUSTOMER_ID=9 num=105 t.TERMINAL_ID=824>
<Record c1.CUSTOMER_ID=885 c2.CUSTOMER_ID=9 num=450 t.TERMINAL_ID=824>
<Record c1.CUSTOMER_ID=894 c2.CUSTOMER_ID=9 num=270 t.TERMINAL_ID=824>
<Record c1.CUSTOMER_ID=9 c2.CUSTOMER_ID=9 num=210 t.TERMINAL_ID=824>
<Record c1.CUSTOMER_ID=858 c2.CUSTOMER_ID=9 num=165 t.TERMINAL_ID=824>
<Record c1.CUSTOMER_ID=844 c2.CUSTOMER_ID=9 num=150 t.TERMINAL_ID=824>
<Record c1.CUSTOMER_ID=677 c2.CUSTOMER_ID=894 num=774 t.TERMINAL_ID=824>
<Record c1.CUSTOMER_ID=633 c2.CUSTOMER_ID=894 num=486 t.TERMINAL_ID=824>
<Record c1.CUSTOMER_ID=927 c2.CUSTOMER_ID=894 num=288 t.TERMINAL_ID=82

In [ ]:
#code to print a list of prime numbers
