In [20]:
from dotenv import load_dotenv
import os
load_dotenv()
Neo4j_Pass = os.environ['NEO4J_PASS']

In [22]:
from neo4j import GraphDatabase
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# Connect to Neo4j
uri = "bolt://localhost:7687"
driver = GraphDatabase.driver(uri, auth=("neo4j", Neo4j_Pass))

def create_data(tx):
    query = """

           
    CREATE (c1:Customer {id: 'c1', name: 'Alice'}),
       (c2:Customer {id: 'c2', name: 'Bob'}),
       (c3:Customer {id: 'c3', name: 'Charlie'}),
       (p1:Product {id: 'p1', name: 'ProductA'}),
       (p2:Product {id: 'p2', name: 'ProductB'}),
       (c1)-[:MADE_PURCHASE {amount: 100}]->(p1),
       (c1)-[:MADE_PURCHASE {amount: 150}]->(p2),
       (c2)-[:MADE_PURCHASE {amount: 200}]->(p1),
       (c3)-[:MADE_PURCHASE {amount: 50}]->(p2),
       (c1)-[:REFERRED_BY]->(c2),
       (c2)-[:REFERRED_BY]->(c3);

    """    
    tx.run(query)
    query= """ 
    
    CREATE (c1)-[:CHURNED]->(c1),
           (c3)-[:CHURNED]->(c3);
    """    
    tx.run(query)
    
    return #data

def extract_customer_data(tx):
    query = """

           
    MATCH (c:Customer)
    OPTIONAL MATCH (c)-[r:MADE_PURCHASE]->(p:Product)
    OPTIONAL MATCH (c)<-[:REFERRED_BY]-(referrer:Customer)
    OPTIONAL MATCH (c)-[:CHURNED]->(churned:Customer)
    RETURN c.id AS customer_id, 
           COALESCE(COUNT(r), 0) AS num_transactions, 
           COALESCE(AVG(r.amount), 0) AS avg_transaction_amount, 
           CASE WHEN COUNT(churned) > 0 THEN 1 ELSE 0 END AS churned
    """
    result = tx.run(query)
    data = [record.data() for record in result]
    return data

def main():
    with driver.session() as session:
        session.write_transaction(create_data)
        data = session.read_transaction(extract_customer_data)

    df = pd.DataFrame(data).dropna()
    print(df)
    # Features and Labels
    X = df[['num_transactions', 'avg_transaction_amount']]
    y = df['churned']

    # Train-Test Split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

    # Model Training
    model = RandomForestClassifier(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    # Prediction and Evaluation
    y_pred = model.predict(X_test)
    print(classification_report(y_test, y_pred))

if __name__ == "__main__":
    main()

  customer_id  num_transactions  avg_transaction_amount  churned
1          c1                10                   125.0        0
2          c2                 5                   200.0        0
3          c3                 5                    50.0        0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         1

    accuracy                           1.00         1
   macro avg       1.00      1.00      1.00         1
weighted avg       1.00      1.00      1.00         1



  session.write_transaction(create_data)
  data = session.read_transaction(extract_customer_data)
