In [None]:
!pip install neo4j

Collecting neo4j
  Downloading neo4j-5.18.0.tar.gz (198 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/198.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━[0m [32m102.4/198.0 kB[0m [31m2.9 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m198.0/198.0 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Installing backend dependencies ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: neo4j
  Building wheel for neo4j (pyproject.toml) ... [?25l[?25hdone
  Created wheel for neo4j: filename=neo4j-5.18.0-py3-none-any.whl size=273862 sha256=6579be82b1f77464661e424f1a70abea8380274cef5eea040969d7c37a14fadd
  Stored in directory: /root/.cache/pip/wheels/e7/e1/a0/dd7c19192f5383

In [None]:
!pip install torch-geometric

Collecting torch-geometric
  Downloading torch_geometric-2.5.0-py3-none-any.whl (1.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: torch-geometric
Successfully installed torch-geometric-2.5.0


The below code involves fetching data from a Neo4j graph database using the Python Neo4j driver, extracting information about nodes and edges, and then closing the database connection. Below is an explanation of the overall process:

Importing Dependencies: The code begins by importing the necessary libraries, including GraphDatabase from the neo4j package.

Defining the GraphDataExtractor Class: This class encapsulates functionality related to extracting data from the Neo4j database. It contains the following methods:

__init__: The constructor method initializes the GraphDataExtractor object with the URI, username, and password required to establish a connection to the Neo4j database.

close: This method closes the connection to the Neo4j database.

fetch_graph_data: This method executes Cypher queries to retrieve data from the database. It runs two queries:

The first query retrieves information about nodes labeled as 'Job', including their IDs, names, and labels.
The second query retrieves information about edges connecting nodes labeled as 'Job' to nodes labeled as 'Skills', including the IDs of the source and target nodes.
The method returns the results of both queries as dictionaries.
Creating an Instance of GraphDataExtractor: An instance of the GraphDataExtractor class is created with the appropriate URI, username, and password to connect to the Neo4j database.

Fetching Graph Data: The fetch_graph_data method of the GraphDataExtractor instance is called to retrieve data from the Neo4j database. This data includes information about nodes and edges in the graph.

Closing the Connection: After fetching the graph data, the connection to the Neo4j database is closed using the close method of the GraphDataExtractor instance.

Overall, the provided code demonstrates how to use the Python Neo4j driver to interact with a Neo4j graph database, fetch data about nodes and edges, and then close the database connection. This process enables further analysis and manipulation of the graph data within a Python environment.

In [None]:
from neo4j import GraphDatabase

class GraphDataExtractor:
    def __init__(self, uri, user, password):
        self.driver = GraphDatabase.driver(uri, auth=(user, password))

    def close(self):
        self.driver.close()

    def fetch_graph_data(self):
        with self.driver.session() as session:
            nodes = session.run("MATCH (n:Job) RETURN id(n) as node_id, n.name as name, labels(n) as labels")
            edges = session.run("MATCH (n:Job)-[r]->(m:Skills) RETURN id(n) as source_id, id(m) as target_id")
            return nodes.data(), edges.data()

extractor = GraphDataExtractor("neo4j+s://d3a947d5.databases.neo4j.io", "neo4j", "SC3NYzP1w2IG2JfYgFqgaDq-BfB66uVZ4j6bOEMx7ik")
nodes, edges = extractor.fetch_graph_data()
extractor.close()

In [None]:
import torch
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv

# Define your GCN model
class GCN(torch.nn.Module):
    def __init__(self, num_features, num_clusters):
        super(GCN, self).__init__()
        self.conv1 = GCNConv(num_features, 64)
        self.conv2 = GCNConv(64, num_clusters)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = self.conv1(x, edge_index)
        x = torch.relu(x)
        x = self.conv2(x, edge_index)
        return torch.log_softmax(x, dim=1)

# Example dummy data preparation
num_nodes = 1000  # Total number of nodes
num_node_features = 16  # Number of features per node
num_edges = 500  # Total number of edges

# Dummy features for nodes
x = torch.randn((num_nodes, num_node_features), dtype=torch.float)

# Dummy edge index (source and target nodes for each edge)
edge_index = torch.randint(0, num_nodes, (2, num_edges), dtype=torch.long)

# Assuming binary labels for a classification task
y = torch.randint(0, 2, (num_nodes,), dtype=torch.long)

# Creating a PyTorch Geometric Data object
data = Data(x=x, edge_index=edge_index, y=y)

# Now, the `data` object can be used to create and train a GCN model
number_of_communities = 5  # Example number of communities/classes
model = GCN(num_features=num_node_features, num_clusters=number_of_communities)


The provided code demonstrates the implementation of a simple Graph Convolutional Network (GCN) model using PyTorch Geometric library.

Let's break down the code step by step:

Importing Libraries: The code begins by importing the necessary libraries:

torch: The main PyTorch library.
torch_geometric.data: This module provides Data objects that are used to represent graph data.
torch_geometric.nn.GCNConv: This class implements the Graph Convolutional Layer, which is a building block for GCN models.
Defining the GCN Model:

The GCN class inherits from torch.nn.Module, making it a PyTorch neural network module.
In the constructor __init__(), the GCN model is defined with two graph convolutional layers (GCNConv):
self.conv1: The first convolutional layer takes as input the number of input features per node (num_features) and outputs 64 features.
self.conv2: The second convolutional layer takes the 64 features from the first layer and outputs num_clusters features. It's important to note that in this case, num_clusters is used as the number of output features of the GCN model.
The forward() method defines the forward pass of the model:
It takes a Data object containing node features (data.x) and edge indices (data.edge_index) as input.
The node features are passed through the first convolutional layer, followed by a ReLU activation function.
The resulting features are then passed through the second convolutional layer.
Finally, a log softmax function is applied along the second dimension to obtain the output of the model.
Dummy Data Preparation:

Dummy data is prepared to demonstrate the usage of the GCN model.
x: Random node features are generated with dimensions (num_nodes, num_node_features).
edge_index: Random edge indices are generated with dimensions (2, num_edges), representing source and target nodes for each edge.
y: Binary labels are randomly generated for each node.
Creating PyTorch Geometric Data Object:

A Data object is created to encapsulate the node features, edge indices, and labels.
Instantiating and Using the GCN Model:

An instance of the GCN model is created with the specified number of input features (num_node_features) and output features (num_clusters).
The data object is passed to the GCN model to perform a forward pass, obtaining the output predictions.

In [None]:
import torch
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv
import torch.nn.functional as F
from sklearn.cluster import KMeans

class GCN(torch.nn.Module):
    def __init__(self, num_features, num_clusters):
        super(GCN, self).__init__()
        self.conv1 = GCNConv(num_features, 16)
        self.conv2 = GCNConv(16, num_clusters)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = F.relu(self.conv1(x, edge_index))
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)
        return x

num_nodes = len(nodes)
num_node_features = 16
num_edges = len(edges)
number_of_communities = 8


# Dummy features for nodes
x = torch.randn((num_nodes, num_node_features), dtype=torch.float)

# Dummy edge index (source and target nodes for each edge)
edge_index = torch.randint(0, num_nodes, (2, num_edges), dtype=torch.long)

# Assuming binary labels for a classification task
y = torch.randint(0, 2, (num_nodes,), dtype=torch.long)

data = Data(x=x, edge_index=edge_index, y=y)

# Assume `data` is your graph data in PyTorch Geometric format
model = GCN(num_features=data.num_node_features, num_clusters=number_of_communities)
out = model(data)
embeddings = out.detach().numpy()  # Get embeddings as numpy array

# Cluster the embeddings to detect communities
kmeans = KMeans(n_clusters=number_of_communities)
communities = kmeans.fit_predict(embeddings)

# `communities` now holds the community ID for each node



The write_communities_to_neo4j function establishes a connection to a Neo4j graph database using provided URI, username, and password, and then iterates through a list of community assignments for nodes. For each node, a Cypher query is executed to update its community property in the database. The function utilizes parameterized Cypher syntax to ensure security and efficiency of the database operations. Finally, the database connection is closed after all community assignments have been written. Overall, this function provides a straightforward and efficient way to update node properties in a Neo4j graph database based on community assignments.

In [None]:
def write_communities_to_neo4j(communities, uri, user, password):
    driver = GraphDatabase.driver(uri, auth=(user, password))
    with driver.session() as session:
        for node_id, community in enumerate(communities):
            session.run("MATCH (n) WHERE id(n) = $node_id SET n.community = $community",
                        node_id=node_id, community=community)
    driver.close()

In [None]:
write_communities_to_neo4j(communities, "neo4j+s://d3a947d5.databases.neo4j.io", "neo4j", "SC3NYzP1w2IG2JfYgFqgaDq-BfB66uVZ4j6bOEMx7ik")

This code snippet utilizes Plotly and NetworkX libraries to visualize a graph represented by edges with nodes and edges. It constructs a NetworkX graph G from the provided edge list, determines the layout of nodes using the spring layout algorithm, and ensures that the number of communities matches the number of nodes. It assigns colors to nodes based on their community, with a default color assigned to unassigned nodes. It then creates a Plotly figure and adds nodes and edges to it, with each node represented as a scatter point and each edge as a line. Finally, it updates layout options and displays the plot, providing a clear visualization of the graph and its communities.

In [21]:
import plotly.graph_objects as go
import networkx as nx
import numpy as np

# Create a NetworkX graph from the edge list
G = nx.Graph()
for edge in edges:
    source_id, target_id = edge['source_id'], edge['target_id']
    G.add_edge(source_id, target_id)

# Create a layout for the graph
pos = nx.spring_layout(G)

# Ensure the length of communities matches the number of nodes
print("Number of nodes in the graph:", len(G.nodes))
print("Number of communities detected:", len(set(communities)))

if len(set(communities)) < len(G.nodes):
    # Assign a default community to nodes that are not assigned
    default_community = number_of_communities
    communities = np.append(communities, np.full(len(G.nodes) - len(set(communities)), default_community))
    print("Number of communities after assigning default:", len(set(communities)))

# Create a list of colors for each community
colors = ['rgb({},{},{})'.format(r, g, b) for r, g, b in np.random.randint(0, 255, (number_of_communities + 1, 3))]  # +1 for default community

# Assign colors to nodes based on their community
node_color = [colors[community] for community in communities]

# Pad the node_color list to ensure it has enough elements
node_color += ['rgb(0,0,0)'] * (len(G.nodes) - len(node_color))

# Create a Plotly figure
fig = go.Figure()

# Add nodes to the plot
for node_id, (x, y) in pos.items():
    fig.add_trace(go.Scatter(x=[x], y=[y], mode='markers', marker=dict(color=node_color[node_id]), text=str(node_id)))

# Add edges to the plot
for edge in G.edges():
    x0, y0 = pos[edge[0]]
    x1, y1 = pos[edge[1]]
    fig.add_trace(go.Scatter(x=[x0, x1], y=[y0, y1], mode='lines', line=dict(color='gray')))

# Update layout options
fig.update_layout(showlegend=False)

# Show the plot
fig.show()


Number of nodes in the graph: 2016
Number of communities detected: 9
Number of communities after assigning default: 9
