In [31]:
import networkx as nx
import numpy as np
import scanpy as sc
import squidpy as sq
from sklearn.metrics import r2_score
from torch_geometric.nn import GCNConv, Sequential
from torch_geometric.data import Data   # Create data containers
from torch_geometric.utils import from_networkx

import torch
from torch.utils.data import Dataset
from torch_geometric.data import Data
from torch_geometric.utils import subgraph
from torch_geometric.loader import DataLoader
from torch_geometric.utils.convert import from_scipy_sparse_matrix
from tqdm.auto import tqdm

In [32]:
adata = sc.read("../example_files/img_1199670929.h5ad")

In [33]:
sq.gr.spatial_neighbors(adata=adata, radius=20, key_added="adjacency_matrix", coord_type="generic")
edge_index, edge_weight = from_scipy_sparse_matrix(adata.obsp["adjacency_matrix_connectivities"])
x = torch.tensor(adata.X.toarray(), dtype=torch.double)
print(f"mean node degree: {edge_index.shape[1]/len(adata):.1f}")

mean node degree: 3.9


In [4]:
data = Data(x=x, edge_index=edge_index)
#data
#edge_index

In [5]:
subgraph(torch.tensor([0, 10, 33]), edge_index=edge_index)

(tensor([], size=(2, 0), dtype=torch.int64), None)

In [6]:
# we want to create small subgraph using each node as the center

In [41]:
# Create a large graph
G = nx.Graph()

# Add nodes with features to the graph
for i, features in enumerate(adata.X.toarray()):
    G.add_node(i, features=features)

# Add edges to the graph
G.add_edges_from(edge_index.t().tolist())

In [42]:
# create subgraphs from each node of G using networkx
subgraphs = []
for node in tqdm(G.nodes()):
    subgraphs.append(nx.ego_graph(G, node, radius = 1))

  0%|          | 0/26230 [00:00<?, ?it/s]

In [43]:
# mean number of nodes per subgraph
np.mean([graph.number_of_nodes() for graph in subgraphs])

4.924590163934426

In [44]:
#torch.tensor(list(subgraphs[0].edges)).t()

In [98]:
# create pytorch geometric dataset from subgraphs
#datasss = [Data(x=torch.tensor(graph.nodes(data="features"), dtype=torch.double), edge_index=torch.tensor(list(graph.edges)).t()) for graph in tqdm(subgraphs)]

#list(subgraphs[0].features)
daata = [from_networkx(graph, group_node_attrs=['features']) for graph in tqdm(subgraphs)]
loader = DataLoader(daata, batch_size=32, shuffle=False)
#for daat in daata:
#loader = DataLoader([from_networkx(graph, group_node_attrs=['features'], dtype=torch.double) for graph in tqdm(subgraphs)], batch_size=32, shuffle=True)


#loader = DataLoader([Data(x=x, edge_index=torch.tensor(list(subgraphs[0].edges)).t()) for graph in subgraphs], batch_size=32)

  0%|          | 0/26230 [00:00<?, ?it/s]

In [58]:
# create pytorch geometric dataset from subgraphs
#dataset = [Data(x=torch.tensor(graph.nodes(data="features"), dtype=torch.double), edge_index=torch.tensor(list(graph.edges)).t()) for graph in tqdm(subgraphs)]

In [107]:
for data in loader:
    print(data)


DataBatch(edge_index=[2, 146], x=[88, 550], batch=[88], ptr=[33])
DataBatch(edge_index=[2, 192], x=[96, 550], batch=[96], ptr=[33])
DataBatch(edge_index=[2, 236], x=[107, 550], batch=[107], ptr=[33])
DataBatch(edge_index=[2, 542], x=[155, 550], batch=[155], ptr=[33])
DataBatch(edge_index=[2, 642], x=[172, 550], batch=[172], ptr=[33])
DataBatch(edge_index=[2, 400], x=[130, 550], batch=[130], ptr=[33])
DataBatch(edge_index=[2, 464], x=[147, 550], batch=[147], ptr=[33])
DataBatch(edge_index=[2, 540], x=[159, 550], batch=[159], ptr=[33])
DataBatch(edge_index=[2, 500], x=[154, 550], batch=[154], ptr=[33])
DataBatch(edge_index=[2, 584], x=[168, 550], batch=[168], ptr=[33])
DataBatch(edge_index=[2, 426], x=[138, 550], batch=[138], ptr=[33])
DataBatch(edge_index=[2, 934], x=[226, 550], batch=[226], ptr=[33])
DataBatch(edge_index=[2, 800], x=[208, 550], batch=[208], ptr=[33])
DataBatch(edge_index=[2, 972], x=[234, 550], batch=[234], ptr=[33])
DataBatch(edge_index=[2, 1202], x=[251, 550], batch=

In [71]:
import torch
from torch_geometric.nn import GCN, summary

#model = GCN(-1, 64, num_layers=2, out_channels=550)
#x = torch.randn(100, 128)
#edge_index = torch.randint(100, size=(2, 20))

#print(summary(model, data.x, data.edge_index))

In [102]:

from torch import nn, optim, Tensor
from torch_geometric.nn import conv


# Define the Graph Convolutional Network (GCN) model
class GCNClassifier(torch.nn.Module):
    def __init__(self, hidden_dim, hidden_dim1, output_dim):
        super(GCNClassifier, self).__init__()
        self.model = nn.Sequential(
            conv.SAGEConv(-1, hidden_dim),
            nn.ReLU(),
            conv.GCNConv(hidden_dim, hidden_dim1),
            nn.ReLU(),
            nn.Linear(hidden_dim1, output_dim),
        )

    def forward(self, x: Tensor, edge_index: Tensor) -> Tensor:
        return self.model(x, edge_index)

class GCN(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)
        self.linear = nn.Linear(out_channels, 550)


    def forward(self, x: Tensor, edge_index: Tensor) -> Tensor:
        # x: Node feature matrix of shape [num_nodes, in_channels]
        # edge_index: Graph connectivity matrix of shape [2, num_edges]
        x = self.conv1(x, edge_index).relu()
        print(x.shape)
        x = self.conv2(x, edge_index)
        print(x.shape)
        x = self.linear(x)
        print(x.shape)
        return x

In [108]:


# Set device for training, macbook
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Create an instance of the model and move it to the device

output_dim = 550

# Create the model
model = GCN(-1, 64, output_dim).to(device)

#GCNClassifier(hidden_dim=100, hidden_dim1=100, output_dim=550).to(device)

# Define loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.00001)

# Training loop
num_epochs = 100

# store losses
train_losses = []
val_losses = []

# store r2 scores
train_r2_scores = []
val_r2_scores = []

best_val_loss = float('inf') # Set initial best validation loss to infinity
patience = 5                # Number of epochs to wait for improvement in validation loss
epochs_no_improve = 0        # Number of epochs with no improvement in validation loss
best_epoch = 0               # Epoch at which we get the best validation loss

# epoch training times
epoch_times = []
#start_time = time.time()

for epoch in range(num_epochs):

    model.train()  # Set the model to training mode
    total_loss = 0
    targets_list = []
    outputs_list = []

    for data in loader:
        # Transfer data to GPU
        data = data.to(device)

        num_nodes = len(adata.x)
        # Determine the number of nodes to mask (20% of total nodes)
        num_nodes_to_mask = int(0.2 * num_nodes)

        # Randomly select nodes to be masked
        nodes_to_mask = random.sample(range(num_nodes), num_nodes_to_mask)
        # Create a mask tensor to identify the selected nodes
        mask = torch.zeros(len(data), dtype=torch.bool)
        mask[nodes_to_mask] = True

        # Apply the mask to the node features
        masked_node_features = node_features * ~mask.unsqueeze(-1).float()


        # Forward pass
        outputs = model(data.x.float(), data.edge_index.long())
        print(data)
        #print(outputs.shape)
        loss = criterion(outputs, data.x.float())

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Measure train loss and r2 score
        total_loss += loss.item() * data.num_graphs
        targets_list.append(data.y)
        outputs_list.append(outputs)

    #measure and print r2 and train loss
    train_loss = total_loss / len(loader.dataset)
    train_losses.append(train_loss)
    train_r2 = r2_score(torch.cat(targets_list).cpu().detach().numpy(), torch.cat(outputs_list).cpu().detach().numpy())
    train_r2_scores.append(train_r2)
    print(f"Epoch {epoch+1}/{num_epochs}, train loss: {train_loss:.4f}, train r2: {train_r2:.4f}")








AttributeError: 'AnnData' object has no attribute 'x'

In [78]:
class SpatialGraphDataset(Dataset):
    def __init__(self, adata, radius=30, coord_type="generic"):
        self.adata = adata
        self.radius = radius
        self.coord_type = coord_type
        sq.gr.spatial_neighbors(adata=adata, radius=radius, key_added="adjacency_matrix", coord_type=coord_type)
        self.edge_index, _ = from_scipy_sparse_matrix(adata.obsp["adjacency_matrix_connectivities"])
        self.x = torch.tensor(adata.X.toarray(), dtype=torch.float)
        self.y = torch.tensor(adata.obs["total_counts"].values, dtype=torch.float)
        self.n_nodes = len(adata)
        self.n_features = adata.n_vars
        self.n_labels = 1
        self.n_edges = self.edge_index.shape[1]
        self.n_graphs = 1
        self.n_classes = 1
        self.n_samples = 1
        self.n_node_features = self.n_features
        self.n_edge_features = 1
        self.n_graph_features = 1
        self.n

(tensor([], size=(2, 0), dtype=torch.int64), None)

IndexError: Target 3 is out of bounds.