In [1]:
import squidpy as sq

adata = sq.datasets.seqfish()

In [9]:
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn import metrics
from scipy.sparse import hstack

labels = adata.obs['celltype_mapped_refined'].cat.codes.values
X_with_spatial = hstack((adata.X, adata.obsm['spatial']))

X_train, X_test, y_train, y_test = train_test_split(
    adata.X, labels, test_size=0.3,)

X_train_spatial, X_test_spatial, y_train_spatial, y_test_spatial = train_test_split(
    X_with_spatial, labels, test_size=0.3,)

X_train_spatial_only, X_test_spatial_only, y_train_spatial_only, y_test_spatial_only = train_test_split(
    adata.obsm['spatial'], labels, test_size=0.3,)



In [10]:

classifier_x_only = svm.SVC(kernel='rbf')
classifier_x_only.fit(X_train, y_train)
y_pred = classifier_x_only.predict(X_test)
print("Accuracy (RBF SVC):", metrics.accuracy_score(y_test, y_pred))

classifier_x_plus_spatial = svm.SVC(kernel='rbf')
classifier_x_plus_spatial.fit(X_train_spatial, y_train_spatial)
y_pred = classifier_x_plus_spatial.predict(X_test_spatial)
print("Accuracy (RBF SVC, Spatial):", metrics.accuracy_score(y_test_spatial, y_pred))


Accuracy (RBF SVC): 0.8439484978540772
Accuracy (RBF SVC, Spatial): 0.8357081545064378


In [11]:
# Classify based solely on spatial information

classifier_spatial_only = svm.SVC(kernel='rbf')
classifier_spatial_only.fit(X_train_spatial_only, y_train_spatial_only)
y_pred = classifier_spatial_only.predict(X_test_spatial_only)
print("Accuracy (RBF SVC, Spatial only):", metrics.accuracy_score(y_test_spatial_only, y_pred))


Accuracy (RBF SVC, Spatial only): 0.5769957081545064


In [5]:
import numpy as np
import math

theta = math.pi / 2
R = np.array([[math.cos(theta), -math.sin(theta)],
             [math.sin(theta), math.cos(theta)]])
X_test_spatial_only_rotated = np.matmul(X_test_spatial_only, R)
y_pred = classifier_spatial_only.predict(X_test_spatial_only_rotated)
print("Accuracy (RBF SVC, Spatial only, Rotated):",
      metrics.accuracy_score(y_test_spatial_only, y_pred))


Accuracy (RBF SVC, Spatial only, Rotated): 0.03690987124463519


In [7]:
from scipy.sparse import csr_matrix

theta = 0 * math.pi / 2
R = np.array([[math.cos(theta), -math.sin(theta)],
             [math.sin(theta), math.cos(theta)]])
X_test_spatial_rotated = hstack(
    (X_test_spatial[:, :-2], X_test_spatial[:, -2:] @ csr_matrix(R)))
y_pred = classifier_x_plus_spatial.predict(X_test_spatial_rotated)
print("Accuracy (RBF SVC, Spatial, Rotated):",
      metrics.accuracy_score(y_test_spatial, y_pred))


Accuracy (RBF SVC, Spatial, Rotated): 0.18729613733905578


In [17]:
import random
import torch
import torch.nn.functional as F
from torch_geometric.data import Data
from torch_geometric.utils import from_scipy_sparse_matrix
from GNN import MPNNModel

device = 'cuda' if torch.cuda.is_available() else 'cpu'

if 'spatial_distances' not in adata.obsp:
    sq.gr.spatial_neighbors(adata)

train_mask_size = int(adata.X.shape[0] * 0.7)
shuffled_indices = list(range(adata.X.shape[0]))
random.shuffle(shuffled_indices)
train_mask = torch.tensor(shuffled_indices[:train_mask_size], dtype=torch.long)
test_mask = torch.tensor(shuffled_indices[train_mask_size:], dtype=torch.long)

def train(model, data, train_mask, test_mask):
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    model.train()
    epochs = 200
    for epoch in range(epochs):
        out = model(data)
        predictions = torch.argmax(out, dim=1)
        loss = F.cross_entropy(out[train_mask], data.y[train_mask])
        loss.backward()
        optimizer.step()
        train_acc = (predictions[train_mask] ==
                     data.y[train_mask]).sum() / len(train_mask)
        test_acc = (predictions[test_mask] ==
                    data.y[test_mask]).sum() / len(test_mask)
        print(
            f'Epoch: {epoch} - Loss: {loss} - Train Accuracy: {train_acc} - Test Accuracy: {test_acc}')




In [8]:
device = 'cpu'

# Build the graph for the invariant GNN
edge_index, edge_attr = from_scipy_sparse_matrix(
    adata.obsp['spatial_distances'])
edge_attr = edge_attr.view(-1, 1).float()
x_index, x_values = from_scipy_sparse_matrix(adata.X)
x = torch.sparse_coo_tensor(x_index, x_values)
y = torch.tensor(labels, dtype=torch.long)
data = Data(edge_index=edge_index, edge_attr=edge_attr, x=x, y=y)
data = data.to(device)

# Invariant GNN model
model = MPNNModel(in_dim=adata.X.shape[1], out_dim=max(labels)+1, edge_dim=1, num_layers=2, emb_dim=128)
model = model.to(device)

train(model, data, train_mask, test_mask)


Epoch: 0 - Loss: 3.4639623165130615 - Train Accuracy: 0.06290927529335022 - Test Accuracy: 0.06025750935077667
Epoch: 1 - Loss: 2.565330743789673 - Train Accuracy: 0.3464057147502899 - Test Accuracy: 0.340600848197937
Epoch: 2 - Loss: 2.0675501823425293 - Train Accuracy: 0.4954749345779419 - Test Accuracy: 0.490128755569458
Epoch: 3 - Loss: 1.7654895782470703 - Train Accuracy: 0.5600765347480774 - Test Accuracy: 0.5486695170402527
Epoch: 4 - Loss: 1.5541701316833496 - Train Accuracy: 0.595909059047699 - Test Accuracy: 0.5922746658325195
Epoch: 5 - Loss: 1.3951432704925537 - Train Accuracy: 0.6290192008018494 - Test Accuracy: 0.6188841462135315
Epoch: 6 - Loss: 1.279425024986267 - Train Accuracy: 0.6530792713165283 - Test Accuracy: 0.646180272102356
Epoch: 7 - Loss: 1.1992542743682861 - Train Accuracy: 0.6714737415313721 - Test Accuracy: 0.6568240523338318
Epoch: 8 - Loss: 1.1407732963562012 - Train Accuracy: 0.6810389161109924 - Test Accuracy: 0.6674678325653076
Epoch: 9 - Loss: 1.0901

KeyboardInterrupt: 

In [4]:
device = 'cpu'

# Build the graph for the regular GNN
edge_index, edge_attr = from_scipy_sparse_matrix(
    adata.obsp['spatial_distances'])
edge_attr = edge_attr.view(-1, 1).float()
x_index, x_values = from_scipy_sparse_matrix(X_with_spatial)
x_values = x_values.float()
x = torch.sparse_coo_tensor(x_index, x_values)
y = torch.tensor(labels, dtype=torch.long)
data = Data(edge_index=edge_index, edge_attr=edge_attr, x=x, y=y)
data = data.to(device)

# Regular GNN model
model = MPNNModel(
    in_dim=X_with_spatial.shape[1], out_dim=max(labels)+1, edge_dim=1, num_layers=2, emb_dim=128)
model = model.to(device)

train(model, data, train_mask, test_mask)


Epoch: 0 - Loss: 3.968172073364258 - Train Accuracy: 0.03311014547944069 - Test Accuracy: 0.03673819825053215
Epoch: 1 - Loss: 3.0775532722473145 - Train Accuracy: 0.11242733895778656 - Test Accuracy: 0.11090128868818283
Epoch: 2 - Loss: 2.5385422706604004 - Train Accuracy: 0.3053491413593292 - Test Accuracy: 0.3009442090988159
Epoch: 3 - Loss: 2.2040483951568604 - Train Accuracy: 0.4374954104423523 - Test Accuracy: 0.43690988421440125
Epoch: 4 - Loss: 1.9733129739761353 - Train Accuracy: 0.5188727974891663 - Test Accuracy: 0.5150214433670044
Epoch: 5 - Loss: 1.8016785383224487 - Train Accuracy: 0.5646383762359619 - Test Accuracy: 0.5562231540679932
Epoch: 6 - Loss: 1.6676913499832153 - Train Accuracy: 0.5854609608650208 - Test Accuracy: 0.5764806866645813
Epoch: 7 - Loss: 1.56126868724823 - Train Accuracy: 0.5996615290641785 - Test Accuracy: 0.5907295942306519
Epoch: 8 - Loss: 1.4780988693237305 - Train Accuracy: 0.6146714687347412 - Test Accuracy: 0.6005150079727173
Epoch: 9 - Loss: 

KeyboardInterrupt: 

In [34]:
x_values.shape


torch.Size([1929622])