In [2]:
import squidpy as sq

# Load the dataset returned by squidpy's `datasets.seqfish()` helper.
# Later cells read `.X`, `.obs`, `.obsm['spatial']` and `.obsp` from this object.
adata = sq.datasets.seqfish()

In [3]:
from anndata.experimental.pytorch import AnnLoader
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.model_selection import train_test_split
from scipy.sparse import hstack

# Integer class id per observation, taken from the categorical codes of the
# 'celltype_mapped_refined' annotation column.
labels = adata.obs['celltype_mapped_refined'].cat.codes.values
# Expression matrix with the spatial coordinates appended as extra feature columns.
X_with_spatial = hstack((adata.X, adata.obsm['spatial']))

# Split both feature matrices and the labels in ONE call.  Two separate,
# unseeded calls shuffle differently, and the second call would overwrite
# y_train / y_test, leaving X_train / X_test paired with labels that belong
# to other rows.  A single call applies the same row permutation to every
# array; the fixed seed makes the split reproducible across kernel restarts.
SPLIT_SEED = 0
(X_train, X_test,
 X_train_spatial, X_test_spatial,
 y_train, y_test) = train_test_split(
    adata.X, X_with_spatial, labels,
    test_size=0.3, random_state=SPLIT_SEED)



In [None]:
from sklearn import svm
from sklearn import metrics

# Fit a fresh RBF-kernel SVC on each feature set and print its accuracy on
# the matching held-out features, scored against y_test.
feature_sets = (
    ("Accuracy (RBF SVC):", X_train, X_test),
    ("Accuracy (RBF SVC, Spatial):", X_train_spatial, X_test_spatial),
)
for report_label, train_features, test_features in feature_sets:
    classifier = svm.SVC(kernel='rbf')
    classifier.fit(train_features, y_train)
    y_pred = classifier.predict(test_features)
    print(report_label, metrics.accuracy_score(y_test, y_pred))


In [4]:
import random
import torch
from torch_geometric.data import Data
from torch_geometric.utils import from_scipy_sparse_matrix
from GNN import MPNNModel

device = 'cpu'

# One output unit per class.  `labels` holds numpy integer codes, so convert
# the class count to a plain Python int before handing it to the model
# (the code dtype can be as narrow as int8).
num_classes = int(labels.max()) + 1
model = MPNNModel(in_dim=adata.X.shape[1], out_dim=num_classes, edge_dim=1)
model = model.to(device)

# Build the spatial neighbour graph only if it is not already stored on adata.
if 'spatial_distances' not in adata.obsp:
    sq.gr.spatial_neighbors(adata)
edge_index, edge_attr = from_scipy_sparse_matrix(
    adata.obsp['spatial_distances'])
# One scalar feature per edge (matches edge_dim=1 above), as float32.
edge_attr = edge_attr.view(-1, 1).float()

# Node features: the expression matrix as a sparse COO tensor.
# Pass the size explicitly: without it torch infers the shape from the largest
# non-zero index, which silently drops trailing all-zero rows/columns and
# would leave x with fewer rows than y or fewer columns than in_dim.
# Values are cast to float32, as is done for edge_attr.
x_index, x_values = from_scipy_sparse_matrix(adata.X)
x = torch.sparse_coo_tensor(x_index, x_values.float(), size=adata.X.shape)
y = torch.tensor(labels, dtype=torch.long)
data = Data(edge_index=edge_index, edge_attr=edge_attr, x=x, y=y)
data = data.to(device)


In [None]:
import torch.nn.functional as F

# 70/30 split of node indices.  Despite the names, train_mask / test_mask are
# index tensors, not boolean masks.  A locally seeded RNG keeps the split
# reproducible without touching the global `random` state.
num_nodes = len(data.x)
train_mask_size = int(num_nodes * 0.7)
shuffled_indices = list(range(num_nodes))
random.Random(0).shuffle(shuffled_indices)
train_mask = torch.tensor(shuffled_indices[:train_mask_size], dtype=torch.long)
test_mask = torch.tensor(shuffled_indices[train_mask_size:], dtype=torch.long)

optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
model.train()
epochs = 200
for epoch in range(epochs):
    # Clear gradients from the previous epoch; without this they accumulate
    # across iterations and every step uses the sum of all past gradients.
    optimizer.zero_grad()
    out = model(data)
    predictions = torch.argmax(out, dim=1)
    # The loss is computed on the training nodes only.
    loss = F.cross_entropy(out[train_mask], data.y[train_mask])
    loss.backward()
    optimizer.step()
    # Accuracies are computed from the forward pass made before this step.
    train_acc = (predictions[train_mask] == data.y[train_mask]).sum() / len(train_mask)
    test_acc = (predictions[test_mask] == data.y[test_mask]).sum() / len(test_mask)
    print(f'Epoch: {epoch} - Loss: {loss} - Train Accuracy: {train_acc} - Test Accuracy: {test_acc}')
