In [4]:
import numpy as np
import scanpy as sc
import squidpy as sq
from tqdm.auto import tqdm
from torch.utils.data import Dataset, random_split, DataLoader
import torch.nn as nn

In [5]:
adata = sc.read("../example_files/img_1199670929.h5ad")

In [6]:
# now we have the adata object of just a single image
sq.gr.spatial_neighbors(adata=adata, radius=1000, key_added="adjacency_matrix", coord_type="generic")

In [7]:
# function to get k lowest values from each row of a sparse matrix
def get_k_lowest_values(matrix, k):
    n_rows = matrix.shape[0]
    k_lowest_indices = np.empty((n_rows, k), dtype=int)
    for i in range(n_rows):
        start = matrix.indptr[i]
        end = matrix.indptr[i + 1]
        row_data = matrix.data[start:end]
        row_indices = matrix.indices[start:end]
        k_smallest_indices = np.argpartition(row_data, k)[:k]
        k_lowest_indices[i] = row_indices[k_smallest_indices]
    return k_lowest_indices

closest_matrix = get_k_lowest_values(adata.obsp['adjacency_matrix_distances'], 5)

In [11]:
# we construct dataset using closest 5 cells

X = []
y = []

for i, cell in tqdm(enumerate(adata.X), total=len(adata)):
    y.append(cell.toarray())
    five_closest_cells = np.array([adata.X[index].toarray() for index in closest_matrix[i]])
    X.append(five_closest_cells.flatten())

X = np.array(X)
y = np.concatenate(y)

  0%|          | 0/26230 [00:00<?, ?it/s]


KeyboardInterrupt



In [9]:
#X = np.concatenate(X)
print(y.shape, X.shape)

(26230, 550) (26230, 2750)


In [10]:
# we use 80% of the data for training and 10% for validation and 10% for testing
# Create a custom dataset
class MyDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = X

    def __len__(self):
        return len(self.y)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

# Create the dataset and split it into training, validation, and testing sets
dataset = MyDataset(X, y)
train_size = int(0.8 * len(dataset))
val_size = int(0.1 * len(dataset))
test_size = len(dataset) - train_size - val_size
train_set, val_set, test_set = random_split(dataset, [train_size, val_size, test_size])

In [None]:
# Create data loaders
train_loader = DataLoader(train_set, batch_size=64, shuffle=True)
val_loader = DataLoader(val_set, batch_size=64, shuffle=False)
test_loader = DataLoader(test_set, batch_size=64, shuffle=False)

In [143]:
class LinearModel(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(LinearModel, self).__init__()
        self.linear = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        return self.linear(x)