In [3]:
import pandas as pd
import numpy as np

# Assuming you have gene expression data in a DataFrame where rows are genes and columns are samples
# Replace this with your actual gene expression data
gene_expression_data = pd.DataFrame({
    'Gene1': [1, 2, 3, 4, 5],
    'Gene2': [5, 4, 3, 2, 1],
    'Gene3': [3, 2, 1, 3, 2],
    'Gene4': [2, 3, 4, 1, 5]
})

def co_expression_similarity(data):
    """
    Calculate co-expression similarity of genes using Pearson correlation coefficient
    :param data: DataFrame where rows are genes and columns are samples
    :return: DataFrame containing pairwise co-expression similarity between genes
    """
    return data.corr(method='pearson')

# Calculate co-expression similarity
co_expression_similarity_matrix = co_expression_similarity(gene_expression_data)
print(co_expression_similarity_matrix)

          Gene1     Gene2     Gene3     Gene4
Gene1  1.000000 -1.000000 -0.188982  0.400000
Gene2 -1.000000  1.000000  0.188982 -0.400000
Gene3 -0.188982  0.188982  1.000000 -0.755929
Gene4  0.400000 -0.400000 -0.755929  1.000000


In [4]:
def threshold_adjacency(similarity_matrix, threshold):
    """
    Calculate unweighted and weighted adjacency matrices from a similarity matrix using a threshold
    :param similarity_matrix: DataFrame containing pairwise similarities between genes
    :param threshold: Threshold value to determine which similarities should be considered as edges
    :return: Tuple containing unweighted and weighted adjacency matrices
    """
    # Initialize matrices
    num_genes = similarity_matrix.shape[0]
    unweighted_adjacency = np.zeros((num_genes, num_genes))
    weighted_adjacency = np.zeros((num_genes, num_genes))

    # Calculate unweighted and weighted adjacency matrices
    for i in range(num_genes):
        for j in range(num_genes):
            similarity = similarity_matrix.iloc[i, j]
            if similarity >= threshold:
                unweighted_adjacency[i, j] = 1
                weighted_adjacency[i, j] = similarity

    return unweighted_adjacency, weighted_adjacency

# Example usage
threshold_value = 0.5  # Set your threshold value here
unweighted_adjacency_matrix, weighted_adjacency_matrix = threshold_adjacency(co_expression_similarity_matrix, threshold_value)

print("Unweighted Adjacency Matrix:")
print(unweighted_adjacency_matrix)
print("\nWeighted Adjacency Matrix:")
print(weighted_adjacency_matrix)

Unweighted Adjacency Matrix:
[[1. 0. 0. 0.]
 [0. 1. 0. 0.]
 [0. 0. 1. 0.]
 [0. 0. 0. 1.]]

Weighted Adjacency Matrix:
[[1. 0. 0. 0.]
 [0. 1. 0. 0.]
 [0. 0. 1. 0.]
 [0. 0. 0. 1.]]


In [5]:
import torch
from torch_geometric.data import Data
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest, mutual_info_regression
from sklearn.neighbors import kneighbors_graph

# Example gene expression data (replace this with your actual omics data)
omics_data = torch.randn(100, 500)  # Assuming 100 samples and 500 features (genes)

# Define a placeholder for the target variable (if applicable)
target_variable = torch.randint(0, 2, (100,))  # Assuming binary classification with 100 samples


# Preprocess the data
scaler = StandardScaler()
omics_data_normalized = scaler.fit_transform(omics_data.numpy())

# Feature selection (optional)
selector = SelectKBest(score_func=mutual_info_regression, k=100)
omics_data_selected = selector.fit_transform(omics_data_normalized, target_variable)


# Construct a K-nearest neighbor graph
adjacency_matrix = kneighbors_graph(omics_data_selected, n_neighbors=10, mode='connectivity').toarray()

# Convert adjacency matrix to edge index format
edge_index = torch.tensor(adjacency_matrix.nonzero(), dtype=torch.long)

# Create a PyG Data object
graph_data = Data(x=torch.tensor(omics_data_selected, dtype=torch.float32),
                  edge_index=edge_index,
                  y=target_variable)

print(graph_data)



Data(x=[100, 100], edge_index=[2, 1000], y=[100])


  edge_index = torch.tensor(adjacency_matrix.nonzero(), dtype=torch.long)


In [1]:
from torch_geometric.datasets import Planetoid
from torch_geometric.transforms import NormalizeFeatures

dataset = Planetoid(root='data/Planetoid', name='Cora', transform=NormalizeFeatures())

  from .autonotebook import tqdm as notebook_tqdm
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.test.index
Processing...
Done!


In [2]:
data = dataset[0]

In [3]:
print(f'Number of nodes: {data.num_nodes}')
print(f'Number of edges: {data.num_edges}')
print(f'Average node degree: {data.num_edges / data.num_nodes:.2f}')
print(f'Number of training nodes: {data.train_mask.sum()}')
print(f'Training node label rate: {int(data.train_mask.sum()) / data.num_nodes:.2f}')
print(f'Has isolated nodes: {data.has_isolated_nodes()}')
print(f'Has self-loops: {data.has_self_loops()}')
print(f'Is undirected: {data.is_undirected()}')

Number of nodes: 2708
Number of edges: 10556
Average node degree: 3.90
Number of training nodes: 140
Training node label rate: 0.05
Has isolated nodes: False
Has self-loops: False
Is undirected: True


In [13]:
from torch_geometric.nn import GCNConv
import torch.nn.functional as F


class GCN(torch.nn.Module):
    def __init__(self, hidden_channels):
        super().__init__()
        torch.manual_seed(1234567)
        self.conv1 = GCNConv(dataset.num_features, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, dataset.num_classes)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = x.relu()
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.conv2(x, edge_index)
        return x

model = GCN(hidden_channels=16)
print(model)

GCN(
  (conv1): GCNConv(1433, 16)
  (conv2): GCNConv(16, 7)
)


In [15]:
data.x.shape, data.edge_index.shape

(torch.Size([2708, 1433]), torch.Size([2, 10556]))

In [30]:
for a in data.x[0]:
    print(a)

tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.1111)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.1111)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
te

In [27]:
for pin in range(100):
    print(data.edge_index[0][pin], data.edge_index[1][pin])

tensor(0) tensor(633)
tensor(0) tensor(1862)
tensor(0) tensor(2582)
tensor(1) tensor(2)
tensor(1) tensor(652)
tensor(1) tensor(654)
tensor(2) tensor(1)
tensor(2) tensor(332)
tensor(2) tensor(1454)
tensor(2) tensor(1666)
tensor(2) tensor(1986)
tensor(3) tensor(2544)
tensor(4) tensor(1016)
tensor(4) tensor(1256)
tensor(4) tensor(1761)
tensor(4) tensor(2175)
tensor(4) tensor(2176)
tensor(5) tensor(1629)
tensor(5) tensor(1659)
tensor(5) tensor(2546)
tensor(6) tensor(373)
tensor(6) tensor(1042)
tensor(6) tensor(1416)
tensor(6) tensor(1602)
tensor(7) tensor(208)
tensor(8) tensor(269)
tensor(8) tensor(281)
tensor(8) tensor(1996)
tensor(9) tensor(723)
tensor(9) tensor(2614)
tensor(10) tensor(476)
tensor(10) tensor(2545)
tensor(11) tensor(1655)
tensor(11) tensor(1839)
tensor(12) tensor(1001)
tensor(12) tensor(1318)
tensor(12) tensor(2661)
tensor(12) tensor(2662)
tensor(13) tensor(1701)
tensor(13) tensor(1810)
tensor(14) tensor(158)
tensor(14) tensor(2034)
tensor(14) tensor(2075)
tensor(14) tens

In [23]:
import numpy as np 
np.sqrt(10556)

102.74239631233058

In [16]:
out = model(data.x, data.edge_index)

In [18]:
criterion = torch.nn.CrossEntropyLoss()
loss = criterion(out[data.train_mask], data.y[data.train_mask])
print(loss)

tensor(1.9463, grad_fn=<NllLossBackward0>)
