# Grapher

## Dataset

- `id`: This column represents the id of the variant in the following format: #chrom:pos:ref:alt (string).

- `#chrom`: This column represents the chromosome number where the genetic variant is located.

- `pos`: This is the position of the genetic variant on the chromosome.

- `ref`: This column represents the reference allele (or variant) at the genomic position.

- `alt`: This is the alternate allele observed at this position.

- `rsids`: This stands for reference SNP cluster ID. It's a unique identifier for each variant used in the dbSNP database.

- `nearest_genes`: This column represents the gene which is nearest to the variant.

- `pval`: This represents the p-value, which is a statistical measure for the strength of evidence against the null hypothesis.

- `mlogp`: This represents the minus log of the p-value, commonly used in genomic studies.

- `beta`: The beta coefficient represents the effect size of the variant.

- `sebeta`: This is the standard error of the beta coefficient.

- `af_alt`: This is the allele frequency of the alternate variant in the general population.

- `af_alt_cases`: This is the allele frequency of the alternate variant in the cases group.

- `af_alt_controls`: This is the allele frequency of the alternate variant in the control group.

- `finemapped`: This column represents whether the variant is included in the post-finemapped dataset (1) or not (0). 

- `trait`: This column represents the trait associated with the variant. In this dataset, it is the response to the drug paracetamol and NSAIDs.

## Load libraries

In [1]:
import sys
import os
import random
import numpy as np
from numba import jit, prange
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import sklearn
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support, roc_auc_score, average_precision_score
from sklearn.preprocessing import RobustScaler, LabelEncoder, StandardScaler, OrdinalEncoder, OneHotEncoder, MinMaxScaler
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
import torch
import torch.nn.functional as F
import torch_geometric
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv, GATConv
from torch_geometric.utils import to_undirected, negative_sampling
import networkx as nx
from scipy.spatial import cKDTree
from scipy.special import expit
from typing import List, Dict
import time
import cProfile
import pstats
import io
import category_encoders as ce
import torch
import torch.nn.functional as F
from torch_geometric.nn import SAGEConv
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score
import copy
from torch_geometric.transforms import RandomNodeSplit
from collections import Counter



# Print versions of imported libraries
print(f"Python version: {sys.version}")
print(f"NumPy version: {np.__version__}")
print(f"Pandas version: {pd.__version__}")
print(f"Matplotlib version: {matplotlib.__version__}")
print(f"Scikit-learn version: {sklearn.__version__}")
print(f"Torch version: {torch.__version__}")
print(f"Torch Geometric version: {torch_geometric.__version__}")
print(f"NetworkX version: {nx.__version__}")

if torch.cuda.is_available():
    device = torch.device("cuda")          # Current CUDA device
    print(f"Using {torch.cuda.get_device_name()} ({device})")
    print(f"CUDA version: {torch.version.cuda}")
    print(f"Number of CUDA devices: {torch.cuda.device_count()}")
else:
    print("CUDA is not available on this device.")

Python version: 3.11.4 (tags/v3.11.4:d2340ef, Jun  7 2023, 05:45:37) [MSC v.1934 64 bit (AMD64)]
NumPy version: 1.24.1
Pandas version: 1.5.3
Matplotlib version: 3.7.1
Scikit-learn version: 1.3.0
Torch version: 2.0.1+cu117
Torch Geometric version: 2.3.1
NetworkX version: 3.0
Using NVIDIA RTX A6000 (cuda)
CUDA version: 11.7
Number of CUDA devices: 2


## Load data

In [2]:
dtypes = {
    'id': 'string',
    '#chrom': 'int64',
    'pos': 'int64',
    'ref': 'string',
    'alt': 'string',
    'rsids': 'string',
    'nearest_genes': 'string',
    'pval': 'float64',
    'mlogp': 'float64',
    'beta': 'float64',
    'sebeta': 'float64',
    'af_alt': 'float64',
    'af_alt_cases': 'float64',
    'af_alt_controls': 'float64',
    'finemapped': 'int64'
}

data = pd.read_csv('~/Desktop/gwas-graph/FinnGen/data/gwas-finemap.csv', dtype=dtypes)

# Assert column names
expected_columns = ['#chrom', 'pos', 'ref', 'alt', 'rsids', 'nearest_genes', 'pval', 'mlogp', 'beta',
                    'sebeta', 'af_alt', 'af_alt_cases', 'af_alt_controls', 'finemapped',
                    'id', 'trait']
assert set(data.columns) == set(expected_columns), "Unexpected columns in the data DataFrame."

# Assert data types
expected_dtypes = {
    'id': 'string',
    '#chrom': 'int64',
    'pos': 'int64',
    'ref': 'string',
    'alt': 'string',
    'rsids': 'string',
    'nearest_genes': 'string',
    'pval': 'float64',
    'mlogp': 'float64',
    'beta': 'float64',
    'sebeta': 'float64',
    'af_alt': 'float64',
    'af_alt_cases': 'float64',
    'af_alt_controls': 'float64',
    'finemapped': 'int64'
}

for col, expected_dtype in expected_dtypes.items():
    assert data[col].dtype == expected_dtype, f"Unexpected data type for column {col}."

## Data manipulation

In [3]:
data = data.sample(frac=0.2, random_state=42)

### Find nearest gene

In [4]:
data['nearest_genes'] = data['nearest_genes'].astype(str)

# Assert column 'nearest_genes' is a string
assert data['nearest_genes'].dtype == 'object', "Column 'nearest_genes' is not of string type."

# Get the length of the data before transformation
original_length = len(data)

# Extract the first gene name from the 'nearest_genes' column
data['nearest_genes'] = data['nearest_genes'].str.split(',').str[0]

# Reset index to have a standard index
data = data.reset_index(drop=True)

# Assert the length of the data remains the same
assert len(data) == original_length, "Length of the data has changed after transformation."

## Spec

### Data

`data` Pandas DataFrame:

- `id`: This column represents the id of the variant in the following format: #chrom:pos:ref:alt (string).
- `#chrom`: This column represents the chromosome number where the genetic variant is located.
- `pos`: This is the position of the genetic variant on the chromosome (int: 1-200,000).
- `ref`: This column represents the reference allele (or variant) at the genomic position.
- `alt`: This is the alternate allele observed at this position.
- `rsids`: This stands for reference SNP cluster ID. It's a unique identifier for each variant used in the dbSNP database.
- `nearest_genes`: This column represents the gene which is nearest to the variant (string).
- `pval`: This represents the p-value, which is a statistical measure for the strength of evidence against the null hypothesis.
- `mlogp`: This represents the minus log of the p-value, commonly used in genomic studies.
- `beta`: The beta coefficient represents the effect size of the variant.
- `sebeta`: This is the standard error of the beta coefficient.
- `af_alt`: This is the allele frequency of the alternate variant in the general population (float: 0-1.
- `af_alt_cases`: This is the allele frequency of the alternate variant in the cases group (float: 0-1).
- `af_alt_controls`: This is the allele frequency of the alternate variant in the control group (float: 0-1).
- `finemapped`: This column represents whether the variant is included in the post-finemapped dataset (1) or not (0) (int).
- `trait`: This column represents the trait associated with the variant. In this dataset, it is the response to the drug paracetamol and NSAIDs.

### Task Overview

The objective is to design and implement a binary node classification GNN model to predict whether variants are included after post-finemapping or not based on `finemapping`.

### Nodes and Their Features

There is one type of node: SNP nodes.

- **SNP Nodes**: Each SNP Node is characterized by various features, including `id`, `nearest_genes`, `#chrom`, `pos`, `ref`, `alt`, `mlogp`, `beta`, `sebeta`,  `af_alt`, `af_alt_cases`, and `af_alt_controls` columns.

### Edges, Their Features, and Labels

Edges represent relationships between SNP nodes in the graph.

1. **Type 1 Edges: LD-based edges**

   - For each pair of SNPs (row1 and row2) that exist on the same chromosome (`#chrom`), an edge is created if 1) both SNPs have `finemapped=1` or `finemapped=0`, and 2) the absolute difference between their positions (`pos`) is less than or equal to 1,000,000 and greater than 1 (no loops).
   - The following formula determines the weight of the edge:
     
```
    weights = 1 * e^(-ln(2) / 100_000 * pos_diff_abs)
    
```

    - Standardize the edge weights for each chromosome after all weights have been computed.

## Graph creation

In [5]:
import torch
from torch_geometric.data import Data
from sklearn.preprocessing import RobustScaler
from sklearn.preprocessing import LabelEncoder
import category_encoders as ce
import pandas as pd
import numpy as np
import cProfile, pstats, io
import time
from typing import Tuple
import torch.multiprocessing as mp
from sklearn.pipeline import make_pipeline
from category_encoders import CountEncoder
from sklearn.preprocessing import OneHotEncoder, RobustScaler
from sklearn.compose import ColumnTransformer

In [6]:
# initialize cuda device, replace "cuda:0" and "cuda:1" with the GPU ids you want to use
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')


def get_unique_snps(data: pd.DataFrame) -> dict:
    """
    Function to create mappings for SNPs to integer indices.
    """
    return {snp: idx for idx, snp in enumerate(data['id'].unique())}


def preprocess_snp_features(data: pd.DataFrame, snp_to_idx: dict) -> pd.DataFrame:
    """
    Function to create node feature vectors for SNPs and preprocess categorical and numerical features.
    """
    # Ensure that 'id' exists in the data and 'id' and 'nearest_genes' are not null
    assert 'id' in data.columns and 'nearest_genes' in data.columns, "Columns 'id' or 'nearest_genes' do not exist in the dataframe"
    assert data[['id', 'nearest_genes']].isnull().sum().sum() == 0, "Columns 'id' or 'nearest_genes' contain null values"

    # Columns to be extracted from the original dataframe
    cols_to_extract = ['id', 'nearest_genes', '#chrom', 'pos', 'ref', 'alt', 'mlogp', 'beta', 'sebeta', 'af_alt',
                       'af_alt_cases', 'af_alt_controls', 'finemapped']

    snp_features = data.loc[data['id'].isin(snp_to_idx.keys()), cols_to_extract].set_index('id').sort_index()
    scaler = RobustScaler()

    # Frequency encoding for 'nearest_genes' using LabelEncoder
    label_encoder = LabelEncoder()
    snp_features['nearest_genes'] = label_encoder.fit_transform(snp_features['nearest_genes'])

    categorical_cols = ['ref', 'alt']

    # CountEncoder for 'ref', 'alt'
    count_encoder = ce.CountEncoder(cols=categorical_cols)
    snp_features = count_encoder.fit_transform(snp_features)

    numerical_cols = list(set(snp_features.columns) - set(categorical_cols))
    snp_features[numerical_cols] = scaler.fit_transform(snp_features[numerical_cols])

    # Filling 0 values for all columns
    snp_features = snp_features.fillna(0)

    return snp_features


def process_data(data, window_size, device):
    log2 = torch.log(torch.tensor(2., device=device))  # Pre-compute log(2)
    positive_edges_snp_snp = []
    snp_weights = []

    for chrom, group in data.groupby('#chrom'):
        if group.empty:
            continue

        # Convert necessary columns to numpy arrays for faster processing
        group_snp_idx = torch.tensor(group['snp_idx'].values, device=device)
        group_pos = torch.tensor(group['pos'].values, device=device)
        group_finemapped = torch.tensor(group['finemapped'].values, device=device)

        # Slide window across the group with overlap
        overlap = window_size // 2  # 50% overlap
        for i in range(0, len(group) - overlap, overlap):
            window_snp_idx = group_snp_idx[i:i + window_size]
            window_pos = group_pos[i:i + window_size]
            window_finemapped = group_finemapped[i:i + window_size]

            # Skip if window is empty
            if len(window_pos) == 0:
                continue

            # Calculate differences in position for each pair of SNPs in the window
            pos_diff_abs = torch.abs(window_pos[None, :] - window_pos[:, None])  # vectorized subtraction
            finemapped_same = window_finemapped[None, :] == window_finemapped[:, None]  # whether both SNPs have finemapped=1 or finemapped=0

            mask = finemapped_same & (pos_diff_abs > 1) & (pos_diff_abs <= 300_000)

            weights = torch.exp(-log2 / 100_000 * pos_diff_abs[mask])
            if len(weights) == 0:
                continue

            indices = torch.nonzero(mask)
            positive_edges_snp_snp.extend(
                zip(window_snp_idx[indices[:, 0]].cpu().numpy(), window_snp_idx[indices[:, 1]].cpu().numpy()))
            snp_weights.extend(weights.cpu().numpy())

    return positive_edges_snp_snp, snp_weights


def preprocess_positive_edges(data: pd.DataFrame, snp_to_idx: dict, window_size: int = 30_000) -> Tuple[
    torch.Tensor, torch.Tensor]:
    scaler = RobustScaler()

    # Sort data once before grouping
    data = data.sort_values(by=['#chrom', 'pos'])

    # Create new column for SNP index
    data['snp_idx'] = data['id'].map(snp_to_idx)

    # Process the data
    positive_edges_snp_snp, snp_weights = process_data(data, window_size, device)

    # Normalize weights and shift the computations to GPU
    if snp_weights:  # Added condition here
        snp_weights = np.array(snp_weights, dtype=np.float32).reshape(-1, 1)
        scaler = scaler.fit(snp_weights)
        snp_weights = torch.from_numpy(scaler.transform(snp_weights)).to(device).flatten()
    else:
        snp_weights = torch.tensor([], dtype=torch.float).to(device)

    # Now, keep the edges on the CPU
    positive_edges_snp_snp = torch.from_numpy(np.array(positive_edges_snp_snp, dtype=np.int64)).T

    return positive_edges_snp_snp, snp_weights


def create_pytorch_graph(features: torch.Tensor, edges: torch.Tensor, edge_weights: torch.Tensor) -> Data:
    return Data(x=features, edge_index=edges, edge_attr=edge_weights)


# Profiling
pr = cProfile.Profile()
pr.enable()
start_time = time.time()

# Predefined chromosome number
chromosome = 7  # You can set your desired chromosome here

# Main
data = data[data['#chrom'] == chromosome]  # Only keep the data for the specified chromosome

snp_to_idx = get_unique_snps(data)
labels = data['finemapped'].map(lambda x: 1 if x > 0 else 0)
snp_features = preprocess_snp_features(data, snp_to_idx)

# Now, keep the features on the CPU
features = torch.tensor(snp_features.values, dtype=torch.float)

# Preprocess edges and weights
edges, weights = preprocess_positive_edges(data, snp_to_idx)

# Clear GPU cache
torch.cuda.empty_cache()

# Create the final graph (on the CPU)
graph = create_pytorch_graph(features, edges, weights)
graph.y = torch.tensor(labels.values, dtype=torch.long)

# Clear GPU cache
torch.cuda.empty_cache()

print(f"Number of nodes: {graph.num_nodes}")
print(f"Number of edges: {graph.num_edges}")
print(f"Node feature dimension: {graph.num_node_features}")

# Profiling
elapsed_time = time.time() - start_time
print(f"Execution time: {elapsed_time} seconds")
pr.disable()
s = io.StringIO()
sortby = 'cumulative'
ps = pstats.Stats(pr, stream=s).sort_stats(sortby)
ps.print_stats(5)  # Only print the top 5 lines
print(s.getvalue())

Number of nodes: 227674
Number of edges: 374656810
Node feature dimension: 12
Execution time: 138.54895973205566 seconds
         8508827 function calls (8507698 primitive calls) in 138.161 seconds

   Ordered by: cumulative time
   List reduced from 1198 to 5 due to restriction <5>

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
       18    0.000    0.000  138.559    7.698 C:\Users\Windows\AppData\Local\Programs\Python\Python311\Lib\site-packages\IPython\core\interactiveshell.py:3472(run_code)
       18    0.000    0.000  138.559    7.698 {built-in method builtins.exec}
        1    9.742    9.742  135.174  135.174 C:\Users\Windows\AppData\Local\Temp\ipykernel_27548\2354801936.py:89(preprocess_positive_edges)
       50   70.776    1.416   70.776    1.416 {built-in method numpy.array}
        1    2.884    2.884   50.458   50.458 C:\Users\Windows\AppData\Local\Temp\ipykernel_27548\2354801936.py:46(process_data)





## Graph stats

## Data splitting

In [7]:
# Split the data into training, validation, and test sets
transform = RandomNodeSplit(split="random", num_train_per_class=2_500, num_val=0.01, num_test=0.01, key='y')
graph = transform(graph)

# Count the number of nodes per class in each set
train_class_counts = Counter(graph.y[graph.train_mask].numpy())
val_class_counts = Counter(graph.y[graph.val_mask].numpy())
test_class_counts = Counter(graph.y[graph.test_mask].numpy())

# Print the results
print("Number of nodes per class in each set:")
print("Train set:")
for class_label, count in train_class_counts.items():
    print(f"Class {class_label}: {count} nodes")
print("Validation set:")
for class_label, count in val_class_counts.items():
    print(f"Class {class_label}: {count} nodes")
print("Test set:")
for class_label, count in test_class_counts.items():
    print(f"Class {class_label}: {count} nodes")

# Calculate and print the percentage of class 1 vs. class 0 in the test set
total_test_nodes = sum(test_class_counts.values())
class_0_nodes = test_class_counts[0]
class_1_nodes = test_class_counts[1]
class_0_percentage = (class_0_nodes / total_test_nodes) * 100
class_1_percentage = (class_1_nodes / total_test_nodes) * 100
print(f"Percentage of Class 0 in test set: {class_0_percentage:.2f}%")
print(f"Percentage of Class 1 in test set: {class_1_percentage:.2f}%")

Number of nodes per class in each set:
Train set:
Class 0: 2500 nodes
Class 1: 2500 nodes
Validation set:
Class 0: 1900 nodes
Class 1: 377 nodes
Test set:
Class 0: 1923 nodes
Class 1: 354 nodes
Percentage of Class 0 in test set: 84.45%
Percentage of Class 1 in test set: 15.55%


## GraphSAGE

In [8]:
from sklearn.utils.class_weight import compute_class_weight
from torch.nn.functional import binary_cross_entropy, dropout, leaky_relu
from sklearn.metrics import precision_recall_curve, auc


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# suppose y is your target vector
y = graph.y.cpu().numpy()

# Count number of occurrences of each class
class_counts = np.bincount(y)

# Compute class weights
class_weights = 1. / class_counts

class_weights = torch.tensor(class_weights, dtype=torch.float).to(device)
class_weights = class_weights / class_weights.sum()


class GraphSAGEModel(torch.nn.Module):
    def __init__(self, num_node_features, hidden_layers, num_classes, dropout_rate=0.5):
        super(GraphSAGEModel, self).__init__()
        self.layers = torch.nn.ModuleList()
        self.layers.append(SAGEConv(num_node_features, hidden_layers[0]))
        for i in range(1, len(hidden_layers)):
            self.layers.append(SAGEConv(hidden_layers[i - 1], hidden_layers[i]))
        self.layers.append(SAGEConv(hidden_layers[-1], num_classes))
        self.dropout_rate = dropout_rate
        self.bn_layers = torch.nn.ModuleList([torch.nn.BatchNorm1d(size) for size in hidden_layers])

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        for i, conv in enumerate(self.layers[:-1]):
            x = conv(x, edge_index)
            x = self.bn_layers[i](x)
            x = torch.nn.functional.leaky_relu(x)
            x = torch.nn.functional.dropout(x, p=self.dropout_rate, training=self.training)
        x = self.layers[-1](x, edge_index)
        return torch.sigmoid(x.view(-1))
    
    
class FocalLoss(torch.nn.Module):
    def __init__(self, alpha=None, gamma=2, reduce=True):
        super(FocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.reduce = reduce

    def forward(self, inputs, targets):
        if self.alpha is not None:
            alpha_t = self.alpha[targets.long()].view(-1, 1)
            logpt = -binary_cross_entropy(inputs, targets, reduction='none')
            logpt = logpt * alpha_t
        else:
            logpt = -binary_cross_entropy(inputs, targets, reduction='none')
        pt = torch.exp(logpt)
        F_loss = -((1 - pt) ** self.gamma) * logpt
        if self.reduce:
            return torch.mean(F_loss)
        else:
            return F_loss

        
class EarlyStopping:
    def __init__(self, patience=10, min_delta=0):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.best_score = None
        self.early_stop = False

    def step(self, val_loss):
        score = -val_loss
        if self.best_score is None:
            self.best_score = score
        elif score < self.best_score + self.min_delta:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.counter = 0\
            
            
def train(model, data, loss_fn, optimizer):
    model.train()
    optimizer.zero_grad()
    out = model(data)[data.train_mask].squeeze()
    loss = loss_fn(out, data.y[data.train_mask].float())
    loss.backward()
    optimizer.step()
    return loss.item()


def evaluate(model, data, mask, loss_fn):
    model.eval()
    with torch.no_grad():
        out = model(data)[mask].squeeze()
        preds = (out > 0.5).long()
        loss = loss_fn(out, data.y[mask].float())
        accuracy = preds.eq(data.y[mask]).sum().item() / mask.sum().item()
        precision, recall, _ = precision_recall_curve(data.y[mask].cpu(), out.cpu())
        auprc = auc(recall, precision)
    return loss.item(), accuracy, auprc



def main():
    # initialization
    hidden_layers = [64, 64] 
    model = GraphSAGEModel(graph.num_node_features, hidden_layers, 1, dropout_rate=0.5).to(device)
    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-5)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=10)
    loss_fn = FocalLoss(alpha=class_weights, gamma=2)

    early_stopping = EarlyStopping(patience=10, min_delta=0.0001)

    # Move your graph data to device
    graph.x = graph.x.to(device)
    graph.edge_index = graph.edge_index.to(device)
    graph.y = graph.y.to(device)
    graph.train_mask = graph.train_mask.to(device)
    graph.val_mask = graph.val_mask.to(device)
    graph.test_mask = graph.test_mask.to(device)

    for epoch in range(150):
        train_loss = train(model, graph, loss_fn, optimizer)
        _, train_acc, _ = evaluate(model, graph, graph.train_mask, loss_fn)
        val_loss, val_acc, val_auprc = evaluate(model, graph, graph.val_mask, loss_fn)
        print(f'Epoch: {epoch+1}, Loss: {train_loss:.10f}, Train Acc: {train_acc:.10f}, Val Loss: {val_loss:.10f}, Val Acc: {val_acc:.10f}, Val AUPRC: {val_auprc:.10f}')

        # Reduce learning rate when validation loss plateaus
        scheduler.step(val_loss)
        
        # Early stopping
        early_stopping.step(-val_auprc)  # Pass -val_auprc because we want to maximize it
        if early_stopping.early_stop:
            print("Early stopping!")
            break

        # Save checkpoint if is a new best
        if -val_auprc == early_stopping.best_score:  # Save the best model based on AUPRC
            torch.save(model.state_dict(), 'checkpoint.pt')

    # Load the best model back in
    if os.path.isfile('checkpoint.pt'):
        model.load_state_dict(torch.load('checkpoint.pt'))
    else:
        print("No checkpoint found. Creating a new file checkpoint.pt.")
        torch.save(model.state_dict(), 'checkpoint.pt')
        
    test_loss, test_acc, test_auprc = evaluate(model, graph, graph.test_mask, loss_fn)
    print(f'After early stopping, Test AUPRC: {test_auprc:.10f}')

# Call the main function
main()


OutOfMemoryError: CUDA out of memory. Tried to allocate 89.33 GiB (GPU 0; 47.99 GiB total capacity; 7.19 GiB already allocated; 33.34 GiB free; 12.29 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF