# In [1]:
from __future__ import print_function
import argparse
import pprint
import numpy as np
import pandas as pd
import anndata

from GeneGloVe import GeneGloVe  # Assuming the GeneGloVe class is saved in this file
from scipy.sparse import coo_matrix

def read_gene_expression(filename):
    """
    Stream the rows of the expression matrix stored in an .h5ad file.

    The file is loaded with ``anndata.read_h5ad`` and its ``.X`` attribute is
    iterated; every item is yielded exactly as that iteration produces it (no
    conversion or densification happens here). Because this is a generator,
    the file is not opened until the first item is requested.
    """
    # .X is assumed to be the container holding the expression values.
    expression_matrix = anndata.read_h5ad(filename).X
    yield from expression_matrix


def create_co_occurrence_matrix(data, gene_names):
    """
    Create a gene-gene co-occurrence matrix from gene expression data.

    Two genes co-occur in an observation when both have a non-zero value in
    that observation's expression vector. Every such unordered pair adds 1 to
    the entry ``(gene_i, gene_j)`` with ``gene_i < gene_j``, so only the strict
    upper triangle is populated (the result is not symmetric and the diagonal
    stays empty).

    Parameters
    ----------
    data : iterable
        Iterable of expression vectors, one per observation. Each vector may
        be a 1-D array-like, a ``(1, n)`` dense array / ``np.matrix``, or a
        scipy sparse row; all are flattened to 1-D before the non-zero
        positions are taken.
    gene_names : sized
        Only ``len(gene_names)`` is used; it fixes the matrix dimension.

    Returns
    -------
    scipy.sparse.coo_matrix
        Integer matrix of shape ``(len(gene_names), len(gene_names))`` with
        duplicate coordinates already summed, so ``.data`` holds the actual
        co-occurrence counts.

    Raises
    ------
    ValueError
        Raised by ``coo_matrix`` when a non-zero position is outside
        ``range(len(gene_names))``.
    """
    # Local import: the module-level import only brings in coo_matrix.
    from scipy.sparse import issparse

    num_genes = len(gene_names)
    row_chunks, col_chunks = [], []

    for expression in data:
        # Sparse rows are 2-D (1 x n); np.nonzero(...)[0] on them would return
        # row indices (all zeros) instead of gene indices, so densify first.
        if issparse(expression):
            expression = expression.toarray()
        # flatnonzero ravels its input, which also covers (1, n) dense rows.
        expressed = np.flatnonzero(np.asarray(expression))
        if expressed.size < 2:
            continue  # fewer than two expressed genes -> no pairs

        # All index pairs (a, b) with a < b, in the same order as the nested
        # "for i / for j in range(i + 1, ...)" loops would produce them.
        first, second = np.triu_indices(expressed.size, k=1)
        row_chunks.append(expressed[first])
        col_chunks.append(expressed[second])

    if row_chunks:
        rows = np.concatenate(row_chunks)
        cols = np.concatenate(col_chunks)
    else:
        rows = np.empty(0, dtype=np.intp)
        cols = np.empty(0, dtype=np.intp)
    # One unit of co-occurrence per pair; adjust here for other weightings.
    counts = np.ones(rows.size, dtype=int)

    matrix = coo_matrix((counts, (rows, cols)), shape=(num_genes, num_genes))
    # Collapse repeated (row, col) entries so .data carries real counts
    # instead of many separate 1s for the same gene pair.
    matrix.sum_duplicates()
    return matrix

