In [1]:
import h5py
import numpy as np
import collections
import scipy.sparse as sp_sparse
import tables
import pandas as pd

In [4]:
file_path = "../data/GSE115149_filtered_gene_bc_matrices_h5.h5"

with h5py.File(file_path) as f:
    data = f["hg19"]
    print(list(data))

['barcodes', 'data', 'gene_names', 'genes', 'indices', 'indptr', 'shape']


  This is separate from the ipykernel package so we can avoid doing imports until


In [5]:

def get_matrix_from_h5(filename):
    with tables.open_file(filename, 'r') as f:
        mat_group = f.get_node(f.root, 'hg19')
        barcodes = f.get_node(mat_group, 'barcodes').read()
        print(barcodes)
        data = getattr(mat_group, 'data').read()
        indices = getattr(mat_group, 'indices').read()
        indptr = getattr(mat_group, 'indptr').read()
        shape = getattr(mat_group, 'shape').read()
        matrix = sp_sparse.csc_matrix((data, indices, indptr), shape=shape)
        gene_names = f.get_node(mat_group, 'gene_names').read()
    
        return barcodes, matrix, gene_names
 

barcodes, filtered_feature_bc_matrix, gene_names = get_matrix_from_h5(file_path)

[b'AAACCTGCAAGCCGTC-1' b'AAACCTGCACAGACTT-1' b'AAACCTGCACAGCGTC-1' ...
 b'TTTGTCACATCCCACT-2' b'TTTGTCACATCCGCGA-2' b'TTTGTCATCAGGCCCA-2']


In [6]:
import scipy.sparse

msc_dataset = pd.DataFrame.sparse.from_spmatrix(filtered_feature_bc_matrix)


In [7]:
msc_dataset.columns = barcodes
msc_dataset.index = gene_names
msc_dataset.head(1)

Unnamed: 0,b'AAACCTGCAAGCCGTC-1',b'AAACCTGCACAGACTT-1',b'AAACCTGCACAGCGTC-1',b'AAACCTGCATGGATGG-1',b'AAACCTGGTAAATGAC-1',b'AAACCTGTCCTGCAGG-1',b'AAACGGGTCACAGTAC-1',b'AAACGGGTCTCACATT-1',b'AAACGGGTCTCGCATC-1',b'AAACGGGTCTCTTATG-1',...,b'TTTGCGCTCGACCAGC-2',b'TTTGCGCTCGCCTGTT-2',b'TTTGCGCTCTAACTGG-2',b'TTTGGTTCAAGGACAC-2',b'TTTGGTTCACCTTGTC-2',b'TTTGGTTGTTCACCTC-2',b'TTTGGTTTCAGATAAG-2',b'TTTGTCACATCCCACT-2',b'TTTGTCACATCCGCGA-2',b'TTTGTCATCAGGCCCA-2'
b'MIR1302-10',0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [9]:
msc_dataset.to_csv("../results/msc_dataset.csv")