In [None]:
import numpy as np
import pandas as pd
import scanpy as sc
from anndata import AnnData

In [None]:
# Data provenance: the UMI count table is not shipped with this notebook.
# Download it first from GEO accession GSE92332:
#   https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE92332
# The tab-separated table is transposed right after loading; the next cell
# treats the resulting row labels (df.index) as per-cell names.
df = pd.read_csv(
    "data/epithel_new/GSE92332_SalmHelm_UMIcounts.txt",
    sep='\t',
).transpose()

In [None]:
# Every row label of `df` is expected to consist of exactly four
# underscore-separated fields: <cell_group>_<barcode>_<condition>_<cell_type>.
# The 4-way unpacking below raises ValueError on any label that does not
# split into exactly four parts, so malformed names are not silently accepted.
name_fields = []
for cell_name in df.index:
    group, barcode, condition, cell_type = cell_name.split('_')
    name_fields.append((group, barcode, condition, cell_type))

# Project the parsed tuples into one sequence per field.
cell_groups = [fields[0] for fields in name_fields]
barcodes = [fields[1] for fields in name_fields]
cell_types = [fields[3] for fields in name_fields]

# Conditions are stored as a NumPy array (the other fields stay plain lists).
conditions = np.array([fields[2] for fields in name_fields])

In [None]:
# Wrap the count matrix in an AnnData object, with the fields parsed from the
# cell names as per-observation annotations.
#
# The labels of `df` are attached as well: rows (cell names) become obs_names
# and columns become var_names. Passing only `df.values` would drop them, and
# after the later subsetting to the most variable columns there would be no
# way to tell which ones were kept.
# NOTE(review): the columns are presumably gene identifiers — confirm against
# the input file.
cell_annotations = pd.DataFrame(
    {
        "cell_groups": cell_groups,
        "barcodes": barcodes,
        "conditions": conditions,
        "cell_types": cell_types,
    },
    index=df.index,
)
adata = AnnData(
    X=df.values,
    obs=cell_annotations,
    var=pd.DataFrame(index=df.columns),
)

In [None]:
# Preprocessing. These calls modify `adata` in place, so re-running this cell
# without rebuilding `adata` first would normalise/log-transform it a second time.

# Scale each cell so that its counts sum to 1e4.
sc.pp.normalize_total(adata, target_sum=1e4)

# log(1 + x) transform of the normalised values.
sc.pp.log1p(adata)

# Keep only the 2000 most variable genes (subset=True drops all others).
# The original call was `sc.pp.c(...)`, which is not a scanpy function and
# raises AttributeError; the `n_top_genes` / `subset` arguments are those of
# `highly_variable_genes`. Its default flavor expects log-transformed data,
# which is why it runs after `log1p`.
sc.pp.highly_variable_genes(adata, n_top_genes=2000, subset=True)

In [None]:
# Export the processed expression matrix of each selected condition to its own
# .npy file. Only the conditions listed here are written; cells with any other
# condition label are not exported.
# NOTE(review): the input was read from data/epithel_new/ while the outputs go
# to data/epithel/ — confirm this is intended. np.save does not create missing
# directories, so data/epithel/ must already exist.
output_path_by_condition = {
    'Control': 'data/epithel/Control.npy',
    'Hpoly.Day10': 'data/epithel/Hpoly.npy',
    'Salmonella': 'data/epithel/Salmonella.npy',
}

for condition_label, output_path in output_path_by_condition.items():
    in_condition = adata.obs['conditions'] == condition_label
    np.save(output_path, adata[in_condition].X.copy())