# Run DGE using MAST


In [1]:
import os
import scanpy as sc
import anndata as ad

import numpy as np
import scipy
import pandas as pd
import matplotlib.pyplot as pltb
import seaborn as sns
import sys
sys.path.append('Desktop/colitis/')


## function 1. subset h5ad into two conditions (pairwise)

- XAUT1: adata.obs["condition"]

- XAUT2: adata.obs["disease"], adata.obs["cpi"], adata.obs["suppression"]

For each "category", we will make a separate directory such as "XAUT1/2_category".


In [2]:
# function to split the adata into pairs of conditions.
# inputs: adata, conditions ("disease_status", "condition", etc.)
# inputs: output_path, dataname (dataset name, "XAUT1_Blood", etc.)

def subset_adata_conditions_pairwise(adata, conditions, output_path, dataname):
    # extract the conditions
    condition_list = adata.obs[conditions].unique().to_list()
    
    # make a pair of two conditions
    for i in range(0,len(condition_list)-1):
        condition1 = condition_list[i]
        for j in range(i+1,len(condition_list)):
            condition2 = condition_list[j]
            
            # subset the adata for condition1 and condition2
            adata_subset = adata[adata.obs[conditions].isin([condition1, condition2])]
            
            # define the new obs field "condition" for easier MAST running (no more disease, condition, cpi, etc.)
            adata_subset.obs["condition"] = adata.obs[conditions]
            
            # make directory for each subsetted anndata (for exported csvs later)
            filepath = os.path.join(output_path, dataname+"_"+condition1+"_"+condition2)
            if not os.path.exists(output_path):
                os.mkdir(output_path)
            if not os.path.exists(filepath):
                os.mkdir(filepath)
            
            # save the subsetted anndata for each subdirectory
            adata_subset.write_h5ad(filepath+ "/" + dataname + "_"+condition1+"_"+condition2+".h5ad")
            # os.system("mkdir output_path")

In [10]:
#biopsy disease
adata= sc.read_h5ad('Biopsy_RNA_Final.h5ad')
output_path = 'C:/Users/joshu/Desktop/colitis'
subset_adata_conditions_pairwise(adata, "disease", output_path, "XAUT2_Biopsy")

  adata_subset.obs["condition"] = adata.obs[conditions]
  adata_subset.obs["condition"] = adata.obs[conditions]
  adata_subset.obs["condition"] = adata.obs[conditions]


In [11]:
#biopsy CPI and suppression
adata= sc.read_h5ad('Biopsy_RNA_Final.h5ad')
output_path = 'C:/Users/joshu/Desktop/colitis'
subset_adata_conditions_pairwise(adata, "cpi_sup", output_path, "XAUT2_Biopsy")

  adata_subset.obs["condition"] = adata.obs[conditions]
  adata_subset.obs["condition"] = adata.obs[conditions]
  adata_subset.obs["condition"] = adata.obs[conditions]
  adata_subset.obs["condition"] = adata.obs[conditions]
  adata_subset.obs["condition"] = adata.obs[conditions]
  adata_subset.obs["condition"] = adata.obs[conditions]
  adata_subset.obs["condition"] = adata.obs[conditions]
  adata_subset.obs["condition"] = adata.obs[conditions]
  adata_subset.obs["condition"] = adata.obs[conditions]
  adata_subset.obs["condition"] = adata.obs[conditions]


In [None]:
#blood disease
adata= sc.read_h5ad('Blood_RNA_Final.h5ad')
output_path = 'C:/Users/joshu/Desktop/colitis'
subset_adata_conditions_pairwise(adata, "disease", output_path, "XAUT2_Blood")

In [12]:
#Luoma CD45 sorted
adata= sc.read_h5ad('Luoma_CD45sorted_Final.h5ad')
output_path = 'C:/Users/joshu/Desktop/colitis'
subset_adata_conditions_pairwise(adata, "disease", output_path, "45")

  adata_subset.obs["condition"] = adata.obs[conditions]
  adata_subset.obs["condition"] = adata.obs[conditions]
  adata_subset.obs["condition"] = adata.obs[conditions]


In [13]:
#Luoma CD45 sorted
adata= sc.read_h5ad('Luoma_CD3sorted_Final.h5ad')
output_path = 'C:/Users/joshu/Desktop/colitis'
subset_adata_conditions_pairwise(adata, "disease", output_path, "3")

  adata_subset.obs["condition"] = adata.obs[conditions]
  adata_subset.obs["condition"] = adata.obs[conditions]
  adata_subset.obs["condition"] = adata.obs[conditions]


In [None]:
### NEXT: input each h5ad into "convert_subset_h5ad_to_MAST_csv.py" to output files to input into MAST scripts