In [None]:
import os
import anndata
import pandas as pd
from kh import sketch
from utils import *

### Read in preprocessed T47D replicate, perform kernel herding sketching, and save files

In [None]:
# Folder that holds the input .h5ad file and receives the sketched outputs.
directory = 'data'

# Load the preprocessed T47D replicate.
t47_rep_ad = anndata.read_h5ad(
    os.path.join(directory, 'T47D_replicate_preprocessed.h5ad')
)

# Kernel herding sketch, with cells grouped by the 'well' column.
# Returns the selected indices together with the subsampled AnnData.
# NOTE(review): num_subsamples is presumably applied per sample set (per well)
# -- confirm against kh.sketch.
kh_indices_t47_rep_ad, t47_rep_ad_subsample = sketch(
    t47_rep_ad,
    sample_set_key='well',
    gamma=1,
    num_subsamples=2000,
    n_jobs=-1,
    frequency_seed=0,  # fixed seed so the sketch is reproducible
)

In [None]:
# Persist the sketched AnnData object.
t47_rep_ad_subsample.write(os.path.join(directory, 'sketched_rep.h5ad'))

# Build one flat table: the expression matrix (cells x features) followed by
# the per-cell annotations from .obs, joined column-wise on the cell index.
sketched_rep_df = pd.concat(
    [
        pd.DataFrame(
            t47_rep_ad_subsample.X,
            index=t47_rep_ad_subsample.obs_names,
            columns=t47_rep_ad_subsample.var_names,
        ),
        t47_rep_ad_subsample.obs,
    ],
    axis=1,
)

# Export the flat table as CSV next to the .h5ad file.
sketched_rep_df.to_csv(os.path.join(directory, 'sketched_rep_df.csv'))

### Perform logistic regression analysis on the replicate

In [None]:
## logistic regression
# Input location: <directory>/<filename>.h5ad, i.e. the sketched replicate.
directory = 'data'
filename = 'sketched_rep'

# Plot colour for each cell-cycle phase label.
colors_dict = {'G0':'#5CAD92',
                'G1':'#594997',
                'G2M':'#E7739A',
                'S':'#0099CC'}

## read in the sketched anndata
# Use read_h5ad (the `anndata.read` alias is deprecated) and build the path
# from `directory` instead of a second hard-coded 'data' literal.
adata = anndata.read_h5ad(os.path.join(directory, filename + '.h5ad'))

# Rename the 'G2/M' category to 'G2M'; the other entries map to themselves.
adata.obs['phase'] = adata.obs['phase'].cat.rename_categories({'G0':'G0', 'G1':'G1', 'G2/M':'G2M','S':'S'})

## binarize labels: 0 = untreated, 1 = treated (i.e. 10, 100nM)
# A single vectorised assignment: well '0' -> '0', every other well -> '1'.
# This avoids writing through `Series.values`, which is read-only when pandas
# Copy-on-Write is active and would raise instead of updating the column.
adata.obs['condition'] = (adata.obs['well'] != '0').map({False: '0', True: '1'})

## plot mean expression of each marker
# NOTE(review): mean_barplots is presumably provided by `from utils import *`.
mean_barplots(
    adata = adata,
    feature_order = ['pRB_over_RB', 'Ki67', 'pRB', 'RB', 'CDK2', 'CDK4', 'cycD1', 'cycE', 'Cdt1', 'E2F1', 'DNA', 'cycA', 'cycB1', 'p21'],
    ylim = [-1, 3.5],
    colors_dict = colors_dict,
    save_directory = 'output',
    filename_save = 'T47D_rep_barplots',
)

## run logistic regression on T47D replicate
# Drop the 'pRB_over_RB' feature before fitting; .copy() makes adata_run an
# independent object rather than a view of adata.
adata_run = adata[:, adata.var_names != 'pRB_over_RB'].copy()

# NOTE(review): run_logistic_regression is presumably provided by utils as well.
run_logistic_regression(
    adata = adata_run,
    groups = ['G0', 'G1', 'S', 'G2M'],
    origin = 'T47D_rep',
    condition_key = 'condition',
    labels_key = 'phase',
    n_splits = 5,
    save_directory = 'output',
    ylim = [-2.5, 2.5],
)