In [1]:
import numpy as np
import pandas as pd
import scipy.io
import scanpy as sc
import matplotlib.pyplot as plt
import anndata as ad

In [2]:
# Load data
matrix = scipy.io.mmread('/home/mcb/users/ssue1/DECON/data/MDD/MDD_GEO/GSE144136_GeneBarcodeMatrix_Annotated.mtx')
full_matrix = matrix.toarray()

cells = pd.read_csv('/home/mcb/users/ssue1/DECON/data/MDD/MDD_GEO/GSE144136_CellNames.csv', index_col=0)

genes = pd.read_csv('/home/mcb/users/ssue1/DECON/data/MDD/MDD_GEO/GSE144136_GeneNames.csv', index_col=0)

df = pd.DataFrame(full_matrix, index=genes['x'], columns=cells.index)
df = df.T

X = df.values

print(df.shape)

(78886, 30062)


In [3]:
print(df)

x      RP11-34P13.3  RP11-34P13.7  RP11-34P13.14  FO538757.3  FO538757.2  \
1                 0             0              0           0           1   
2                 0             0              0           0           1   
3                 0             0              0           0           1   
4                 0             0              0           0           0   
5                 0             0              0           0           0   
...             ...           ...            ...         ...         ...   
78882             0             0              0           0           0   
78883             0             0              0           0           0   
78884             0             0              0           0           0   
78885             0             0              0           0           0   
78886             0             0              0           0           0   

x      AP006222.2  RP5-857K21.15  RP4-669L17.2  RP4-669L17.10  RP5-857K21.4  \
1       

In [4]:
def populateAnnData(mat,df, cells):
    import anndata
    adata = anndata.AnnData(X=mat)
    adata.var_names = df.columns.values
    adata.obs['cell_types'] = cells['Cell_Type'].values
    adata.obs['Cell_SubType'] = cells['Cell_SubType'].values
    adata.obs['Subject'] = cells['Subject'].values
    adata.obs['Diagnosis'] = cells['Diagnosis'].values
    adata.obs['batch_indices'] = cells['Batch'].values
    return adata

adata = populateAnnData(X, df, cells)
print(adata)

AnnData object with n_obs × n_vars = 78886 × 30062
    obs: 'cell_types', 'Cell_SubType', 'Subject', 'Diagnosis', 'batch_indices'


In [21]:
adata = ad.read_h5ad('/home/mcb/users/ssue1/DECON/data/MDD/loo.h5ad')
print(adata)

AnnData object with n_obs × n_vars = 23598 × 30062
    obs: 'cell_types', 'Cell_SubType', 'Subject', 'Diagnosis', 'batch_indices'


In [3]:
adata.obs.Subject.unique()

array([ 3,  9, 12, 13, 15, 20, 21, 24, 25,  2,  7, 16, 19, 27, 29, 31, 22])

In [4]:
adata_filter = adata[~adata.obs['Subject'].isin(['13','15', '20']),:]
adata_filter

View of AnnData object with n_obs × n_vars = 23598 × 30062
    obs: 'cell_types', 'Cell_SubType', 'Subject', 'Diagnosis', 'batch_indices'

In [5]:
adata_filter.write('/home/mcb/users/ssue1/DECON/data/MDD/loo.h5ad')

In [22]:
print(adata.obs.cell_types)

0                 Ex
1              Inhib
2                 Ex
3                 Ex
4              Inhib
            ...     
75654          Inhib
75655          Inhib
75656    Micro/Macro
75657             Ex
75658             Ex
Name: cell_types, Length: 23598, dtype: category
Categories (7, object): ['Astros', 'Endo', 'Ex', 'Inhib', 'Micro/Macro', 'OPCs', 'Oligos']


In [4]:
adata_filter = adata[~adata.obs['cell_types'].isin(['Mix']),:]
print(adata_filter)

View of AnnData object with n_obs × n_vars = 32700 × 30062
    obs: 'cell_types', 'Cell_SubType', 'Subject', 'Diagnosis', 'batch_indices'


In [5]:
adata_filter.write('/home/mcb/users/ssue1/DECON/data/MDD/mdd_scaden_forbulk.h5ad')

In [4]:
adata_filter = adata_filter[adata_filter.obs['Diagnosis'].isin(['0']),:]
print(adata_filter)

View of AnnData object with n_obs × n_vars = 32158 × 30062
    obs: 'cell_types', 'Cell_SubType', 'Subject', 'Diagnosis', 'batch_indices'


In [24]:
print(adata.obs.Diagnosis.unique())

[0]


In [28]:
sc.pp.filter_cells(adata, min_genes=500)
sc.pp.filter_genes(adata, min_cells=5)
print(adata)

Trying to set attribute `.obs` of view, copying.


AnnData object with n_obs × n_vars = 16269 × 25677
    obs: 'cell_types', 'Cell_SubType', 'Subject', 'Diagnosis', 'batch_indices', 'n_genes'
    var: 'n_cells'


In [29]:
sc.pp.normalize_total(adata, target_sum=1e4, exclude_highly_expressed=True)

In [30]:
sc.pp.log1p(adata)

In [31]:
from sklearn import preprocessing as pp

def sample_scaling(x):
    mms = pp.MinMaxScaler(feature_range=(0, 1), copy=True)
    # it scales features so transpose is needed
    x = mms.fit_transform(x.T).T
    return x

adata.X = sample_scaling(adata.X)
print(adata)

AnnData object with n_obs × n_vars = 16269 × 25677
    obs: 'cell_types', 'Cell_SubType', 'Subject', 'Diagnosis', 'batch_indices', 'n_genes'
    var: 'n_cells'
    uns: 'log1p'


In [16]:
adata.obs.cell_types.unique()

['Ex', 'Inhib', 'Astros', 'Oligos', 'OPCs', 'Micro/Macro', 'Endo']
Categories (7, object): ['Ex', 'Inhib', 'Astros', 'Oligos', 'OPCs', 'Micro/Macro', 'Endo']

In [32]:
adata.write('/home/mcb/users/ssue1/DECON/data/MDD/mdd_loo_labonte_filtered.h5ad')

In [2]:
adata = ad.read_h5ad('/home/mcb/users/ssue1/DECON/data/MDD/mdd_healthy_filtered.h5ad')
print(adata.obs.cell_types.unique())

['Ex', 'Inhib', 'Oligos', 'Astros', 'Micro/Macro', 'OPCs', 'Endo']
Categories (7, object): ['Ex', 'Inhib', 'Oligos', 'Astros', 'Micro/Macro', 'OPCs', 'Endo']


In [3]:
adata

AnnData object with n_obs × n_vars = 32700 × 30062
    obs: 'cell_types', 'Cell_SubType', 'Subject', 'Diagnosis', 'batch_indices'

In [33]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(adata, adata.obs.cell_types, test_size=0.15, random_state=10)

In [20]:
print(X_train)

View of AnnData object with n_obs × n_vars = 16339 × 25350
    obs: 'cell_types', 'Cell_SubType', 'Subject', 'Diagnosis', 'batch_indices', 'n_genes'
    var: 'n_cells'
    uns: 'log1p'


In [21]:
print(X_test)

View of AnnData object with n_obs × n_vars = 7003 × 25350
    obs: 'cell_types', 'Cell_SubType', 'Subject', 'Diagnosis', 'batch_indices', 'n_genes'
    var: 'n_cells'
    uns: 'log1p'


In [16]:
print(healthy)

View of AnnData object with n_obs × n_vars = 23435 × 25920
    obs: 'cell_types', 'Cell_SubType', 'Subject', 'Diagnosis', 'batch_indices', 'n_genes'
    var: 'n_cells'


In [35]:
X_train.write('/home/mcb/users/ssue1/DECON/data/MDD/mdd_loo_labonte_filtered_train.h5ad')

In [6]:
bulk = pd.read_csv('/home/mcb/users/ssue1/DECON/data/MDD/aINS_loo.csv', index_col=0)
print(bulk)

             17   20  150  150_ofc  17_dlpfc  20_dlpfc  150_dlpfc
5S_rRNA       2    6    3        8         4         8         20
5_8S_rRNA     0    0    0        0         0         0          0
7SK          21   15   13       18        23        14         45
A1BG         45   43   26       72        36        48        102
A1BG-AS1    128   74   55       86        77        86        226
...         ...  ...  ...      ...       ...       ...        ...
snoZ40        0    0    0        0         0         0          0
snoZ6         0    0    0        0         0         0          0
snosnR66      0    0    0        0         0         0          0
uc_338      108  123   54      124       108       114        314
yR211F11.2    2    2    6        6        12         6         16

[58131 rows x 7 columns]


In [6]:
bulk = bulk.T

In [7]:
print(bulk)

             1      2     3     4     5     6     7      8      9      10  \
A1BG        1.0    0.0   0.0   0.0   0.0   0.0   1.0    0.0    0.0    1.0   
A1BG-AS1   20.0   31.0   5.0   4.0   5.0   8.0  12.0   17.0   21.0   19.0   
A1CF        0.0    1.0   0.0   0.0   0.0   0.0   0.0    2.0    0.0    0.0   
A2M         5.0    7.0   1.0   5.0   2.0   1.0   1.0    5.0    5.0    4.0   
A2M-AS1     3.0    4.0   2.0   2.0   2.0   2.0   1.0    1.0    1.0    1.0   
...         ...    ...   ...   ...   ...   ...   ...    ...    ...    ...   
ZYG11A      0.0    0.0   0.0   0.0   0.0   0.0   0.0    1.0    0.0    0.0   
ZYG11B    157.0  208.0  37.0  44.0  51.0  96.0  78.0  125.0  124.0  117.0   
ZYX        81.0   62.0   5.0   8.0  12.0  14.0   8.0   39.0   31.0   49.0   
ZZEF1      91.0  111.0  25.0  24.0  31.0  37.0  26.0   95.0   84.0   71.0   
ZZZ3       80.0  129.0  22.0  47.0  44.0  83.0  42.0  136.0  131.0   96.0   

             11    12    13  
A1BG        0.0   0.0   0.0  
A1BG-AS1   37.0

In [25]:
genes = sorted(list(set(bulk.index).intersection(adata.var_names)))
print(len(genes))

29149


In [26]:
adata = adata[:, genes]

In [27]:
print(adata)

View of AnnData object with n_obs × n_vars = 23598 × 29149
    obs: 'cell_types', 'Cell_SubType', 'Subject', 'Diagnosis', 'batch_indices'


In [24]:
batch = np.ones((3,), dtype=int)
print(batch)

[1 1 1]


In [28]:
X = s17.values

In [30]:
def populateAnnData(mat,adata, final):
    import anndata
    adata = anndata.AnnData(X=mat)
    adata.var_names = s17.columns.values
    adata.obs['cell_types'] = s17.index.values
    adata.obs['batch_indices'] = batch
    return adata

bulk17 = populateAnnData(X, s17, batch)
print(bulk17)

AnnData object with n_obs × n_vars = 3 × 57773
    obs: 'cell_types', 'batch_indices'


In [21]:
adata = ad.read_h5ad('/home/mcb/users/ssue1/DECON/data/MDD/mdd_healthy_sc.h5ad')
print(adata)

AnnData object with n_obs × n_vars = 32700 × 30062
    obs: 'cell_types', 'Cell_SubType', 'Subject', 'Diagnosis', 'batch_indices'


In [24]:
mdd_df = pd.DataFrame(adata_filter.X, index=adata_filter.obs.index.values, columns=adata_filter.var.index.values, dtype='int8')
print(mdd_df)

       RP11-34P13.3  RP11-34P13.7  RP11-34P13.14  FO538757.3  FO538757.2  \
0                 0             0              0           0           1   
1                 0             0              0           0           1   
2                 0             0              0           0           1   
3                 0             0              0           0           0   
4                 0             0              0           0           0   
...             ...           ...            ...         ...         ...   
75653             0             0              0           0           0   
75654             0             0              0           0           1   
75655             0             0              0           0           1   
75657             0             0              0           0           0   
75658             0             0              0           0           0   

       AP006222.2  RP5-857K21.15  RP4-669L17.2  RP4-669L17.10  RP5-857K21.4  \
0       

In [25]:
mdd_df = mdd_df.T
print(mdd_df)

               0  1  2  3  4  6  7  8  9  10  ...  75648  75649  75650  75651  \
RP11-34P13.3   0  0  0  0  0  0  0  0  0   0  ...      0      0      0      0   
RP11-34P13.7   0  0  0  0  0  0  0  0  0   0  ...      0      0      0      0   
RP11-34P13.14  0  0  0  0  0  0  0  0  0   0  ...      0      0      0      0   
FO538757.3     0  0  0  0  0  0  0  0  0   0  ...      0      0      0      0   
FO538757.2     1  1  1  0  0  1  1  0  0   0  ...      1      0      0      0   
...           .. .. .. .. .. .. .. .. ..  ..  ...    ...    ...    ...    ...   
AL354822.1     0  0  0  0  0  0  0  0  0   0  ...      0      0      0      0   
AC023491.2     0  0  0  0  0  0  0  0  0   0  ...      0      0      0      0   
AC004556.1     0  0  0  0  0  0  0  0  0   0  ...      0      0      0      0   
AC240274.1     0  0  0  0  0  0  0  0  0   0  ...      0      0      0      0   
FAM231B        0  0  0  0  0  0  0  0  0   0  ...      0      0      0      0   

               75652  75653

In [None]:
pheno = pd.DataFrame(adata_filter.obs.cell_types)
print(pheno)

In [None]:
pheno['Subject'] = adata_filter.obs.Subject
print(pheno)

In [None]:
mdd_df.to_csv('/home/mcb/users/ssue1/DECON/data/MDD/mdd_healthy_lake_music.csv')

In [5]:
pheno.to_csv('/home/mcb/users/ssue1/DECON/data/MDD/mdd_healthy__lake_pheno_music.csv')

In [2]:
adata = ad.read_h5ad('/home/mcb/users/ssue1/DECON/data/MDD/mdd_sc.h5ad')
print(adata)

AnnData object with n_obs × n_vars = 78886 × 30062
    obs: 'cell_types', 'Cell_SubType', 'Subject', 'Diagnosis', 'batch_indices'


In [3]:
nagy_df = pd.DataFrame(adata.X, index=adata.obs.index.values, columns=adata.var.index.values)
print(nagy_df)

       RP11-34P13.3  RP11-34P13.7  RP11-34P13.14  FO538757.3  FO538757.2  \
0               0.0           0.0            0.0         0.0         1.0   
1               0.0           0.0            0.0         0.0         1.0   
2               0.0           0.0            0.0         0.0         1.0   
3               0.0           0.0            0.0         0.0         0.0   
4               0.0           0.0            0.0         0.0         0.0   
...             ...           ...            ...         ...         ...   
78881           0.0           0.0            0.0         0.0         0.0   
78882           0.0           0.0            0.0         0.0         0.0   
78883           0.0           0.0            0.0         0.0         0.0   
78884           0.0           0.0            0.0         0.0         0.0   
78885           0.0           0.0            0.0         0.0         0.0   

       AP006222.2  RP5-857K21.15  RP4-669L17.2  RP4-669L17.10  RP5-857K21.4  \
0       

In [4]:
nagy_df['subject'] = adata.obs.Subject.values
nagy_df['celltype'] = adata.obs.cell_types.values
print(nagy_df)
nagy_df = nagy_df[~nagy_df.celltype.str.contains('Mix')]
nagy_df = nagy_df[~nagy_df.celltype.str.contains('Micro/Macro')]
print(nagy_df)

       RP11-34P13.3  RP11-34P13.7  RP11-34P13.14  FO538757.3  FO538757.2  \
0               0.0           0.0            0.0         0.0         1.0   
1               0.0           0.0            0.0         0.0         1.0   
2               0.0           0.0            0.0         0.0         1.0   
3               0.0           0.0            0.0         0.0         0.0   
4               0.0           0.0            0.0         0.0         0.0   
...             ...           ...            ...         ...         ...   
78881           0.0           0.0            0.0         0.0         0.0   
78882           0.0           0.0            0.0         0.0         0.0   
78883           0.0           0.0            0.0         0.0         0.0   
78884           0.0           0.0            0.0         0.0         0.0   
78885           0.0           0.0            0.0         0.0         0.0   

       AP006222.2  RP5-857K21.15  RP4-669L17.2  RP4-669L17.10  RP5-857K21.4  \
0       

In [6]:
cell_types = sorted(nagy_df['celltype'].unique())
print(cell_types)

['Astros', 'Endo', 'Ex', 'Inhib', 'OPCs', 'Oligos']


In [7]:
no_ct = nagy_df.drop(['celltype'], axis=1)

In [9]:
unique = sorted(nagy_df['subject'].unique())
print(unique)

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34]


In [10]:
bulk = []
for i in unique:
    subset = no_ct[no_ct['subject']==i]
    subset = subset.set_index('subject')
    subset_sum = subset.sum()
    bulk.append(subset_sum.values)
print(bulk)

[array([ 5., 12.,  0., ...,  1., 63.,  0.], dtype=float32), array([ 2.,  7.,  0., ...,  1., 32.,  0.], dtype=float32), array([ 0.,  6.,  0., ...,  0., 25.,  0.], dtype=float32), array([ 2., 29.,  0., ...,  2., 61.,  0.], dtype=float32), array([ 0., 10.,  0., ...,  2., 18.,  0.], dtype=float32), array([ 3., 13.,  0., ...,  0., 52.,  0.], dtype=float32), array([  2.,  10.,   0., ...,   3., 100.,   0.], dtype=float32), array([ 3., 26.,  0., ...,  2., 20.,  0.], dtype=float32), array([ 0., 13.,  0., ..., 41., 22.,  0.], dtype=float32), array([ 0., 15.,  0., ...,  1., 74.,  0.], dtype=float32), array([ 0., 57.,  0., ...,  9., 88.,  0.], dtype=float32), array([ 0., 13.,  0., ..., 40., 31.,  0.], dtype=float32), array([ 1., 18.,  0., ...,  2., 25.,  0.], dtype=float32), array([ 1., 28.,  0., ...,  2., 29.,  0.], dtype=float32), array([ 0., 19.,  0., ...,  1., 35.,  0.], dtype=float32), array([ 2., 17.,  0., ..., 19., 44.,  0.], dtype=float32), array([ 0., 26.,  0., ...,  5., 87.,  0.], dtype=

In [11]:
bulk_array = np.vstack(bulk)
print(bulk_array)

[[ 5. 12.  0. ...  1. 63.  0.]
 [ 2.  7.  0. ...  1. 32.  0.]
 [ 0.  6.  0. ...  0. 25.  0.]
 ...
 [ 5. 36.  0. ... 14. 25.  0.]
 [ 3. 20.  1. ... 12. 45.  0.]
 [ 1. 34.  0. ... 25. 75.  0.]]


In [12]:
bulk_df = pd.DataFrame(bulk_array, index=unique, columns=adata.var.index.values)
print(bulk_df)

    RP11-34P13.3  RP11-34P13.7  RP11-34P13.14  FO538757.3  FO538757.2  \
1            5.0          12.0            0.0         0.0       978.0   
2            2.0           7.0            0.0         0.0      1420.0   
3            0.0           6.0            0.0         0.0       558.0   
4            2.0          29.0            0.0         0.0      1570.0   
5            0.0          10.0            0.0         0.0      1056.0   
6            3.0          13.0            0.0         0.0       764.0   
7            2.0          10.0            0.0         1.0      1565.0   
8            3.0          26.0            0.0         0.0       711.0   
9            0.0          13.0            0.0         0.0       387.0   
10           0.0          15.0            0.0         1.0       691.0   
11           0.0          57.0            0.0         0.0       709.0   
12           0.0          13.0            0.0         0.0       893.0   
13           1.0          18.0            0.0      

In [13]:
bulk_df.to_csv('/home/mcb/users/ssue1/DECON/data/MDD/nagy_bulk.csv')

In [14]:
proportions = []
for i in unique:
    sub_proportions=[]
    subset = nagy_df[nagy_df['subject']==i]
    for c in cell_types:
        count = subset[subset['celltype'].str.contains(c)]
        frac = (len(count))/len(subset)
        sub_proportions.append(frac)
    proportions.append(sub_proportions)

In [15]:
proportions_array = np.vstack(proportions)
print(proportions_array)

[[4.34637245e-03 1.33734537e-03 7.33199599e-01 1.73186225e-01
  3.77800067e-02 5.01504514e-02]
 [1.58403869e-01 3.02297461e-03 6.19709794e-01 1.36638452e-01
  3.44619105e-02 4.77629988e-02]
 [1.24338624e-01 8.81834215e-03 3.83597884e-01 2.83950617e-01
  4.14462081e-02 1.57848325e-01]
 [1.70430337e-03 1.70430337e-03 7.21772476e-01 2.54367277e-01
  3.83468257e-03 1.66169578e-02]
 [4.01974612e-02 4.93653032e-03 6.00141044e-01 1.85472496e-01
  5.00705219e-02 1.19181946e-01]
 [8.57933579e-02 2.30627306e-03 6.28690037e-01 1.77121771e-01
  2.85977860e-02 7.74907749e-02]
 [1.08630313e-01 2.14684414e-03 6.24302276e-01 1.36109918e-01
  3.69257192e-02 9.18849292e-02]
 [2.96882731e-02 5.93765463e-03 7.10539337e-01 2.14250371e-01
  1.88025730e-02 2.07817912e-02]
 [8.92098556e-02 1.18946474e-02 4.42650807e-01 2.91418862e-01
  6.54205607e-02 9.94052676e-02]
 [7.33074601e-03 4.31220354e-03 6.96420871e-01 1.42302717e-01
  3.10478655e-02 1.18585597e-01]
 [1.18413755e-01 7.21020521e-03 5.87354409e-01 1.5

In [16]:
proportions_df = pd.DataFrame(proportions_array, index=unique, columns=cell_types)
print(proportions_df)

      Astros      Endo        Ex     Inhib      OPCs    Oligos
1   0.004346  0.001337  0.733200  0.173186  0.037780  0.050150
2   0.158404  0.003023  0.619710  0.136638  0.034462  0.047763
3   0.124339  0.008818  0.383598  0.283951  0.041446  0.157848
4   0.001704  0.001704  0.721772  0.254367  0.003835  0.016617
5   0.040197  0.004937  0.600141  0.185472  0.050071  0.119182
6   0.085793  0.002306  0.628690  0.177122  0.028598  0.077491
7   0.108630  0.002147  0.624302  0.136110  0.036926  0.091885
8   0.029688  0.005938  0.710539  0.214250  0.018803  0.020782
9   0.089210  0.011895  0.442651  0.291419  0.065421  0.099405
10  0.007331  0.004312  0.696421  0.142303  0.031048  0.118586
11  0.118414  0.007210  0.587354  0.150860  0.017471  0.118691
12  0.124260  0.010848  0.533037  0.169132  0.036489  0.126233
13  0.080689  0.005893  0.656392  0.172711  0.027199  0.057117
14  0.001847  0.004926  0.626847  0.279557  0.026478  0.060345
15  0.076923  0.008514  0.616853  0.187610  0.036406  0

In [17]:
proportions_df.to_csv('/home/mcb/users/ssue1/DECON/data/MDD/nagy_bulk_proportions.csv')

In [18]:
batch = np.ones((34,), dtype=int)
print(batch)

[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]


In [20]:
X = bulk_df.values

In [21]:
def populateAnnData(X,bulk_df, batch):
    import anndata
    adata = anndata.AnnData(X=X)
    adata.var_names = bulk_df.columns.values
    adata.obs['cell_types'] = bulk_df.index.values
    adata.obs['batch_indices'] = batch
    return adata

adata = populateAnnData(X, bulk_df, batch)
print(adata)

AnnData object with n_obs × n_vars = 34 × 30062
    obs: 'cell_types', 'batch_indices'


In [22]:
print(adata.X)

[[ 5. 12.  0. ...  1. 63.  0.]
 [ 2.  7.  0. ...  1. 32.  0.]
 [ 0.  6.  0. ...  0. 25.  0.]
 ...
 [ 5. 36.  0. ... 14. 25.  0.]
 [ 3. 20.  1. ... 12. 45.  0.]
 [ 1. 34.  0. ... 25. 75.  0.]]


In [23]:
import scanpy as sc
sc.pp.log1p(adata)

In [24]:
from sklearn import preprocessing as pp

def sample_scaling(x):
    mms = pp.MinMaxScaler(feature_range=(0, 1), copy=True)
    # it scales features so transpose is needed
    x = mms.fit_transform(x.T).T
    return x

adata.X = sample_scaling(adata.X)
print(adata)

AnnData object with n_obs × n_vars = 34 × 30062
    obs: 'cell_types', 'batch_indices'
    uns: 'log1p'


In [25]:
adata.write('/home/mcb/users/ssue1/DECON/data/MDD/nagy_bulk_pp.h5ad')