In [1]:
import os
import sys
import pandas as pd
import scanpy as sc
import matplotlib.pyplot as plt
import anndata as ad
import pickle

In [2]:
from SenCID.api import SenCID

  _config = yaml.load(open(_config_path))


In [3]:
# Set scanpy's verbosity level to 0 for the rest of the notebook.
sc.settings.verbosity = 0

# Turn off pandas' chained-assignment check (no warning, no error).
pd.set_option('mode.chained_assignment', None)

In [4]:
# Build the per-slide metadata table from the E-MTAB-13084 SDRF sheet.
#
# The table is built as a single non-mutating chain so that no column is ever
# assigned onto a filtered slice and re-running the cell always starts from
# the file on disk.

# Columns carried over from the SDRF sheet before renaming.
relevant_columns = [
    'Source Name',
    'Characteristics[age]',
    'Characteristics[sex]',
    'Characteristics[sampling site]',
    'Factor Value[disease]',
    'Characteristics[sample id]'
]

# Slides excluded from this run.
# NOTE(review): presumably already processed elsewhere -- confirm.
seen_datasets = ['WSSKNKCLsp10446618', 'WSSKNKCLsp12887265']

sdrf = pd.read_csv("GanierDatasets/Metadata/E-MTAB-13084.sdrf.txt", sep = '\t')

metadata = (
    # Keep only rows whose read type is the spatial/UMI barcode read, and only
    # the columns of interest.
    sdrf.loc[
        sdrf['Comment[read_type]'] == 'spatial_barcode,umi_barcode',
        relevant_columns
    ]
    # Collapse rows that are identical once restricted to these columns.
    .drop_duplicates()
    .loc[lambda df: ~df['Source Name'].isin(seen_datasets)]
    .reset_index(drop = True)
    .assign(**{
        # 'face' when the sample id contains the substring 'face', else 'body'.
        # na=False sends a missing sample id to 'body' instead of raising.
        'Region': lambda df: (
            df['Characteristics[sample id]']
            .str.contains('face', regex = False, na = False)
            .map({True: 'face', False: 'body'})
        ),
        # Boolean flag: True only for basal cell carcinoma samples.
        'Factor Value[disease]': lambda df: (
            df['Factor Value[disease]'].eq('basal cell carcinoma')
        ),
    })
    .rename(
        columns = {
            'Characteristics[age]' : 'Age',
            'Characteristics[sex]' : 'Sex',
            'Characteristics[sampling site]' : 'Site',
            'Factor Value[disease]' : 'BCC',
        }
    )
    # The sample id was only needed to derive 'Region'.
    .drop(columns = 'Characteristics[sample id]')
)

metadata

Unnamed: 0,Source Name,Age,Sex,Site,BCC,Region
0,WSSKNKCLsp10446613,68,male,temple,False,face
1,WSSKNKCLsp10446614,68,male,temple,False,face
2,WSSKNKCLsp10446615,33,female,temple,False,face
3,WSSKNKCLsp10446616,33,female,temple,False,face
4,WSSKNKCLsp10446617,77,male,nose,False,face
5,WSSKNKCLsp10446619,55,male,forehead,False,face
6,WSSKNKCLsp10446620,39,male,cheek,False,face
7,WSSKNKCLsp10446621,60,male,back,False,body
8,WSSKNKCLsp10446623,55,male,inguinal part of abdomen,False,body
9,WSSKNKCLsp10767965,47,male,abdomen,False,body


In [6]:
# Run SenCID on every slide listed in `metadata` and write, per slide, a CSV
# with two binarized calls for each spot: one from the SID model with the
# highest mean score for the spot's cell type ("auto") and one from a fixed
# per-cell-type SID model ("manual").

# NOTE(review): these three lists are never filled anywhere in this cell; they
# are kept only in case later cells reference them -- confirm and remove.
auto_fblst_SIDs = []
auto_kcyte_SIDs = []
auto_endth_SIDs = []

# Fixed SID model to use for each cell type in the "manual" column.
manual_SIDs = {'fblst' : 'SID5', 'kcyte' : 'SID2', 'endth' : 'SID4'}

# Cell types that receive a prediction; spots of any other type stay NaN.
scored_cell_types = ['endth', 'kcyte', 'fblst']

# Loop-invariant, so defined once instead of on every iteration.
# NOTE(review): not used in this cell -- presumably consumed further down.
markers = ['rec_SID1', 'rec_SID2', 'rec_SID3', 'rec_SID4', 'rec_SID5', 'rec_SID6']

# Suffix of the per-SID score column once prefixed with '<SID>_'.
SCORE_SUFFIX = '_SID_Score'


def load_slide(slide_name):
    """Read one Visium slide, apply the QC filters, and attach cell types.

    Parameters
    ----------
    slide_name : str
        Name of the slide directory under ``GanierDatasets/`` and of the
        matching CSV under ``SenCID/Cell Types/``.

    Returns
    -------
    AnnData
        Spots with at least 200 detected genes and less than 30 %
        mitochondrial counts, with ``obs['condition']`` and
        ``obs['cell type']`` added.
    """
    adata = sc.read_visium(
        f'GanierDatasets/{slide_name}',
        count_file = 'filtered_feature_bc_matrix.h5'
    )
    adata.var_names_make_unique()

    # QC metrics must be computed before filtering: the mitochondrial
    # percentage filter below reads obs['pct_counts_mt'].
    adata.var['mt'] = adata.var_names.str.startswith("MT-")
    sc.pp.calculate_qc_metrics(
        adata, qc_vars = ['mt'], inplace = True, log1p = False
    )
    sc.pp.filter_cells(adata, min_genes = 200)
    adata = adata[adata.obs.pct_counts_mt < 30, :].copy()

    # True for barcodes whose last '_'-separated token is 'OKSM'.
    # NOTE(review): looks carried over from another dataset -- confirm it is
    # still needed here.
    adata.obs['condition'] = [
        barcode.split('_')[-1] == 'OKSM' for barcode in adata.obs_names
    ]

    # Assignment aligns on the barcode index; spots missing from the CSV
    # end up with NaN as cell type.
    adata.obs['cell type'] = pd.read_csv(
        f'SenCID/Cell Types/{slide_name}.csv', index_col = 'Unnamed: 0'
    )['cell type']

    return adata


def select_binarized_calls(predictions, cell_types, manual_SIDs):
    """Pick the binarized call per spot for the auto and manual SID choices.

    Parameters
    ----------
    predictions : pandas.DataFrame
        One row per spot with a ``'cell type'`` column plus, for every SID,
        ``'<SID>_SID_Score'`` and ``'<SID>_Binarization'`` columns.
    cell_types : list of str
        Cell types to produce calls for.
    manual_SIDs : dict
        Maps each cell type to the SID used for the manual call.

    Returns
    -------
    (pandas.Series, pandas.Series)
        Auto and manual calls, indexed by the spots belonging to
        ``cell_types``. Cell types with no spots on the slide are skipped;
        both series are empty when none of the cell types is present.
    """
    score_columns = [
        column for column in predictions.columns
        if column.endswith(SCORE_SUFFIX)
    ]

    auto_calls = []
    manual_calls = []

    for ctype in cell_types:
        subset = predictions[predictions['cell type'] == ctype]

        # A slide may contain no spots of this type; the mean score would be
        # all-NaN and idxmax() would fail, so skip it.
        if subset.empty:
            continue

        # SID whose mean score over this cell type's spots is highest.
        best_score_column = subset[score_columns].mean().idxmax()
        auto_SID = best_score_column[:-len(SCORE_SUFFIX)]
        manual_SID = manual_SIDs[ctype]

        auto_calls.append(subset[f'{auto_SID}_Binarization'])
        manual_calls.append(subset[f'{manual_SID}_Binarization'])

    if not auto_calls:
        # pd.concat([]) raises; an empty series yields all-NaN on assignment.
        return pd.Series(dtype = float), pd.Series(dtype = float)

    return pd.concat(auto_calls), pd.concat(manual_calls)


# Make sure the output directory exists before the first to_csv call.
os.makedirs('SenCID/Outputs', exist_ok = True)

for slide_name in metadata['Source Name']:
    adata = load_slide(slide_name)

    # Classify this slide with SID models 1-6.
    pred_dict, recSID, tmpfiles = SenCID(adata = adata,
                                        sidnums = [1,2,3,4,5,6],
                                        denoising = True,
                                        binarize = True,
                                        threads = 8,
                                        savetmp = True
                                    )

    adata.obs = pd.concat([adata.obs, recSID.loc[adata.obs_names, :]], axis = 1)

    # Prefix every per-SID frame's columns with its SID. add_prefix returns a
    # new frame, so the frames held by pred_dict keep their original columns
    # (a shallow dict copy followed by `.columns = ...` would rename them).
    pred_dict_copy = {
        sid: scores.add_prefix(f'{sid}_') for sid, scores in pred_dict.items()
    }

    predictions = pd.concat(
        list(pred_dict_copy.values()) + [adata.obs['cell type']], axis = 1
    )

    auto_calls, manual_calls = select_binarized_calls(
        predictions, scored_cell_types, manual_SIDs
    )
    predictions['auto predictions'] = auto_calls
    predictions['manual predictions'] = manual_calls

    adata.obs['SenCID auto preds'] = predictions['auto predictions']
    adata.obs['SenCID manual preds'] = predictions['manual predictions']

    adata.obs[['SenCID auto preds', 'SenCID manual preds']].to_csv(f'SenCID/Outputs/{slide_name}.csv')

Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


Scaling data...
dca: Successfully preprocessed 1290 genes and 986 cells.
dca: Calculating reconstructions...
Loading models of SID1...
Making predictions of SID1...
Loading models of SID2...
Making predictions of SID2...
Loading models of SID3...
Making predictions of SID3...
Loading models of SID4...
Making predictions of SID4...
Loading models of SID5...
Making predictions of SID5...
Loading models of SID6...
Making predictions of SID6...
Loading Recommend model...
Finished. Giving SID scores and SID Recommendation...


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


Scaling data...
dca: Successfully preprocessed 1290 genes and 924 cells.
dca: Calculating reconstructions...
Loading models of SID1...
Making predictions of SID1...
Loading models of SID2...
Making predictions of SID2...
Loading models of SID3...
Making predictions of SID3...
Loading models of SID4...
Making predictions of SID4...
Loading models of SID5...
Making predictions of SID5...
Loading models of SID6...
Making predictions of SID6...
Loading Recommend model...
Finished. Giving SID scores and SID Recommendation...


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


Scaling data...
dca: Successfully preprocessed 1289 genes and 1520 cells.
dca: Calculating reconstructions...
Loading models of SID1...
Making predictions of SID1...
Loading models of SID2...
Making predictions of SID2...
Loading models of SID3...
Making predictions of SID3...
Loading models of SID4...
Making predictions of SID4...
Loading models of SID5...
Making predictions of SID5...
Loading models of SID6...
Making predictions of SID6...
Loading Recommend model...
Finished. Giving SID scores and SID Recommendation...


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


Scaling data...
dca: Successfully preprocessed 1289 genes and 1484 cells.
dca: Calculating reconstructions...
Loading models of SID1...
Making predictions of SID1...
Loading models of SID2...
Making predictions of SID2...
Loading models of SID3...
Making predictions of SID3...
Loading models of SID4...
Making predictions of SID4...
Loading models of SID5...
Making predictions of SID5...
Loading models of SID6...
Making predictions of SID6...
Loading Recommend model...
Finished. Giving SID scores and SID Recommendation...


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


Scaling data...
dca: Successfully preprocessed 1290 genes and 1547 cells.
dca: Calculating reconstructions...
Loading models of SID1...
Making predictions of SID1...
Loading models of SID2...
Making predictions of SID2...
Loading models of SID3...
Making predictions of SID3...
Loading models of SID4...
Making predictions of SID4...
Loading models of SID5...
Making predictions of SID5...
Loading models of SID6...
Making predictions of SID6...
Loading Recommend model...
Finished. Giving SID scores and SID Recommendation...


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


Scaling data...
dca: Successfully preprocessed 1289 genes and 1014 cells.
dca: Calculating reconstructions...
Loading models of SID1...
Making predictions of SID1...
Loading models of SID2...
Making predictions of SID2...
Loading models of SID3...
Making predictions of SID3...
Loading models of SID4...
Making predictions of SID4...
Loading models of SID5...
Making predictions of SID5...
Loading models of SID6...
Making predictions of SID6...
Loading Recommend model...
Finished. Giving SID scores and SID Recommendation...


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


Scaling data...
dca: Successfully preprocessed 1290 genes and 2028 cells.
dca: Calculating reconstructions...
Loading models of SID1...
Making predictions of SID1...
Loading models of SID2...
Making predictions of SID2...
Loading models of SID3...
Making predictions of SID3...
Loading models of SID4...
Making predictions of SID4...
Loading models of SID5...
Making predictions of SID5...
Loading models of SID6...
Making predictions of SID6...
Loading Recommend model...
Finished. Giving SID scores and SID Recommendation...


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


Scaling data...
dca: Successfully preprocessed 1283 genes and 1432 cells.
dca: Calculating reconstructions...
Loading models of SID1...
Making predictions of SID1...
Loading models of SID2...
Making predictions of SID2...
Loading models of SID3...
Making predictions of SID3...
Loading models of SID4...
Making predictions of SID4...
Loading models of SID5...
Making predictions of SID5...
Loading models of SID6...
Making predictions of SID6...
Loading Recommend model...
Finished. Giving SID scores and SID Recommendation...


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


Scaling data...
dca: Successfully preprocessed 1288 genes and 1297 cells.
dca: Calculating reconstructions...
Loading models of SID1...
Making predictions of SID1...
Loading models of SID2...
Making predictions of SID2...
Loading models of SID3...
Making predictions of SID3...
Loading models of SID4...
Making predictions of SID4...
Loading models of SID5...
Making predictions of SID5...
Loading models of SID6...
Making predictions of SID6...
Loading Recommend model...
Finished. Giving SID scores and SID Recommendation...


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


Scaling data...
dca: Successfully preprocessed 1288 genes and 1495 cells.
dca: Calculating reconstructions...
Loading models of SID1...
Making predictions of SID1...
Loading models of SID2...
Making predictions of SID2...
Loading models of SID3...
Making predictions of SID3...
Loading models of SID4...
Making predictions of SID4...
Loading models of SID5...
Making predictions of SID5...
Loading models of SID6...
Making predictions of SID6...
Loading Recommend model...
Finished. Giving SID scores and SID Recommendation...


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


Scaling data...
dca: Successfully preprocessed 1289 genes and 1766 cells.
dca: Calculating reconstructions...
Loading models of SID1...
Making predictions of SID1...
Loading models of SID2...
Making predictions of SID2...
Loading models of SID3...
Making predictions of SID3...
Loading models of SID4...
Making predictions of SID4...
Loading models of SID5...
Making predictions of SID5...
Loading models of SID6...
Making predictions of SID6...
Loading Recommend model...
Finished. Giving SID scores and SID Recommendation...


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


Scaling data...
dca: Successfully preprocessed 1287 genes and 1630 cells.
dca: Calculating reconstructions...
Loading models of SID1...
Making predictions of SID1...
Loading models of SID2...
Making predictions of SID2...
Loading models of SID3...
Making predictions of SID3...
Loading models of SID4...
Making predictions of SID4...
Loading models of SID5...
Making predictions of SID5...
Loading models of SID6...
Making predictions of SID6...
Loading Recommend model...
Finished. Giving SID scores and SID Recommendation...


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


Scaling data...
dca: Successfully preprocessed 1288 genes and 1945 cells.
dca: Calculating reconstructions...
Loading models of SID1...
Making predictions of SID1...
Loading models of SID2...
Making predictions of SID2...
Loading models of SID3...
Making predictions of SID3...
Loading models of SID4...
Making predictions of SID4...
Loading models of SID5...
Making predictions of SID5...
Loading models of SID6...
Making predictions of SID6...
Loading Recommend model...
Finished. Giving SID scores and SID Recommendation...


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


Scaling data...
dca: Successfully preprocessed 1289 genes and 811 cells.
dca: Calculating reconstructions...
Loading models of SID1...
Making predictions of SID1...
Loading models of SID2...
Making predictions of SID2...
Loading models of SID3...
Making predictions of SID3...
Loading models of SID4...
Making predictions of SID4...
Loading models of SID5...
Making predictions of SID5...
Loading models of SID6...
Making predictions of SID6...
Loading Recommend model...
Finished. Giving SID scores and SID Recommendation...


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


Scaling data...
dca: Successfully preprocessed 1289 genes and 1042 cells.
dca: Calculating reconstructions...
Loading models of SID1...
Making predictions of SID1...
Loading models of SID2...
Making predictions of SID2...
Loading models of SID3...
Making predictions of SID3...
Loading models of SID4...
Making predictions of SID4...
Loading models of SID5...
Making predictions of SID5...
Loading models of SID6...
Making predictions of SID6...
Loading Recommend model...
Finished. Giving SID scores and SID Recommendation...


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


Scaling data...
dca: Successfully preprocessed 1290 genes and 1868 cells.
dca: Calculating reconstructions...
Loading models of SID1...
Making predictions of SID1...
Loading models of SID2...
Making predictions of SID2...
Loading models of SID3...
Making predictions of SID3...
Loading models of SID4...
Making predictions of SID4...
Loading models of SID5...
Making predictions of SID5...
Loading models of SID6...
Making predictions of SID6...
Loading Recommend model...
Finished. Giving SID scores and SID Recommendation...


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


Scaling data...
dca: Successfully preprocessed 1290 genes and 1186 cells.
dca: Calculating reconstructions...
Loading models of SID1...
Making predictions of SID1...
Loading models of SID2...
Making predictions of SID2...
Loading models of SID3...
Making predictions of SID3...
Loading models of SID4...
Making predictions of SID4...
Loading models of SID5...
Making predictions of SID5...
Loading models of SID6...
Making predictions of SID6...
Loading Recommend model...
Finished. Giving SID scores and SID Recommendation...


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


Scaling data...
dca: Successfully preprocessed 1290 genes and 1957 cells.
dca: Calculating reconstructions...
Loading models of SID1...
Making predictions of SID1...
Loading models of SID2...
Making predictions of SID2...
Loading models of SID3...
Making predictions of SID3...
Loading models of SID4...
Making predictions of SID4...
Loading models of SID5...
Making predictions of SID5...
Loading models of SID6...
Making predictions of SID6...
Loading Recommend model...
Finished. Giving SID scores and SID Recommendation...


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


Scaling data...
dca: Successfully preprocessed 1290 genes and 958 cells.
dca: Calculating reconstructions...
Loading models of SID1...
Making predictions of SID1...
Loading models of SID2...
Making predictions of SID2...
Loading models of SID3...
Making predictions of SID3...
Loading models of SID4...
Making predictions of SID4...
Loading models of SID5...
Making predictions of SID5...
Loading models of SID6...
Making predictions of SID6...
Loading Recommend model...
Finished. Giving SID scores and SID Recommendation...


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


Scaling data...
dca: Successfully preprocessed 1290 genes and 1639 cells.
dca: Calculating reconstructions...
Loading models of SID1...
Making predictions of SID1...
Loading models of SID2...
Making predictions of SID2...
Loading models of SID3...
Making predictions of SID3...
Loading models of SID4...
Making predictions of SID4...
Loading models of SID5...
Making predictions of SID5...
Loading models of SID6...
Making predictions of SID6...
Loading Recommend model...
Finished. Giving SID scores and SID Recommendation...


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


Scaling data...
dca: Successfully preprocessed 1290 genes and 1380 cells.
dca: Calculating reconstructions...
Loading models of SID1...
Making predictions of SID1...
Loading models of SID2...
Making predictions of SID2...
Loading models of SID3...
Making predictions of SID3...
Loading models of SID4...
Making predictions of SID4...
Loading models of SID5...
Making predictions of SID5...
Loading models of SID6...
Making predictions of SID6...
Loading Recommend model...
Finished. Giving SID scores and SID Recommendation...


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


Scaling data...
dca: Successfully preprocessed 1286 genes and 2086 cells.
dca: Calculating reconstructions...
Loading models of SID1...
Making predictions of SID1...
Loading models of SID2...
Making predictions of SID2...
Loading models of SID3...
Making predictions of SID3...
Loading models of SID4...
Making predictions of SID4...
Loading models of SID5...
Making predictions of SID5...
Loading models of SID6...
Making predictions of SID6...
Loading Recommend model...
Finished. Giving SID scores and SID Recommendation...


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


Scaling data...
dca: Successfully preprocessed 1290 genes and 2448 cells.
dca: Calculating reconstructions...
Loading models of SID1...
Making predictions of SID1...
Loading models of SID2...
Making predictions of SID2...
Loading models of SID3...
Making predictions of SID3...
Loading models of SID4...
Making predictions of SID4...
Loading models of SID5...
Making predictions of SID5...
Loading models of SID6...
Making predictions of SID6...
Loading Recommend model...
Finished. Giving SID scores and SID Recommendation...


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


Scaling data...
dca: Successfully preprocessed 1290 genes and 1342 cells.
dca: Calculating reconstructions...
Loading models of SID1...
Making predictions of SID1...
Loading models of SID2...
Making predictions of SID2...
Loading models of SID3...
Making predictions of SID3...
Loading models of SID4...
Making predictions of SID4...
Loading models of SID5...
Making predictions of SID5...
Loading models of SID6...
Making predictions of SID6...
Loading Recommend model...
Finished. Giving SID scores and SID Recommendation...


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


Scaling data...
dca: Successfully preprocessed 1290 genes and 1334 cells.
dca: Calculating reconstructions...
Loading models of SID1...
Making predictions of SID1...
Loading models of SID2...
Making predictions of SID2...
Loading models of SID3...
Making predictions of SID3...
Loading models of SID4...
Making predictions of SID4...
Loading models of SID5...
Making predictions of SID5...
Loading models of SID6...
Making predictions of SID6...
Loading Recommend model...
Finished. Giving SID scores and SID Recommendation...


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


Scaling data...
dca: Successfully preprocessed 1290 genes and 1146 cells.
dca: Calculating reconstructions...
Loading models of SID1...
Making predictions of SID1...
Loading models of SID2...
Making predictions of SID2...
Loading models of SID3...
Making predictions of SID3...
Loading models of SID4...
Making predictions of SID4...
Loading models of SID5...
Making predictions of SID5...
Loading models of SID6...
Making predictions of SID6...
Loading Recommend model...


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


Finished. Giving SID scores and SID Recommendation...
Scaling data...
dca: Successfully preprocessed 1290 genes and 930 cells.
dca: Calculating reconstructions...
Loading models of SID1...
Making predictions of SID1...
Loading models of SID2...
Making predictions of SID2...
Loading models of SID3...
Making predictions of SID3...
Loading models of SID4...
Making predictions of SID4...
Loading models of SID5...
Making predictions of SID5...
Loading models of SID6...
Making predictions of SID6...
Loading Recommend model...
Finished. Giving SID scores and SID Recommendation...


Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


Scaling data...
dca: Successfully preprocessed 1290 genes and 928 cells.
dca: Calculating reconstructions...
Loading models of SID1...
Making predictions of SID1...
Loading models of SID2...
Making predictions of SID2...
Loading models of SID3...
Making predictions of SID3...
Loading models of SID4...
Making predictions of SID4...
Loading models of SID5...
Making predictions of SID5...
Loading models of SID6...
Making predictions of SID6...
Loading Recommend model...
Finished. Giving SID scores and SID Recommendation...
