Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding files from scib #4

Merged
merged 14 commits into from Jun 14, 2021
Merged
655 changes: 655 additions & 0 deletions notebooks/Vitualisation_ATAC.ipynb

Large diffs are not rendered by default.

271 changes: 271 additions & 0 deletions notebooks/analysis/ATAC_feature_analysis.ipynb
@@ -0,0 +1,271 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"from collections import defaultdict\n",
"import seaborn as sns\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Load the two summary-score tables (one CSV per file name below) into atac1 / atac2\n",
"atac1, atac2 = (\n",
"    pd.read_csv(csv_path)\n",
"    for csv_path in (\n",
"        '../../../Paper/202010_Revision/Supplementary Files/Results/ATAC/mouse_brain_atac_large_11batches_summary_scores.csv',\n",
"        '../../../Paper/202010_Revision/Supplementary Files/Results/ATAC/mouse_brain_atac_small_summary_scores.csv',\n",
"    )\n",
")\n",
"# Row-wise stack of both tables; the original row labels are kept\n",
"atac = pd.concat([atac1, atac2])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Average bio conservation score of methods for ATAC, by feature space"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['Unnamed: 0', 'Method', 'Output', 'Feature Space', 'Overall Score',\n",
" 'Batch Correction', 'PCR batch', 'Batch ASW', 'iLISI',\n",
" 'graph connectivity', 'kBET', 'Bio conservation', 'NMI cluster/label',\n",
" 'ARI cluster/label', 'Cell type ASW', 'isolated label F1',\n",
" 'isolated label silhouette', 'cLISI'],\n",
" dtype='object')"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Inspect which score columns the summary table provides before filtering on them\n",
"atac1.columns"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Keep only method/output combinations that were scored in all 3 feature spaces.\n",
"# NOTE: this assumes at most one row per (Method, Output, Feature Space), so a\n",
"# count of exactly 3 means every feature space is present - verify if inputs change.\n",
"\n",
"# Drop runs without an overall score. The .copy() makes this an independent frame,\n",
"# so adding a column below does not trigger pandas' SettingWithCopyWarning.\n",
"atac1 = atac1.loc[atac1['Overall Score'].notna()].copy()\n",
"\n",
"# Vectorised '<Method>_<Output>' identifier (replaces a per-row Python join)\n",
"atac1['meth_out'] = atac1['Method'] + '_' + atac1['Output']\n",
"\n",
"# Count rows per identifier once and keep the identifiers seen exactly 3 times\n",
"meth_out_counts = atac1['meth_out'].value_counts()\n",
"meth_id_list = meth_out_counts.index[meth_out_counts == 3]\n",
"\n",
"# drop=True discards the old row labels (reset_index expects a bool here)\n",
"atac1 = atac1.loc[atac1['meth_out'].isin(meth_id_list)].reset_index(drop=True)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Keep only method/output combinations that were scored in all 3 feature spaces.\n",
"# NOTE: this assumes at most one row per (Method, Output, Feature Space), so a\n",
"# count of exactly 3 means every feature space is present - verify if inputs change.\n",
"\n",
"# Drop runs without an overall score. The .copy() makes this an independent frame,\n",
"# so adding a column below does not trigger pandas' SettingWithCopyWarning.\n",
"atac2 = atac2.loc[atac2['Overall Score'].notna()].copy()\n",
"\n",
"# Vectorised '<Method>_<Output>' identifier (replaces a per-row Python join)\n",
"atac2['meth_out'] = atac2['Method'] + '_' + atac2['Output']\n",
"\n",
"# Count rows per identifier once and keep the identifiers seen exactly 3 times\n",
"meth_out_counts = atac2['meth_out'].value_counts()\n",
"meth_id_list = meth_out_counts.index[meth_out_counts == 3]\n",
"\n",
"# drop=True discards the old row labels (reset_index expects a bool here)\n",
"atac2 = atac2.loc[atac2['meth_out'].isin(meth_id_list)].reset_index(drop=True)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"DESC_embed 3\n",
"BBKNN_graph 3\n",
"Conos_graph 3\n",
"SAUCIE_embed 3\n",
"SAUCIE_gene 3\n",
"ComBat_gene 3\n",
"Harmony_embed 3\n",
"LIGER_embed 3\n",
"Unintegrated_gene 3\n",
"Name: meth_out, dtype: int64"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"text/plain": [
"scVI_embed 3\n",
"Seurat v3 RPCA_gene 3\n",
"fastMNN_embed 3\n",
"ComBat_gene 3\n",
"MNN_gene 3\n",
"Seurat v3 CCA_gene 3\n",
"BBKNN_graph 3\n",
"scANVI*_embed 3\n",
"LIGER_embed 3\n",
"DESC_embed 3\n",
"scGen*_gene 3\n",
"Scanorama_embed 3\n",
"Harmony_embed 3\n",
"Scanorama_gene 3\n",
"fastMNN_gene 3\n",
"trVAE_embed 3\n",
"SAUCIE_gene 3\n",
"Conos_graph 3\n",
"SAUCIE_embed 3\n",
"Unintegrated_gene 3\n",
"Name: meth_out, dtype: int64"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Sanity check: every remaining method/output identifier should occur exactly 3 times.\n",
"# display() shows both tables explicitly; with default IPython settings only the last\n",
"# bare expression of a cell is rendered, so the atac1 counts would otherwise be hidden.\n",
"display(\n",
"    atac1.meth_out.value_counts(),\n",
"    atac2.meth_out.value_counts(),\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# Stack the two filtered score tables row-wise (atac1 rows first, then atac2)\n",
"atac_filt = pd.concat(objs=[atac1, atac2])"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Feature Space\n",
"genes 0.406883\n",
"peaks 0.644815\n",
"windows 0.612147\n",
"Name: Bio conservation, dtype: float64"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Mean bio conservation score per feature space ('Unintegrated_gene' rows still included)\n",
"(\n",
"    atac_filt\n",
"    .groupby('Feature Space')['Bio conservation']\n",
"    .mean()\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"# Drop the 'Unintegrated_gene' rows so the averages below cover the remaining methods only.\n",
"# drop=True discards the old row labels (reset_index expects a bool, not the string 'index').\n",
"atac_filt2 = atac_filt.loc[atac_filt['meth_out'] != 'Unintegrated_gene'].reset_index(drop=True)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"Feature Space\n",
"genes 0.395273\n",
"peaks 0.628008\n",
"windows 0.595843\n",
"Name: Bio conservation, dtype: float64"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Mean bio conservation score per feature space, with 'Unintegrated_gene' removed\n",
"(\n",
"    atac_filt2\n",
"    .groupby('Feature Space')['Bio conservation']\n",
"    .mean()\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Feature Space\n",
"genes 0.655828\n",
"peaks 0.499953\n",
"windows 0.477366\n",
"Name: Batch Correction, dtype: float64"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Mean batch correction score per feature space, with 'Unintegrated_gene' removed\n",
"(\n",
"    atac_filt2\n",
"    .groupby('Feature Space')['Batch Correction']\n",
"    .mean()\n",
")"
]
},
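{
"cell_type": "markdown",
"metadata": {},
"source": [
"The gene activity feature space has a far lower average bio conservation score than peaks and windows (0.40 vs. 0.63 and 0.60 with Unintegrated removed), but a higher average batch correction score (0.66 vs. 0.50 and 0.48)."
]
}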
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}