# MERFISH Experiment clustering

## Goals
- generate adata containing the cell_by_gene matrix and the coordinates of each cell
- quality control to select cells

In [None]:
from anndata import AnnData
import scanpy as sc
#import squidpy as sq

from numpy.random import default_rng

import matplotlib.pyplot as plt
import pandas as pd
from ALLCools.plot import *
from ALLCools.clustering import tsne
from merfishing import Merfish
import seaborn as sns
from merfishing.core import clustering
import pathlib

In [None]:
# Notebook parameters:
#   data_dir - root folder that the input CSV paths are built from
#   model    - segmentation output to load; later compared against
#              'cellpose' and 'watershed'
#   region   - sub-folder of data_dir to read, also used in output file names
data_dir, model, region = 'output', 'watershed', 'region_1'

In [None]:
# Load the cell-by-gene matrix and the per-cell metadata for the selected
# segmentation model. Both tables are indexed by their first column and
# sorted by that index.
#
# Maps each supported model to the infix that appears in its file names
# ('cellpose' files carry a '.cellpose' infix, 'watershed' files carry none).
model_file_infix = {
    'cellpose': '.cellpose',
    'watershed': '',
}

# Fail loudly on an unsupported model instead of silently loading nothing,
# which would otherwise leave stale (or undefined) tables for later cells.
if model not in model_file_infix:
    raise ValueError(
        f"Unsupported model {model!r}; expected one of {sorted(model_file_infix)}"
    )

file_infix = model_file_infix[model]
cell_by_gene = pd.read_csv(
    f'{data_dir}/{region}/cell_by_gene{file_infix}.csv.gz', index_col=0
).sort_index()
cell_meta = pd.read_csv(
    f'{data_dir}/{region}/cell_metadata{file_infix}.csv.gz', index_col=0
).sort_index()
    

In [None]:
# Keep only the cells present in BOTH tables.
# The shared IDs are sorted so the row order is deterministic (a bare set
# has arbitrary iteration order, which would discard the sort applied at
# load time), and both tables are subset with the same list so they stay
# row-aligned with each other.
shared = sorted(set(cell_meta.index) & set(cell_by_gene.index))
cell_meta = cell_meta.loc[shared]
cell_by_gene = cell_by_gene.loc[shared]

## QC and selecting cells for further analysis

In [None]:
# Visualize the QC metrics as a violin plot; this runs before the QC
# thresholds are applied in the next cell.
# NOTE(review): the plot type is taken from the original comment - confirm
# against the merfishing `clustering.plot_qc_feature` implementation.
clustering.plot_qc_feature(cell_by_gene, cell_meta)

In [None]:
# QC thresholds forwarded as keyword arguments to the merfishing helper.
# The keyword spellings are kept exactly as in the original call.
# NOTE(review): `volumn_high` / `tanscripts_per_volume_*` look misspelled;
# confirm they match the signature of `clustering.qc_before_clustering`
# before renaming anything.
qc_thresholds = dict(
    snmfish_genes=None,
    blank_gene_sum_high=5,
    z_number=None,
    volume_low=30,
    volumn_high=2000,
    transcript_sum_low=10,
    transcript_sum_high=4000,
    tanscripts_per_volume_low=0.05,
    tanscripts_per_volume_high=5,
)

# Arguments go in as (cell_meta, cell_by_gene); the result is unpacked as
# (cell_by_gene, cell_meta), replacing both tables with the helper's output.
cell_by_gene, cell_meta = clustering.qc_before_clustering(
    cell_meta,
    cell_by_gene,
    **qc_thresholds,
)

# save

In [None]:
# Create the output folder if needed (no error when it already exists).
qc_output_dir = pathlib.Path("qc_meta")
qc_output_dir.mkdir(parents=True, exist_ok=True)

# Sanity check: both tables must have the same number of rows.
n_matrix_rows, n_meta_rows = cell_by_gene.shape[0], cell_meta.shape[0]
assert n_matrix_rows == n_meta_rows

# Write both tables as CSV, keeping the header row and the index column.
cell_by_gene_csv = f'./qc_meta/{region}_{model}_afterQC_cell_by_gene.csv'
cell_meta_csv = f'./qc_meta/{region}_{model}_afterQC_cell_meta.csv'
cell_by_gene.to_csv(cell_by_gene_csv, header=True, index=True)
cell_meta.to_csv(cell_meta_csv, header=True, index=True)