In [1]:
## installing the package
! pip uninstall -q -y cellink
! pip install -q ../../.
! pwd

/home/lollo/Work/code/repos/forks/sc-genetics/docs/notebooks


In [13]:
## importing the libraries
from pathlib import Path
import os
import pandas as pd

from scanpy import read_h5ad

from cellink import DonorData
from cellink.io import read_plink
from cellink.tl import EQTLDataManager, EQTLPipeline

In [3]:
## defining the paths to the data
# The defaults reproduce the original author's local layout. Set the
# CELLINK_DATA_DIR / CELLINK_DUMP_DIR environment variables to run the
# notebook on another machine without editing this cell.
DATA_DIR = os.environ.get("CELLINK_DATA_DIR", "/home/lollo/Work/hackathon/data/Yazar_OneK1K")
scdata_path = os.path.join(DATA_DIR, "debug_OneK1K_cohort_gene_expression_matrix_14_celltypes.h5ad")
annotation_path = os.path.join(DATA_DIR, "gene_annotation.csv")
# the final "" component keeps the trailing path separator of the original value
gdata_path = os.path.join(DATA_DIR, "OneK1K_imputation_post_qc_r2_08", "plink", "")
dump_path = os.environ.get("CELLINK_DUMP_DIR", "/home/lollo/Work/hackathon/out/dumps")

In [10]:
## selecting the chromosome and the cell type to analyse
# both values are plain strings; they are passed to the genotype file name
# and to the eQTL run further down
target_chrom, target_cell_type = "22", "CD4 ET"

In [9]:
## loading the single-cell expression data
scdata = read_h5ad(scdata_path)

## loading the gene annotation, keeping only the coordinate columns and the
## genes that are present in the single-cell data
annotation_columns = ["ensembl_gene_id", "start_position", "end_position", "chromosome_name"]
annotation_df = pd.read_csv(annotation_path)[annotation_columns]
is_measured_gene = annotation_df["ensembl_gene_id"].isin(scdata.var_names)
annotation_df = annotation_df[is_measured_gene]

## attaching the annotation to scdata.var (joined on the var index) and
## renaming the annotation columns
# NOTE(review): the merge uses the pandas default (inner) join, so this
# presumably relies on every gene in scdata.var having exactly one
# annotation row -- TODO confirm
column_renames = {
    "ensembl_gene_id": "Geneid",
    "start_position": "start",
    "end_position": "end",
    "chromosome_name": "chrom",
}
merged_df = scdata.var.merge(
    annotation_df, left_index=True, right_on="ensembl_gene_id"
).rename(columns=column_renames)
# the gene id is used as index and is also kept as a regular column
merged_df = merged_df.set_index("Geneid", drop=False)
scdata.var = merged_df

In [14]:
## reading the genotypes of the target chromosome
# the file name is built from the chromosome selected above and is resolved
# inside the genotype directory
plink_basename = f"chr{target_chrom}.dose.filtered.R2_0.8"
plink_file = os.path.join(gdata_path, plink_basename)
gdata = read_plink(plink_file)

In [15]:
## combining single-cell and genetic data into one DonorData object
# "individual" is passed as the donor key of the single-cell data
data = DonorData(
    adata=scdata,
    gdata=gdata,
    donor_key_in_sc_adata="individual",
)

[2024-11-13 12:44:15,362] INFO:cellink._core.donordata: Keeping 20/1034 donors
[2024-11-13 12:44:15,362] INFO:cellink._core.donordata: Dropping 1014/1034 donors from genetic data
[2024-11-13 12:44:15,363] INFO:cellink._core.donordata: Dropping 0/20 donors from single-cell data


In [23]:
## wrapping the donor data in the eQTL data manager
# both component counts (n_sc_comps, n_genetic_pcs) are set to 15
eqtl_data = EQTLDataManager(
    data,
    n_sc_comps=15,
    n_genetic_pcs=15,
)

In [24]:
## creating the eQTL pipeline on top of the data manager built above
# no further options are passed here, so the pipeline runs with its defaults
eqtl = EQTLPipeline(eqtl_data)

In [25]:
## running the eQTL pipeline for the selected cell type and chromosome
# the result is kept in results_df_all; in the recorded run this cell took
# about 7 minutes (236 progress-bar iterations)
results_df_all = eqtl.run(target_cell_type, target_chrom)

  reference_data = reference_data.groupby(self.donor_key_in_scdata).agg(["unique"])
  reference_data = reference_data.groupby(self.donor_key_in_scdata).agg(["unique"])


[2024-11-13 12:45:49,967] INFO:cellink._core.donordata: Keeping 20/20 donors
[2024-11-13 12:45:49,968] INFO:cellink._core.donordata: Dropping 0/20 donors from genetic data
[2024-11-13 12:45:49,968] INFO:cellink._core.donordata: Dropping 0/20 donors from single-cell data


100%|██████████| 236/236 [07:06<00:00,  1.81s/it]
