Get cumulative numbers of variants, targets and genes for hicov geographic cohorts 
and hicov all 

## Imports

In [1]:
import pandas as pd
import allel
import collections
import numpy as np
import malariagen_data
import gcsfs
import zarr
import dask
import dask.array as da
import cython
import functools
import os

In [2]:
from dask.distributed import Client
import dask
dask.config.set(**{'array.slicing.split_large_chunks': False}) # Silence large chunk warnings
import dask.array as da
from dask import delayed, compute
from dask_gateway import Gateway
import functools
import numcodecs
from fsspec.implementations.zip import ZipFileSystem
from collections.abc import Mapping
import gcsfs
import numba
import psutil
from humanize import naturalsize


In [None]:
# Google Cloud Storage filesystem handle.
# NOTE(review): `gcs` is not referenced in the visible cells - confirm it is still needed.
gcs = gcsfs.GCSFileSystem()

## Read ref genome, accessibility and annotation

### Ref genome

In [5]:
#Load af1 to access reference genome
#Sequence will be read for chromosomes separately
#The same object is used further down for SNP calls and sample metadata
af1 = malariagen_data.Af1(release='1.0')

### Annotation

In [6]:
#Uploaded local copy of annotation
#Because the release contains a preliminary version
gff_fn = 'VectorBase-65_AfunestusAfunGA1.gff'
#Only the ID and Parent attributes are parsed; they are used below to link features to their parents
features = allel.FeatureTable.from_gff3(gff_fn, attributes=['ID', 'Parent'])
features

Unnamed: 0,seqid,source,type,start,end,score,strand,phase,ID,Parent,Unnamed: 11
0,AfunGA1_2RL,VEuPathDB,ncRNA_gene,44146272,44146344,-1.0,-,-1,AFUN2_000001,.,
1,AfunGA1_2RL,VEuPathDB,tRNA,44146272,44146344,-1.0,-,-1,AFUN2_000001.R2,AFUN2_000001,
2,AfunGA1_2RL,VEuPathDB,exon,44146272,44146344,-1.0,-,-1,exon_Trnak-cuu-2_t1-E1,AFUN2_000001.R2,
...,...,...,...,...,...,...,...,...,...,...,...
468230,AfunGA1_3RL,VEuPathDB,CDS,46442600,46443004,-1.0,+,0,AFUN2_014819.P27467-CDS2,AFUN2_014819.R27467,
468231,AfunGA1_3RL,VEuPathDB,five_prime_UTR,46441974,46442325,-1.0,+,-1,utr_AFUN2_014819.R27467_1,AFUN2_014819.R27467,
468232,AfunGA1_3RL,VEuPathDB,three_prime_UTR,46443005,46443055,-1.0,+,-1,utr_AFUN2_014819.R27467_2,AFUN2_014819.R27467,


In [7]:
#Eyeball the features present: count of each feature type, most frequent first
collections.Counter(features.type).most_common()

[('exon', 193386),
 ('CDS', 161982),
 ('five_prime_UTR', 42469),
 ('three_prime_UTR', 26305),
 ('mRNA', 25342),
 ('protein_coding_gene', 12092),
 ('ncRNA_gene', 2280),
 ('lnc_RNA', 1474),
 ('rRNA', 1011),
 ('ncRNA', 646),
 ('pseudogene', 447),
 ('pseudogenic_transcript', 447),
 ('tRNA', 316),
 ('snRNA', 27),
 ('snoRNA', 9)]

In [8]:
# map each feature ID to the ID of its parent feature (ID -> Parent)
idx_feature_parent = dict(features[['ID', 'Parent']])
# sanity check: a transcript should map to its gene
idx_feature_parent['AFUN2_000001.R2']

'AFUN2_000001'

In [9]:
#Check scaffold names: the ten sequence IDs carrying the most features
collections.Counter(features.seqid).most_common()[:10]

[('AfunGA1_2RL', 224647),
 ('AfunGA1_3RL', 188399),
 ('AfunGA1_X', 45230),
 ('CALSEJ010000020', 656),
 ('CALSEJ010000036', 305),
 ('CALSEJ010000073', 279),
 ('CALSEJ010000056', 223),
 ('CALSEJ010000119', 207),
 ('CALSEJ010000194', 204),
 ('CALSEJ010000306', 203)]

In [10]:
# keep only features annotated on the three chromosome-level sequences
loc_features_chroms = np.isin(
    features.seqid,
    ['AfunGA1_2RL', 'AfunGA1_3RL', 'AfunGA1_X'],
)
features_chroms = features[loc_features_chroms]
features_chroms

Unnamed: 0,seqid,source,type,start,end,score,strand,phase,ID,Parent,Unnamed: 11
0,AfunGA1_2RL,VEuPathDB,ncRNA_gene,44146272,44146344,-1.0,-,-1,AFUN2_000001,.,
1,AfunGA1_2RL,VEuPathDB,tRNA,44146272,44146344,-1.0,-,-1,AFUN2_000001.R2,AFUN2_000001,
2,AfunGA1_2RL,VEuPathDB,exon,44146272,44146344,-1.0,-,-1,exon_Trnak-cuu-2_t1-E1,AFUN2_000001.R2,
...,...,...,...,...,...,...,...,...,...,...,...
458273,AfunGA1_3RL,VEuPathDB,CDS,46442600,46443004,-1.0,+,0,AFUN2_014819.P27467-CDS2,AFUN2_014819.R27467,
458274,AfunGA1_3RL,VEuPathDB,five_prime_UTR,46441974,46442325,-1.0,+,-1,utr_AFUN2_014819.R27467_1,AFUN2_014819.R27467,
458275,AfunGA1_3RL,VEuPathDB,three_prime_UTR,46443005,46443055,-1.0,+,-1,utr_AFUN2_014819.R27467_2,AFUN2_014819.R27467,


In [11]:
#Check that it worked: only the three chromosome sequence IDs should remain
np.unique(features_chroms.seqid)

array(['AfunGA1_2RL', 'AfunGA1_3RL', 'AfunGA1_X'], dtype=object)

In [12]:
#subset to coding sequences on the chromosomes
#(row order of this table defines the CDS labels assigned in get_cds_idx)
cdss_chroms = features_chroms[features_chroms.type == 'CDS']
cdss_chroms

Unnamed: 0,seqid,source,type,start,end,score,strand,phase,ID,Parent,Unnamed: 11
0,AfunGA1_X,VEuPathDB,CDS,16057118,16058505,-1.0,-,2,AFUN2_000317.P27478-CDS9,AFUN2_000317.R27478,
1,AfunGA1_X,VEuPathDB,CDS,16057118,16058505,-1.0,-,2,AFUN2_000317.P27477-CDS9,AFUN2_000317.R27477,
2,AfunGA1_X,VEuPathDB,CDS,16057118,16058505,-1.0,-,2,AFUN2_000317.P27471-CDS8,AFUN2_000317.R27471,
...,...,...,...,...,...,...,...,...,...,...,...
161212,AfunGA1_3RL,VEuPathDB,CDS,22228717,22228942,-1.0,+,1,AFUN2_014818.P27463-CDS9,AFUN2_014818.R27463,
161213,AfunGA1_3RL,VEuPathDB,CDS,46442326,46442514,-1.0,+,0,AFUN2_014819.P27467-CDS1,AFUN2_014819.R27467,
161214,AfunGA1_3RL,VEuPathDB,CDS,46442600,46443004,-1.0,+,0,AFUN2_014819.P27467-CDS2,AFUN2_014819.R27467,


In [13]:
#subset to protein coding genes on the chromosomes
#(row order of this table defines the gene labels assigned in get_gene_idx)
genes_coding_chroms = features_chroms[features_chroms.type =='protein_coding_gene']
genes_coding_chroms

Unnamed: 0,seqid,source,type,start,end,score,strand,phase,ID,Parent,Unnamed: 11
0,AfunGA1_X,VEuPathDB,protein_coding_gene,16056398,16128681,-1.0,-,-1,AFUN2_000317,.,
1,AfunGA1_2RL,VEuPathDB,protein_coding_gene,28414528,28429433,-1.0,+,-1,AFUN2_000318,.,
2,AfunGA1_2RL,VEuPathDB,protein_coding_gene,4506748,4514391,-1.0,-,-1,AFUN2_000319,.,
...,...,...,...,...,...,...,...,...,...,...,...
11959,AfunGA1_2RL,VEuPathDB,protein_coding_gene,33077479,33078614,-1.0,-,-1,AFUN2_014817,.,
11960,AfunGA1_3RL,VEuPathDB,protein_coding_gene,22217243,22232561,-1.0,+,-1,AFUN2_014818,.,
11961,AfunGA1_3RL,VEuPathDB,protein_coding_gene,46441974,46443055,-1.0,+,-1,AFUN2_014819,.,


In [14]:
# double-check that you get the same number of protein coding genes via cds
# each CDS's Parent is looked up in idx_feature_parent to get that parent's own parent;
# the assert below confirms these are exactly the protein-coding gene IDs
uq_genes_coding_chroms = np.unique([idx_feature_parent[t] for t in cdss_chroms.Parent])
assert set(genes_coding_chroms.ID) == set(uq_genes_coding_chroms)
len(uq_genes_coding_chroms)

11962

# Functions

In [15]:
# enable the %%cython cell magic used by the next cell
%load_ext cython

Don't know how to safely use comments in cython - so I'll include them here

`%%cython` instructs to compile this cell with cython. the `-a` flag tells it to showcase the compilation.  
Next define the nucleotides as integers corresponding to their ascii value.  
`@cython.boundscheck(False)` tells cython to not perform a boundscheck -- so we have to guarantee the bounds.  
`opt_is_cas9_target()` takes as arguments a 'view' of seq (which we explicitly say cannot be None), a 'view' of is_variant (which may be None) and a boolean specifying whether to check reverse complements. A 'view' here is a Cython typed memoryview: typed, indexable access to the memory of an existing array (e.g. a numpy array) without copying it.  
Declare the `out` array, a counter `i` and booleans `fwd`, `rev`, then fill `out` with as many zeroes as `seq` is long. Its dtype `'u1'` is an unsigned 1-byte (i.e. 8-bit) integer, matching the 8-bit declaration; it is only reinterpreted as booleans when it is returned.  
Then release the GIL (it's something of a global lock on python code, and it has to be released to allow for multithreading).  
Loop through the positions in seq, taking into account the length of the target site. Check that of 21 consecutive positions, none are `N` (except the first of the PAM, which can be anything) and it ends in `GG`. Sets `fwd = True` if this check is satisfied. In that case, check that `is_variant` is zero at all positions except the third last (so if we give a boolean array of where bases are segregating within the population, it gets targets that are non-segregating; but if we give a boolean array of where bases are non-accessible, it gets targets that are accessible). Repeat in reverse complement if `revcomp = True`.  
After finishing the loop, return a view of an array of booleans specifying for each target starting position whether it's a valid target either forwards or reverse. 

In [16]:
%%cython

import numpy as np
cimport numpy as cnp
cimport cython

cdef:
    # ASCII codes of the nucleotide letters, so bases can be compared as uint8 values
    cnp.uint8_t A = ord('A')
    cnp.uint8_t C = ord('C')
    cnp.uint8_t G = ord('G')
    cnp.uint8_t T = ord('T')
    cnp.uint8_t N = ord('N')
    
@cython.boundscheck(False)
def opt_is_cas9_target(cnp.uint8_t[:] seq not None, cnp.uint8_t[:] is_variant, bint revcomp=True):
    """Locate Cas9 target sequences.

    A target is a window of 21 consecutive positions starting at ``i``.

    Forward strand: positions ``i`` .. ``i+17`` are not ``N``, position
    ``i+18`` may be anything, and positions ``i+19`` and ``i+20`` are ``G``.

    Reverse strand (only when ``revcomp`` is true): positions ``i`` and
    ``i+1`` are ``C``, position ``i+2`` may be anything, and positions
    ``i+3`` .. ``i+20`` are not ``N``.

    Parameters
    ----------
    seq : uint8 memoryview
        Sequence as ASCII codes; compared against upper-case letters only.
    is_variant : uint8 memoryview or None
        Optional mask. When given, a window only qualifies if the mask is zero
        at every position of the window except the free PAM position
        (``i+18`` forward, ``i+2`` reverse). Must be at least as long as ``seq``.
    revcomp : bool
        Whether to also look for targets on the reverse strand.

    Returns
    -------
    numpy bool array of the same length as ``seq``; entry ``i`` is True when a
    target starts at ``i`` on either strand. The last 20 entries are always False.

    Raises
    ------
    ValueError
        If ``is_variant`` is given and is shorter than ``seq``.
    """

    cdef:
        cnp.uint8_t[:] out
        Py_ssize_t i, j, n_sites
        bint fwd, rev, check_variant

    n_sites = seq.shape[0]
    check_variant = is_variant is not None

    # Bounds checking is disabled for this function, so a mask shorter than the
    # sequence would be read past its end; reject it up front.
    if check_variant:
        if is_variant.shape[0] < n_sites:
            raise ValueError('is_variant must be at least as long as seq')

    out = np.zeros(n_sites, dtype='u1')

    with nogil:
        # the last window starts at n_sites - 21, so that i + 20 stays in range
        for i in range(n_sites - 20):

            # check if forward strand has -NGG
            fwd = (seq[i+19] == G) and (seq[i+20] == G)
            if fwd:
                for j in range(18):
                    if seq[i+j] == N:
                        fwd = False
                        break
            if fwd and check_variant:
                for j in range(21):
                    # offset 18 is the free PAM base: the mask is ignored there
                    if j != 18 and is_variant[i+j] != 0:
                        fwd = False
                        break

            # check if reverse strand has -NGG (seen as CCN- on the forward strand)
            rev = revcomp and (seq[i] == C) and (seq[i+1] == C)
            if rev:
                for j in range(3, 21):
                    if seq[i+j] == N:
                        rev = False
                        break
            if rev and check_variant:
                for j in range(21):
                    # offset 2 is the free PAM base: the mask is ignored there
                    if j != 2 and is_variant[i+j] != 0:
                        rev = False
                        break

            out[i] = fwd or rev

    return np.asarray(out).view(bool)

@cython.boundscheck(False)
def opt_all_subsequent(cython.integral[:] t, Py_ssize_t n):
    """Locate contiguous regions with the same non-zero value.

    For every start position ``i`` whose window ``t[i] .. t[i+n-1]`` lies
    inside ``t``, holds a value greater than zero at ``i`` and has that same
    value at every position of the window, ``out[i]`` is set to that value.
    All other entries of ``out`` are zero.

    Parameters
    ----------
    t : integer memoryview
        Per-position labels; values <= 0 never start a region.
    n : int
        Window length, counting the start position itself.

    Returns
    -------
    numpy array with the same length and dtype as ``t``.
    """

    cdef:
        cython.integral[:] out
        Py_ssize_t i, j, n_starts
        cython.integral x
        bint subs

    out = np.zeros_like(t)

    # A window starting at i ends at i + n - 1, so the last valid start is
    # len(t) - n (inclusive), i.e. there are len(t) - n + 1 start positions.
    # Clamp to len(t) so that n < 1 can never index past the end of t, which
    # would go unnoticed because bounds checking is disabled.
    n_starts = t.shape[0] - n + 1
    if n_starts > t.shape[0]:
        n_starts = t.shape[0]

    with nogil:
        for i in range(n_starts):
            x = t[i]
            if x > 0:
                subs = True
                for j in range(i+1, i+n):
                    if t[j] != x:
                        # one mismatch settles it; no need to scan the rest
                        subs = False
                        break
                if subs:
                    out[i] = x

    return np.asarray(out)
  
@cython.boundscheck(False)
def opt_zero_subsequent(cython.integral[:] t, Py_ssize_t n):
    """Locate non-overlapping targets.

    Scans ``t`` from left to right. Each value greater than zero that is
    reached is copied to the output, after which the scan jumps ``n``
    positions ahead, so that kept entries are at least ``n`` apart.

    Parameters
    ----------
    t : integer memoryview
        Per-position values; entries <= 0 are never kept.
    n : int
        Minimum spacing between kept entries; must be >= 1.

    Returns
    -------
    numpy array with the same length and dtype as ``t``, zero everywhere
    except at the kept positions.

    Raises
    ------
    ValueError
        If ``n`` is smaller than 1.
    """

    cdef:
        cython.integral[:] out
        Py_ssize_t i
        cython.integral x

    # With n < 1 the scan would never move forward after a hit (endless loop,
    # or a negative index with bounds checking disabled).
    if n < 1:
        raise ValueError('n must be a positive integer')

    out = np.zeros_like(t)
    i = 0

    with nogil:
        while i < t.shape[0]:
            x = t[i]
            if x > 0:
                out[i] = x
                i += n
            else:
                i += 1

    return np.asarray(out)

`opt_all_subsequent()` scans an array of integer values and reports for each entry whether that entry and the entries following it, a window of `n` entries in total, all have the same non-zero value (used to check e.g. whether all entries are in the same exon). If the requirement is met, it replaces the corresponding entry in `out` by its value in `t`. Returns numpy array `out`, with zeroes where the condition is not met.  
`opt_zero_subsequent()` scans an array of integer values and returns an array of integers where non-zero values are spaced apart by at least `n`

### Python functions

In [17]:
@functools.lru_cache(maxsize=None)
def get_reference_sequence(chrom):
    """Return the upper-cased reference sequence of `chrom`.

    The sequence is fetched through `af1.genome_sequence`, computed into
    memory and converted to upper case. The result is memoised per `chrom`.
    """
    raw_seq = af1.genome_sequence(chrom).compute()
    return np.char.upper(raw_seq)

In [18]:
# not memoised: `is_variant` is a numpy array, which lru_cache cannot hash - but it's pretty quick
def get_is_cas9_target(chrom, revcomp=True, is_variant=None):
    """Flag Cas9 target start positions on the reference sequence of `chrom`.

    The reference sequence, and `is_variant` when supplied, are reinterpreted
    as unsigned bytes and handed to `opt_is_cas9_target`, whose result is
    returned unchanged.
    """
    seq_codes = get_reference_sequence(chrom).view('u1')
    variant_codes = None if is_variant is None else is_variant.view('u1')
    return opt_is_cas9_target(seq_codes, revcomp=revcomp, is_variant=variant_codes)

In [19]:
@functools.lru_cache(maxsize=None)
def get_cds_idx(chrom):
    """Label every reference position of `chrom` with the CDS covering it.

    Returns an integer array as long as the reference sequence. Positions
    outside any CDS are 0; positions inside a CDS carry that CDS's 1-based
    row number in `cdss_chroms`. Where CDS features overlap, the one listed
    last in `cdss_chroms` wins.
    """
    contig = f'AfunGA1_{chrom}'
    cds_labels = np.zeros_like(get_reference_sequence(chrom), dtype=int)
    records = cdss_chroms[['seqid', 'start', 'end']]
    # labels start from 1 so that 0 can mean "no CDS"
    for label, (seqid, start, end) in enumerate(records, start=1):
        if seqid != contig:
            continue
        # GFF coordinates are 1-based and end-inclusive
        cds_labels[start - 1:end] = label
    return cds_labels
    

This gives each CDS a unique index -- however, if there are overlapping CDS, the last listed one counts.

In [20]:
@functools.lru_cache(maxsize=None)
def get_gene_idx(chrom):
    """Label every reference position of `chrom` with the gene covering it.

    Returns an integer array as long as the reference sequence. Positions
    outside any protein-coding gene are 0; positions inside a gene carry that
    gene's 1-based row number in `genes_coding_chroms`. Where genes overlap,
    the one listed last in `genes_coding_chroms` wins.
    """
    contig = f'AfunGA1_{chrom}'
    gene_labels = np.zeros_like(get_reference_sequence(chrom), dtype=int)
    records = genes_coding_chroms[['seqid', 'start', 'end']]
    # labels start from 1 so that 0 can mean "no gene"
    for label, (seqid, start, end) in enumerate(records, start=1):
        if seqid != contig:
            continue
        # GFF coordinates are 1-based and end-inclusive
        gene_labels[start - 1:end] = label
    return gene_labels

In [21]:
@functools.lru_cache(maxsize=None)
def get_target_in_cds(chrom):
    """Flag start positions whose 21-position window lies in one single CDS.

    Returns a boolean array as long as the reference sequence of `chrom`.
    """
    cds_labels = get_cds_idx(chrom)
    window_labels = opt_all_subsequent(cds_labels, 21)
    return window_labels > 0

In [22]:
# Deliberately not memoised (the lru_cache decorator was disabled):
# NOTE(review): presumably because keeping one full-chromosome array per sample is too costly - confirm.
def get_is_variant_individual(chrom, sample_idx):
    """Locate variant sites for a given sample.

    Parameters
    ----------
    chrom : str
        Chromosome name as accepted by `af1.snp_calls`.
    sample_idx : int
        Index of the sample in the SNP call set.

    Returns
    -------
    Boolean numpy array as long as the reference sequence of `chrom`, True at
    every position where the sample carries at least one genotype value > 0.
    """
    seq = get_reference_sequence(chrom)
    out = np.zeros_like(seq, dtype=bool)

    # Open the call set once, restricted to this sample, and take both the
    # positions and the genotypes from it (positions do not depend on samples).
    calls = af1.snp_calls(chrom, sample_indices=[int(sample_idx)])
    pos = calls.variant_position.values
    g = calls.call_genotype

    # a site is variant if any allele call of the sample is > 0
    loc = np.any(g > 0, axis=(1, 2))
    loc = loc.compute().values

    # positions are 1-based, array indices 0-based
    idx = pos[loc] - 1
    out[idx] = True

    return out

# Set up cluster

In [24]:
# gateway = Gateway()
# for cl in gateway.list_clusters():
#     gateway.connect(cl.name).shutdown()

In [25]:
# Start a new dask-gateway cluster that uses the same conda environment as this kernel
gateway = Gateway()
conda_prefix = os.environ["CONDA_PREFIX"]
# NOTE(review): the environment name is taken as the 6th '/'-separated component of
# CONDA_PREFIX, which presumably matches this deployment's directory layout; a prefix
# with a different depth raises IndexError or selects the wrong component - confirm.
current_environment = 'global/'+conda_prefix.split('/')[5]
cluster = gateway.new_cluster(
    profile='standard', 
    conda_environment = current_environment,
)
cluster

VBox(children=(HTML(value='<h2>GatewayCluster</h2>'), HBox(children=(HTML(value='\n<div>\n<style scoped>\n    …

In [26]:
# Connect a dask client to the cluster created above
client=cluster.get_client()

2024-08-08 06:25:16,961 - distributed.client - ERROR - Failed to reconnect to scheduler after 30.00 seconds, closing client


In [27]:
# Request 60 workers
cluster.scale(60)

# Run analysis

In [28]:
@functools.lru_cache(maxsize=None)
def identify_targets_population_cumulative(chrom, sample_idx, seed=None):
    """Count variant sites, Cas9 targets and targeted genes as samples accumulate.

    Samples are visited in random order. After each sample its variant sites
    are added to a running mask, and the targets that lie within a single CDS
    and contain no masked site are counted again.

    Parameters
    ----------
    chrom : str
        Chromosome to analyse.
    sample_idx : tuple of int
        Sample indices; must be hashable because the function is memoised.
    seed : int or None, optional
        Seed for the random sample order. With the default ``None`` the order
        differs between kernels; pass an integer for a reproducible order.

    Returns
    -------
    Integer numpy array of shape ``(len(sample_idx) + 1, 3)`` with columns
    (number of variant sites, number of targets, number of genes). Row 0 is
    the state before any sample was added.

    Notes
    -----
    Results are memoised per ``(chrom, sample_idx, seed)``, so a repeated call
    returns the very same array object (and the same random order); do not
    modify the returned array in place.
    """

    # Find targets in cds (not using site filter)
    base_loc = get_target_in_cds(chrom)

    # index genes
    gene_idx = get_gene_idx(chrom)

    def count_targets_and_genes(variant_mask=None):
        """Return (n_targets, n_genes) for targets free of masked sites."""
        loc = base_loc & get_is_cas9_target(chrom, revcomp=True, is_variant=variant_mask)
        return np.count_nonzero(loc), len(np.unique(gene_idx[loc]))

    # initial numbers: no variation known yet
    rows = [(0, *count_targets_and_genes())]

    # setup variation
    is_variant = np.zeros(gene_idx.shape[0], dtype=bool)

    # randomise order of samples
    rng = np.random.default_rng(seed)
    shuffled_idx = rng.permutation(np.asarray(sample_idx, dtype=int))

    for i, sidx in enumerate(shuffled_idx):

        # accumulate variation
        is_variant |= get_is_variant_individual(chrom, sidx)

        n_sites = np.count_nonzero(is_variant)
        rows.append((n_sites, *count_targets_and_genes(is_variant)))

        if i % 10 == 0:
            print(f'Sample {i} done')

    return np.array(rows)
    
    

In [29]:
def run_analysis(chrom, pop, sample_idx):
    """Compute and save the cumulative target counts for one cohort and chromosome.

    The result of `identify_targets_population_cumulative` is written to
    ``cumulative_results/<pop>/target_info_<chrom>.npy``. If that file already
    exists nothing is computed.

    Parameters
    ----------
    chrom : str
        Chromosome to analyse.
    pop : str
        Cohort name; used as the name of the output sub-directory.
    sample_idx : tuple of int
        Indices of the samples belonging to the cohort.
    """
    outdir = os.path.join('cumulative_results', str(pop))
    outfile = os.path.join(outdir, f'target_info_{chrom}.npy')

    if os.path.exists(outfile):
        print(f'{outfile} already exists, skipping computation')
        return

    # creates missing parent directories as well and avoids passing the cohort
    # name through a shell command
    os.makedirs(outdir, exist_ok=True)

    print(f'Computing for {len(sample_idx)} samples in cohort {pop}')
    target_info = identify_targets_population_cumulative(chrom, sample_idx)
    np.save(outfile, target_info)
    

In [30]:
# Attach the supplementary table as extra metadata; the cells below read the
# `subset_2` and `geographic_cohort` columns from the resulting sample metadata
af1.add_extra_metadata(pd.read_csv('../../metadata/supp1.csv'))
meta = af1.sample_metadata()

                                     

In [32]:
# Index values of the samples flagged subset_2 == 'Y': all of them under the
# key 'hicov', then one entry per geographic cohort. Tuples are used so the
# values can be passed to memoised functions.
is_hicov = meta.subset_2 == 'Y'
sample_idx_dict = {'hicov': tuple(meta.loc[is_hicov].index.values)}
for pop in meta.geographic_cohort.unique():
    in_cohort = meta.geographic_cohort == pop
    sample_idx = meta.loc[in_cohort & is_hicov].index.values
    sample_idx_dict[pop] = tuple(sample_idx)

In [32]:
# Eyeball the sample indices per cohort
sample_idx_dict 

{'hicov': (0,
  1,
  2,
  3,
  4,
  5,
  6,
  7,
  8,
  9,
  10,
  11,
  13,
  14,
  15,
  18,
  19,
  20,
  21,
  23,
  24,
  25,
  26,
  27,
  28,
  29,
  30,
  31,
  32,
  33,
  34,
  35,
  36,
  37,
  38,
  39,
  40,
  41,
  42,
  43,
  44,
  45,
  46,
  47,
  48,
  49,
  50,
  51,
  52,
  53,
  54,
  55,
  56,
  57,
  58,
  59,
  60,
  61,
  62,
  63,
  64,
  65,
  66,
  67,
  68,
  69,
  70,
  71,
  72,
  73,
  74,
  75,
  76,
  77,
  78,
  79,
  80,
  81,
  82,
  83,
  84,
  85,
  86,
  87,
  88,
  89,
  90,
  91,
  92,
  93,
  94,
  95,
  96,
  97,
  98,
  99,
  100,
  101,
  102,
  103,
  104,
  105,
  106,
  107,
  108,
  109,
  110,
  111,
  112,
  113,
  114,
  115,
  116,
  117,
  118,
  119,
  120,
  121,
  122,
  123,
  124,
  125,
  126,
  127,
  128,
  129,
  130,
  131,
  132,
  133,
  134,
  135,
  136,
  137,
  138,
  139,
  140,
  141,
  142,
  143,
  144,
  145,
  146,
  147,
  148,
  149,
  150,
  151,
  152,
  153,
  154,
  155,
  156,
  157,
  159,
  160,
  161

In [33]:
#First test on a small set: one cohort on chromosome X
run_analysis('X', 'Ghana_Northern-Region', sample_idx = sample_idx_dict['Ghana_Northern-Region'])

cumulative_results/Ghana_Northern-Region//target_info_X.npy already exists, skipping computation


In [35]:
# Run every geographic cohort on every chromosome
for chrom in ['X', '3RL', '2RL']:
    for pop, pop_sample_idx in sample_idx_dict.items():
        #skip full set for now
        if pop == 'hicov':
            continue
        run_analysis(chrom, pop, sample_idx=pop_sample_idx)
        print(f'Done for cohort {pop} on chrom {chrom}')


cumulative_results/Ghana_Northern-Region//target_info_X.npy already exists, skipping computation
Done for cohort Ghana_Northern-Region on chrom X
cumulative_results/Gabon_Haut-Ogooue//target_info_X.npy already exists, skipping computation
Done for cohort Gabon_Haut-Ogooue on chrom X
cumulative_results/CAR_Ombella-MPoko//target_info_X.npy already exists, skipping computation
Done for cohort CAR_Ombella-MPoko on chrom X
cumulative_results/Cameroon_Adamawa//target_info_X.npy already exists, skipping computation
Done for cohort Cameroon_Adamawa on chrom X
cumulative_results/Ghana_Ashanti-Region//target_info_X.npy already exists, skipping computation
Done for cohort Ghana_Ashanti-Region on chrom X
cumulative_results/Malawi_Southern-Region//target_info_X.npy already exists, skipping computation
Done for cohort Malawi_Southern-Region on chrom X
cumulative_results/Mozambique_Maputo//target_info_X.npy already exists, skipping computation
Done for cohort Mozambique_Maputo on chrom X
cumulative_r