In [1]:
import pandas as pd
import allel
import malariagen_data
import numpy as np

def subset_xarray(geno, samples, parent1, parent2, offspring):

    pmask = np.isin(samples, [parent1, parent2])
    omask = samples == offspring
    gn_parents = allel.GenotypeArray(geno.compress(pmask, axis=1))
    gn_off = allel.GenotypeArray(geno.compress(omask, axis=1))

    return gn_parents.concatenate(gn_off, axis=1)

In [5]:
ag3 = malariagen_data.Ag3(results_cache="../../results_cache/", 
                          simple_cache="../../gcs_cache/")

In [6]:
df_samples = ag3.sample_metadata(sample_sets='AG1000G-X')
df_crosses = ag3.cross_metadata()

                                     

In [10]:
from tqdm.notebook import tqdm
import zarr

for contig in ('2RL', '3RL', 'X'):

    ds_snps = ag3.biallelic_snp_calls(region=contig, sample_sets='AG1000G-X', site_mask='gamb_colu') 
    samples = ds_snps['sample_id'].values
    geno = ds_snps['call_genotype'].values
    pos = ds_snps['variant_position'].values

    print(f"phasing {contig}...")
    offspring_gns = []
    for i, row in tqdm(df_crosses.query("role == 'progeny'").iterrows()):
    
        father, mother, offspring = offspring, father, mother = row.iloc[[1,2,3]]
    
        gn = subset_xarray(geno, samples, father, mother, offspring)
    
        phased_gn = allel.phase_by_transmission(gn, window_size=50)
        #phased_gn = phased_gn.compress(phased_gn[:, 2].is_phased, axis=0)
    
        offspring_gns.append(phased_gn[:, [2]])

    offspring_gn = offspring_gns[0].concatenate(offspring_gns[1:], axis=1)
    is_phased_all = offspring_gn.is_phased.all(axis=1)
    offspring_gn = offspring_gn.compress(is_phased_all, axis=0)
    
    zarr.save(f"crosses-offspring-gn-{contig}.zarr", offspring_gn.values)
    zarr.save(f"pos-{contig}.zarr", pos[is_phased_all])

phasing 2RL...                              


0it [00:00, ?it/s]

phasing 3RL...                              


0it [00:00, ?it/s]

phasing X...                                


0it [00:00, ?it/s]

In [11]:
ls

ancIBD-crosses.ipynb                   lab-crosses-phase-by-transmission.ipynb
[0m[01;34mcrosses-offspring-gn-2R:1-10000.zarr[0m/  [01;34mpos-2R:1-10000.zarr[0m/
[01;34mcrosses-offspring-gn-2RL.zarr[0m/         [01;34mpos-2RL.zarr[0m/
[01;34mcrosses-offspring-gn-3RL.zarr[0m/         [01;34mpos-3RL.zarr[0m/
[01;34mcrosses-offspring-gn-X.zarr[0m/           [01;34mpos-X.zarr[0m/
featurecountsVhtseq.ipynb
