In [None]:
import malariagen_data

## Ag3

In [None]:
# Create the Ag3 API handle used by the Ag3 cells below. The bucket URL carries
# a "simplecache::" prefix, and the matching cache options point at a local
# directory next to the notebook's parent folder.
ag3 = malariagen_data.Ag3(
    "simplecache::gs://vo_agam_release",
    results_cache="results_cache",
    simplecache={"cache_storage": "../gcs_cache"},
)
# Bare expression so the notebook displays the object's repr.
ag3

In [None]:
# Advanced diplotype clustering for a single sample set over a 2L window,
# with SNP options tied to transcript AGAP004707-RD.
ag3.plot_diplotype_clustering_advanced(
    # Samples and genomic window.
    sample_sets="AG1000G-GH",
    site_mask="gamb_colu",
    region="2L:2,350,000-2,410,000",
    # SNP selection.
    snp_transcript="AGAP004707-RD",
    snp_query="effect == 'NON_SYNONYMOUS_CODING'",
    snp_filter_min_maf=0.05,
    # Clustering and ordering.
    linkage_method="complete",
    count_sort=True,
    distance_sort=False,
    # Marker appearance.
    color="taxon",
    symbol="country",
)

In [None]:
# Same kind of advanced plot, this time with a CNV region supplied and
# show=False so the call's return value is captured instead of relying on the
# method to display it.
fig = ag3.plot_diplotype_clustering_advanced(
    # Samples and genomic windows (the SNP and CNV windows are the same span).
    sample_sets="AG1000G-GH",
    site_mask="gamb_colu",
    region="2R:28,480,000-28,500,000",
    cnv_region="2R:28,480,000-28,500,000",
    # SNP selection.
    snp_transcript="AGAP002862-RA",
    snp_filter_min_maf=0.05,
    # Clustering and ordering.
    linkage_method="complete",
    count_sort=True,
    distance_sort=False,
    # Marker appearance.
    color="taxon",
    symbol="country",
    # Capture the result rather than showing it from inside the call.
    show=False,
)
# Bare expression so the notebook renders the captured result.
fig

In [None]:
# Advanced plot with SNP and CNV windows, now drawing on two sample sets.
ag3.plot_diplotype_clustering_advanced(
    # Samples and genomic windows.
    sample_sets=["AG1000G-GH", "AG1000G-BF-A"],
    site_mask="gamb_colu",
    region="2R:28,480,000-28,500,000",
    cnv_region="2R:28,480,000-28,500,000",
    # SNP selection.
    snp_transcript="AGAP002862-RA",
    snp_filter_min_maf=0.05,
    # Clustering and ordering.
    linkage_method="complete",
    count_sort=True,
    distance_sort=False,
    # Marker appearance.
    color="taxon",
    symbol="country",
)

In [None]:
# Basic diplotype clustering across two sample sets at an explicit figure size.
ag3.plot_diplotype_clustering(
    # Data selection.
    sample_sets=["AG1000G-GH", "AG1000G-BF-B"],
    region="2L:2,410,000-2,430,000",
    site_mask="gamb_colu",
    # Clustering and ordering.
    linkage_method="complete",
    count_sort=True,
    distance_sort=False,
    # Figure appearance.
    color="taxon",
    symbol="country",
    width=1000,
    height=500,
)

In [None]:
# Same window and sample sets, but with site_mask=None and the "svg" render
# mode; width and height are left unspecified here.
ag3.plot_diplotype_clustering(
    # Data selection.
    sample_sets=["AG1000G-GH", "AG1000G-BF-B"],
    region="2L:2,410,000-2,430,000",
    site_mask=None,
    # Clustering and ordering.
    linkage_method="complete",
    count_sort=True,
    distance_sort=False,
    # Figure appearance.
    color="taxon",
    symbol="country",
    render_mode="svg",
)

In [None]:
# Variation: "single" linkage, colour/symbol roles swapped relative to the
# cells above, and the "webgl" render mode.
ag3.plot_diplotype_clustering(
    # Data selection.
    sample_sets=["AG1000G-GH", "AG1000G-BF-B"],
    region="2L:2,410,000-2,430,000",
    site_mask="gamb_colu",
    # Clustering and ordering.
    linkage_method="single",
    count_sort=True,
    distance_sort=False,
    # Figure appearance.
    color="country",
    symbol="taxon",
    width=1000,
    height=500,
    render_mode="webgl",
)

In [None]:
# Variation: an explicit cohort_size, "weighted" linkage and a "euclidean"
# distance metric; distance_sort is left unspecified here.
ag3.plot_diplotype_clustering(
    # Data selection.
    sample_sets=["AG1000G-GH", "1244-VO-GH-YAWSON-VMF00051"],
    region="2L:28,545,000-28,550,000",
    site_mask="gamb_colu",
    cohort_size=500,
    # Clustering and ordering.
    distance_metric="euclidean",
    linkage_method="weighted",
    count_sort=True,
    # Figure appearance.
    color="admin1_name",
    symbol="taxon",
    width=1200,
    height=600,
)

In [None]:
# Release "3.0" restricted by a sample query on taxon, coloured by sample set.
ag3.plot_diplotype_clustering(
    # Data selection.
    sample_sets=["3.0"],
    sample_query="taxon == 'arabiensis'",
    region="2R:28,480,000-28,490,000",
    site_mask="gamb_colu_arab",
    cohort_size=None,
    # Figure appearance.
    color="sample_set",
    width=1000,
    height=400,
)

In [None]:
# Same selection as the sample_set-coloured cell, but coloured by "admin1_year".
ag3.plot_diplotype_clustering(
    # Data selection.
    sample_sets=["3.0"],
    sample_query="taxon == 'arabiensis'",
    region="2R:28,480,000-28,490,000",
    site_mask="gamb_colu_arab",
    cohort_size=None,
    # Figure appearance.
    color="admin1_year",
    width=1000,
    height=400,
)

In [None]:
# Coloured by "admin1_year" again, this time passing title=None explicitly.
ag3.plot_diplotype_clustering(
    # Data selection.
    sample_sets=["3.0"],
    sample_query="taxon == 'arabiensis'",
    region="2R:28,480,000-28,490,000",
    site_mask="gamb_colu_arab",
    cohort_size=None,
    # Figure appearance.
    color="admin1_year",
    title=None,
    width=1000,
    height=400,
)

In [None]:
# Two label -> query-string mappings that later cells pass as the `color` or
# `symbol` argument. They share the same "West" query and differ only in which
# countries the "East" query lists.
new_cohorts = dict(
    East="country in ['Malawi', 'Tanzania', 'Kenya', 'Uganda']",
    West="country in ['Mali', 'Burkina Faso', 'Cameroon']",
)
other_cohorts = dict(
    East="country in ['Malawi']",
    West="country in ['Mali', 'Burkina Faso', 'Cameroon']",
)

In [None]:
# Colour by the custom `new_cohorts` mapping (label -> sample query) and use a
# "euclidean" distance metric.
ag3.plot_diplotype_clustering(
    # Data selection.
    sample_sets=["3.0"],
    sample_query="taxon == 'arabiensis'",
    region="2R:28,480,000-28,490,000",
    site_mask="gamb_colu_arab",
    cohort_size=None,
    # Clustering.
    distance_metric="euclidean",
    # Figure appearance.
    color=new_cohorts,
    width=1000,
    height=400,
)

In [None]:
# Colour by the `other_cohorts` mapping, whose "East" query lists only Malawi.
ag3.plot_diplotype_clustering(
    # Data selection.
    sample_sets=["3.0"],
    sample_query="taxon == 'arabiensis'",
    region="2R:28,480,000-28,490,000",
    site_mask="gamb_colu_arab",
    cohort_size=None,
    # Figure appearance.
    color=other_cohorts,
    width=1000,
    height=400,
)

In [None]:
# Here the `new_cohorts` mapping drives the marker symbol, while colour comes
# from the "year" column name.
ag3.plot_diplotype_clustering(
    # Data selection.
    sample_sets=["3.0"],
    sample_query="taxon == 'arabiensis'",
    region="2R:28,480,000-28,490,000",
    site_mask="gamb_colu_arab",
    cohort_size=None,
    # Figure appearance.
    color="year",
    symbol=new_cohorts,
    width=1000,
    height=400,
)

## Af1

In [None]:
# Create the Af1 API handle used by the Af1 cells below. It reuses the same
# local simplecache directory as the Ag3 handle and passes debug=False and
# pre=True explicitly.
af1 = malariagen_data.Af1(
    "simplecache::gs://vo_afun_release",
    pre=True,
    debug=False,
    simplecache={"cache_storage": "../gcs_cache"},
)
# Bare expression so the notebook displays the object's repr.
af1

In [None]:
# Af1 diplotype clustering across two sample sets; no site_mask is passed.
af1.plot_diplotype_clustering(
    # Data selection.
    sample_sets=["1240-VO-CD-KOEKEMOER-VMF00099", "1240-VO-MZ-KOEKEMOER-VMF00101"],
    region="2RL:2,410,000-2,430,000",
    # Clustering and ordering.
    distance_metric="euclidean",
    linkage_method="complete",
    count_sort=True,
    distance_sort=False,
    # Figure appearance.
    color="sample_set",
    symbol="country",
    width=1000,
    height=500,
)

In [None]:
# Variation: an explicit cohort_size and "weighted" linkage, with the
# colour/symbol roles swapped relative to the previous Af1 cell.
af1.plot_diplotype_clustering(
    # Data selection.
    sample_sets=["1240-VO-CD-KOEKEMOER-VMF00099", "1240-VO-MZ-KOEKEMOER-VMF00101"],
    region="2RL:28,545,000-28,550,000",
    cohort_size=80,
    # Clustering and ordering.
    linkage_method="weighted",
    count_sort=True,
    # Figure appearance.
    color="country",
    symbol="sample_set",
    width=1200,
    height=600,
)

In [None]:
# Release "1.0" restricted by a sample query on country, coloured by sample set.
af1.plot_diplotype_clustering(
    # Data selection.
    sample_sets=["1.0"],
    sample_query="country == 'Ghana'",
    region="2RL:28,480,000-28,490,000",
    cohort_size=None,
    # Figure appearance.
    color="sample_set",
    width=1000,
    height=400,
)