In [None]:
import shutil

import allel
import malariagen_data
import numpy as np
import pandas as pd
import plotly.express as px

In [None]:
# Create the Ag3 API handle used by every cell below.
# - The "simplecache::" URL prefix plus the `simplecache` options route remote
#   reads through a local file cache stored under ../gcs_cache.
# - `results_cache` names the local directory passed to the API for caching
#   analysis results.
# - `cohorts_analysis` pins the cohort metadata version used by the queries.
ag3 = malariagen_data.Ag3(
    "simplecache::gs://vo_agam_release",
    simplecache={"cache_storage": "../gcs_cache"},
    results_cache="results_cache",
    cohorts_analysis="20240717",
)
# Bare last expression so the notebook renders the object's rich repr.
ag3

In [None]:
# Remove any previously cached analysis results so the cells below recompute
# from scratch. shutil.rmtree replaces the shell `rm -rf` so the cell does not
# depend on IPython's `!` syntax or on a POSIX `rm` being available;
# ignore_errors=True mirrors `-f` (a missing directory is not an error).
shutil.rmtree("results_cache", ignore_errors=True)

In [None]:
# Shared parameters for the analysis cells that follow (until they are
# reassigned further down the notebook).
site_mask = "gamb_colu"  # forwarded as the `site_mask` argument
region = "3L:15,000,000-16,000,000"  # genomic window, forwarded as `region`
# Forwarded as `n_jack`; presumably the number of block-jackknife blocks used
# for the standard error — confirm against the API documentation.
n_jack = 200

In [None]:
# Average Fst between two cohorts drawn from sample set "3.0". Each cohort is
# selected by a query on the `cohort_admin2_year` column. The call returns a
# pair; going by the variable names, this is the Hudson Fst estimate and its
# standard error.
fst_hudson, se_hudson = ag3.average_fst(
    sample_sets="3.0",
    cohort1_query="cohort_admin2_year == 'ML-2_Kati_colu_2014'",
    cohort2_query="cohort_admin2_year == 'ML-2_Kati_gamb_2014'",
    region=region,
    site_mask=site_mask,
    n_jack=n_jack,
)
# Display both values as the cell output.
fst_hudson, se_hudson

In [None]:
# Pairwise average Fst between all cohorts defined by the `cohort_admin1_year`
# grouping, restricted to Mali gambiae samples from sample set "3.0".
# Uses the region, site mask and n_jack values set in the parameter cell.
pairwise_fst_df = ag3.pairwise_average_fst(
    sample_sets="3.0",
    sample_query="country == 'Mali' and taxon == 'gambiae'",
    cohorts="cohort_admin1_year",
    region=region,
    site_mask=site_mask,
    n_jack=n_jack,
)
# Show the resulting table.
pairwise_fst_df

In [None]:
# Plot the pairwise Fst table from the previous cell using the default plot settings.
ag3.plot_pairwise_average_fst(pairwise_fst_df)

In [None]:
# Same plot, with the "standard error" annotation mode selected.
ag3.plot_pairwise_average_fst(
    pairwise_fst_df,
    annotation="standard error",
)

In [None]:
# Same plot, with the "Z score" annotation mode selected.
ag3.plot_pairwise_average_fst(
    pairwise_fst_df,
    annotation="Z score",
)

In [None]:
# Same plot with an explicit figure size and `zmax` (presumably the upper
# limit of the colour scale — confirm against the API documentation).
ag3.plot_pairwise_average_fst(
    pairwise_fst_df,
    width=400,
    height=300,
    zmax=0.1,
)

In [None]:
# Parameters for the Kenya arabiensis analysis below. These rebind the same
# notebook-level names used by the earlier cells: `region` and `n_jack` keep
# their previous values, and only `site_mask` changes (to "arab"). Re-running
# the earlier cells after this one would therefore pick up the "arab" mask.
site_mask = "arab"
region = "3L:15,000,000-16,000,000"
n_jack = 200

In [None]:
# Pairwise average Fst between `cohort_admin1_year` cohorts of Kenyan
# arabiensis samples, drawn from two named sample sets. `min_cohort_size=10`
# is passed to the API as the minimum cohort size. This rebinds
# `pairwise_fst_df`, replacing the Mali result computed earlier.
pairwise_fst_df = ag3.pairwise_average_fst(
    sample_sets=["AG1000G-KE", "1274-VO-KE-KAMAU-VMF00246"],
    sample_query="country == 'Kenya' and taxon == 'arabiensis'",
    cohorts="cohort_admin1_year",
    min_cohort_size=10,
    region=region,
    site_mask=site_mask,
    n_jack=n_jack,
)
# Show the resulting table.
pairwise_fst_df

In [None]:
# Plot the Kenya pairwise Fst table from the previous cell using the default plot settings.
ag3.plot_pairwise_average_fst(pairwise_fst_df)

In [None]:
# Kenya results again, with the "standard error" annotation mode selected.
ag3.plot_pairwise_average_fst(
    pairwise_fst_df,
    annotation="standard error",
)

In [None]:
# Kenya results with the "Z score" annotation mode and `zmax` set to 0.03
# (presumably the colour-scale upper limit — confirm against the API docs).
ag3.plot_pairwise_average_fst(
    pairwise_fst_df,
    zmax=0.03,
    annotation="Z score",
)

In [None]:
# Custom cohorts: each key is a cohort label and each value is the sample
# query that selects its members. Insertion order is kept as written.
#
# NOTE(review): the "Teso_2013" and "Turkana_2006" queries do not filter on
# `year`, unlike the other three cohorts. If either location has arabiensis
# samples from more than one year they will be pooled under a single-year
# label — confirm against the sample metadata.
wild_cohorts = {
    "Mwea_2007": "taxon == 'arabiensis' and location == 'Mwea' and year == 2007",
    "Mwea_2014": "taxon == 'arabiensis' and location == 'Mwea' and year == 2014",
    "Teso_2013": "taxon == 'arabiensis' and location == 'Teso'",
    "Turkana_2006": "taxon == 'arabiensis' and location == 'Turkana'",
    "Kilifi_2012": "taxon == 'arabiensis' and location == 'Kilifi' and year == 2012",
}

# Pairwise average Fst between the custom cohorts. The region and site mask
# are written out literally here (they repeat the values assigned in the
# preceding parameter cell). `sample_sets` and `n_jack` are not passed, so the
# API defaults apply.
fst_df = ag3.pairwise_average_fst(
    cohorts=wild_cohorts,
    min_cohort_size=10,
    region="3L:15,000,000-16,000,000",
    site_mask="arab",
)
# Show the resulting table.
fst_df

In [None]:
# Plot the custom-cohort Fst table with the "Z score" annotation mode and
# `zmax` set to 0.4.
ag3.plot_pairwise_average_fst(
    fst_df,
    zmax=0.4,
    annotation="Z score",
)