In [None]:
import malariagen_data

In [None]:
# Open the Ag3 data resource. The URL is prefixed with "simplecache::" and the
# matching `simplecache` options point its cache storage at ../gcs_cache;
# `results_cache` names the directory handed to Ag3 for cached results.
ag3 = malariagen_data.Ag3(
    "simplecache::gs://vo_agam_release_master_us_central1",
    simplecache={"cache_storage": "../gcs_cache"},
    results_cache="results_cache",
)
# Show the resource object as the cell output.
ag3

In [None]:
# Sample sets passed to every karyotype and PCA call in this notebook.
sample_sets = [
    "AG1000G-BF-A",
    "AG1000G-GH",
]

In [None]:
# Genomic windows used for each inversion, written as "contig:start-end".
# The exact inversion breakpoints have not been verified, so treat these
# coordinates as approximations only.

# Chromosome arm 2L.
region_2la = "2L:20,528,089-42,165,182"

# Chromosome arm 2R, listed in order of start coordinate.
region_2rj = "2R:4,050,701-14,952,998"
region_2rb = "2R:19,444,433-26,313,071"
region_2rk = "2R:25,146,360-30,717,395"
region_2rc = "2R:27,025,144-31,450,000"
region_2ru = "2R:33,575,891-34,873,652"
region_2rd = "2R:33,575,891-41,360,919"  # same start coordinate as 2Ru

## 2La

In [None]:
# Karyotype calls for inversion 2La across the selected sample sets.
kt_df_2la = ag3.karyotype(
    inversion="2La",
    sample_sets=sample_sets,
)
# Preview the first rows of the karyotype table.
kt_df_2la.head()

In [None]:
# PCA restricted to the 2La window. The call returns two values: the
# per-sample table and a second object kept as pca_evr_2la.
pca_df_2la, pca_evr_2la = ag3.pca(
    region=region_2la, sample_sets=sample_sets, n_snps=50_000
)
# Join the 2La karyotype calls onto the PCA table, matching on sample_id
# ("inner" is the merge default, spelled out here for clarity).
pca_df_2la = pca_df_2la.merge(kt_df_2la, how="inner", on="sample_id")
pca_df_2la.head()

In [None]:
# Plot the 2La PCA coordinates, coloured by the taxon column.
ag3.plot_pca_coords(
    pca_df_2la,
    color="taxon",
    width=600,
    height=500,
)

In [None]:
# Same 2La PCA coordinates, coloured by the called karyotype with the marker
# symbol taken from the taxon column.
ag3.plot_pca_coords(
    pca_df_2la,
    color="karyotype_2La",
    symbol="taxon",
    width=600,
    height=500,
)

In [None]:
# 2La PCA coordinates coloured by the karyotype_2La_mean column (note the
# wider 700px canvas used for this plot).
ag3.plot_pca_coords(
    pca_df_2la,
    color="karyotype_2La_mean",
    width=700,
    height=500,
)

In [None]:
# Histogram of the karyotype_2La_mean column, drawn with the plotly backend.
(
    kt_df_2la["karyotype_2La_mean"]
    .hist(backend="plotly")
)

## 2Rb

In [None]:
# Karyotype calls for inversion 2Rb across the selected sample sets.
kt_df_2rb = ag3.karyotype(
    inversion="2Rb",
    sample_sets=sample_sets,
)
# Preview the first rows of the karyotype table.
kt_df_2rb.head()

In [None]:
# PCA restricted to the 2Rb window. The call returns two values: the
# per-sample table and a second object kept as pca_evr_2rb.
pca_df_2rb, pca_evr_2rb = ag3.pca(
    region=region_2rb, sample_sets=sample_sets, n_snps=50_000
)
# Join the 2Rb karyotype calls onto the PCA table, matching on sample_id
# ("inner" is the merge default, spelled out here for clarity).
pca_df_2rb = pca_df_2rb.merge(kt_df_2rb, how="inner", on="sample_id")
pca_df_2rb.head()

In [None]:
# Plot the 2Rb PCA coordinates, coloured by the called karyotype with the
# marker symbol taken from the taxon column.
ag3.plot_pca_coords(
    pca_df_2rb,
    color="karyotype_2Rb",
    symbol="taxon",
    width=600,
    height=500,
)

In [None]:
# Histogram of the karyotype_2Rb_mean column. The column is read from the
# karyotype table itself, as the other histogram cells in this notebook do,
# so this cell depends only on the karyotype call and not on the PCA/merge
# cell having been run first.
kt_df_2rb["karyotype_2Rb_mean"].hist(backend="plotly")

## 2Rc_gam

In [None]:
# Karyotype calls for inversion "2Rc_gam", limited by the sample query to
# samples whose taxon is gambiae.
kt_df_2rc_gam = ag3.karyotype(
    inversion="2Rc_gam",
    sample_query="taxon == 'gambiae'",
    sample_sets=sample_sets,
)
# Preview the first rows of the karyotype table.
kt_df_2rc_gam.head()

In [None]:
# Histogram of the karyotype_2Rc_gam_mean column with the x-axis pinned to
# the 0-2 range. update_xaxes hands back the figure it was called on, so the
# chained result is the same figure object.
fig = (
    kt_df_2rc_gam["karyotype_2Rc_gam_mean"]
    .hist(backend="plotly")
    .update_xaxes(range=[0, 2])
)
fig

In [None]:
# PCA over the 2Rc window for gambiae samples only. The second return value
# is bound to a descriptive name rather than `_`: in IPython, assigning to
# `_` stops the shell from updating its last-output shortcut, and the 2La and
# 2Rb sections already keep this value under a pca_evr_* name.
pca_df_2rc_gam, pca_evr_2rc_gam = ag3.pca(
    region=region_2rc,
    sample_sets=sample_sets,
    sample_query="taxon == 'gambiae'",
    n_snps=50_000,
)
# Join the 2Rc_gam karyotype calls onto the PCA table, matching on sample_id.
pca_df_2rc_gam = pca_df_2rc_gam.merge(kt_df_2rc_gam, on="sample_id")
pca_df_2rc_gam.head()

In [None]:
# Plot the gambiae-only 2Rc PCA coordinates, coloured by the called
# karyotype with the marker symbol taken from the taxon column.
ag3.plot_pca_coords(
    pca_df_2rc_gam,
    color="karyotype_2Rc_gam",
    symbol="taxon",
    width=600,
    height=500,
)

In [None]:
# Same gambiae-only 2Rc PCA coordinates, this time coloured by the country
# column.
ag3.plot_pca_coords(
    pca_df_2rc_gam,
    color="country",
    symbol="taxon",
    width=600,
    height=500,
)

## 2Rc_col

In [None]:
# Karyotype calls for inversion "2Rc_col", limited by the sample query to
# samples whose taxon is coluzzii.
kt_df_2rc_col = ag3.karyotype(
    inversion="2Rc_col",
    sample_query="taxon == 'coluzzii'",
    sample_sets=sample_sets,
)
# Preview the first rows of the karyotype table.
kt_df_2rc_col.head()

In [None]:
# Histogram of the karyotype_2Rc_col_mean column with the x-axis pinned to
# the 0-2 range. update_xaxes hands back the figure it was called on, so the
# chained result is the same figure object.
fig = (
    kt_df_2rc_col["karyotype_2Rc_col_mean"]
    .hist(backend="plotly")
    .update_xaxes(range=[0, 2])
)
fig

In [None]:
# PCA over the 2Rc window for coluzzii samples only. The second return value
# is bound to a descriptive name rather than `_`: in IPython, assigning to
# `_` stops the shell from updating its last-output shortcut, and the 2La and
# 2Rb sections already keep this value under a pca_evr_* name.
pca_df_2rc_col, pca_evr_2rc_col = ag3.pca(
    region=region_2rc,
    sample_sets=sample_sets,
    sample_query="taxon == 'coluzzii'",
    n_snps=50_000,
)
# Join the 2Rc_col karyotype calls onto the PCA table, matching on sample_id.
pca_df_2rc_col = pca_df_2rc_col.merge(kt_df_2rc_col, on="sample_id")
pca_df_2rc_col.head()

In [None]:
# Plot the coluzzii-only 2Rc PCA coordinates, coloured by the called
# karyotype with the marker symbol taken from the country column.
ag3.plot_pca_coords(
    pca_df_2rc_col,
    color="karyotype_2Rc_col",
    symbol="country",
    width=600,
    height=500,
)