In [None]:
import malariagen_data

# Open the Ag3 data resource from the vo_agam_release GCS bucket. The
# "simplecache::" URL prefix routes reads through a local file cache whose
# directory is given by the `simplecache` options below.
ag3 = malariagen_data.Ag3(
    "simplecache::gs://vo_agam_release",
    simplecache={"cache_storage": "../gcs_cache"},
    # Cohorts analysis version used for every Ag3 cohort query in this notebook.
    cohorts_analysis="20230516",
)

# Display the resource as the cell output.
ag3

In [None]:
import malariagen_data

# Open the Af1 data resource from the vo_afun_release GCS bucket, reading
# through the same "../gcs_cache" local cache directory used for Ag3.
af1 = malariagen_data.Af1(
    "simplecache::gs://vo_afun_release",
    simplecache={"cache_storage": "../gcs_cache"},
    # Cohorts analysis version used for every Af1 cohort query in this notebook.
    cohorts_analysis="20230823",
)

# Display the resource as the cell output.
af1

## SNP allele frequencies

In [None]:
# SNP allele frequencies for transcript AGAP004707-RD, grouped by the
# "admin1_year" cohorts, using the three AG1000G-BF-* sample sets and only
# samples matching the taxon query.
allele_freqs_df = ag3.snp_allele_frequencies(
    transcript="AGAP004707-RD",
    cohorts="admin1_year",
    sample_sets=(
        "AG1000G-BF-A",
        "AG1000G-BF-B",
        "AG1000G-BF-C",
    ),
    sample_query="taxon == 'coluzzii'",
)

# Show the full (unfiltered) frequency table.
allele_freqs_df

In [None]:
# Keep only non-synonymous coding SNPs whose max_af exceeds 0.05.
snps_df = allele_freqs_df.query(
    "effect == 'NON_SYNONYMOUS_CODING' and max_af > 0.05"
)

In [None]:
# Heatmap of the filtered SNP frequencies with all plot options left at defaults.
ag3.plot_frequencies_heatmap(df=snps_df)

In [None]:
# Same heatmap, but using only the first two rows of the filtered table.
ag3.plot_frequencies_heatmap(df=snps_df.iloc[:2])

In [None]:
# Same heatmap again with a single-row input (first row only).
ag3.plot_frequencies_heatmap(df=snps_df.iloc[:1])

In [None]:
# Heatmap with an explicit title; the returned figure is kept in `fig`.
fig = ag3.plot_frequencies_heatmap(
    df=snps_df,
    title="SNP frequencies: Vgsc",
)

# Display the figure as the cell output.
fig

In [None]:
# List the sample sets available through the Af1 resource; the identifiers
# passed as `sample_sets=` in the Af1 cells below presumably come from this
# listing (verify against the output).
af1.sample_sets()

In [None]:
# Show the Af1 genome features table; presumably where transcript identifiers
# such as the one used in the next cell can be looked up (verify against the output).
af1.genome_features()

In [None]:
# SNP allele frequencies from Af1 for transcript LOC125767311_t1, grouped by
# the "admin1_year" cohorts over two sample sets (no sample query applied).
# NOTE: this rebinds `allele_freqs_df`, replacing the Ag3 table from earlier.
allele_freqs_df = af1.snp_allele_frequencies(
    transcript="LOC125767311_t1",
    cohorts="admin1_year",
    sample_sets=(
        "1231-VO-MULTI-WONDJI-VMF00043",
        "1240-VO-CD-KOEKEMOER-VMF00099",
    ),
)

# Show the full (unfiltered) frequency table.
allele_freqs_df

In [None]:
# Keep only non-synonymous coding SNPs whose max_af exceeds 0.05.
# NOTE: this rebinds `snps_df`, replacing the Ag3 subset from earlier.
snps_df = allele_freqs_df.query(
    "effect == 'NON_SYNONYMOUS_CODING' and max_af > 0.05"
)

# Show the filtered table.
snps_df

In [None]:
# Heatmap of the filtered Af1 SNP frequencies; keep the figure in `fig`.
fig = af1.plot_frequencies_heatmap(
    df=snps_df,
)

# Display the figure as the cell output.
fig

## Amino acid frequencies

In [None]:
# Amino acid allele frequencies for transcript AGAP004707-RD, grouped by the
# "admin1_year" cohorts, using the three AG1000G-BF-* sample sets and only
# samples matching the taxon query.
aa_freqs_df = ag3.aa_allele_frequencies(
    transcript="AGAP004707-RD",
    cohorts="admin1_year",
    sample_sets=(
        "AG1000G-BF-A",
        "AG1000G-BF-B",
        "AG1000G-BF-C",
    ),
    sample_query="taxon == 'coluzzii'",
)

In [None]:
# Keep only amino acid changes whose max_af exceeds 0.05.
aa_df = aa_freqs_df.query(
    "max_af > 0.05"
)

In [None]:
# Display the filtered amino acid frequency table.
aa_df

In [None]:
# Heatmap of the filtered amino acid frequencies with default plot options.
ag3.plot_frequencies_heatmap(df=aa_df)

In [None]:
# Amino acid frequencies again, this time passing "3.0" as `sample_sets`
# rather than an explicit tuple, and filtering to max_af > 0.05 in the same
# expression.
# NOTE: this rebinds `aa_freqs_df` to an already-filtered table.
aa_freqs_df = (
    ag3.aa_allele_frequencies(
        transcript="AGAP004707-RD",
        cohorts="admin1_year",
        sample_sets="3.0",
        sample_query="taxon == 'coluzzii'",
    )
    .query("max_af > 0.05")
)

# Show the filtered table.
aa_freqs_df

In [None]:
# Heatmap of the filtered amino acid frequencies computed in the previous cell.
ag3.plot_frequencies_heatmap(
    df=aa_freqs_df,
)

In [None]:
# Same request as the previous query but without a `sample_query`, so no
# taxon restriction is applied; filter to max_af > 0.05 as before.
aa_freqs_df = (
    ag3.aa_allele_frequencies(
        transcript="AGAP004707-RD",
        cohorts="admin1_year",
        sample_sets="3.0",
    )
    .query("max_af > 0.05")
)

# Plot the result directly as the cell output.
ag3.plot_frequencies_heatmap(
    df=aa_freqs_df,
)

In [None]:
# Amino acid allele frequencies from Af1 for transcript LOC125767311_t1,
# grouped by the "admin1_year" cohorts over two sample sets, filtered to
# max_af > 0.05.
# NOTE: this rebinds `aa_freqs_df`, replacing the Ag3 table from earlier.
aa_freqs_df = (
    af1.aa_allele_frequencies(
        transcript="LOC125767311_t1",
        cohorts="admin1_year",
        sample_sets=(
            "1231-VO-MULTI-WONDJI-VMF00043",
            "1240-VO-CD-KOEKEMOER-VMF00099",
        ),
    )
    .query("max_af > 0.05")
)

# Show the filtered table.
aa_freqs_df

In [None]:
# Heatmap of the filtered Af1 amino acid frequencies.
af1.plot_frequencies_heatmap(
    df=aa_freqs_df,
)

## Gene CNV frequencies

In [None]:
# Genomic region string passed as `region=` to the gene CNV frequency query
# below. The plot title used later ("Cyp6p/aa gene CNV frequencies") suggests
# it is meant to span the Cyp6p/aa genes — TODO confirm the coordinates.
cyp6aap_region = "2R:28,450,000-28,510,000"

In [None]:
# Gene CNV frequencies for the region defined above, grouped by the
# "admin1_year" cohorts, using the three AG1000G-BF-* sample sets and only
# samples matching the taxon query.
cyp6aap_cnv_freqs_df = ag3.gene_cnv_frequencies(
    region=cyp6aap_region,
    cohorts="admin1_year",
    sample_sets=(
        "AG1000G-BF-A",
        "AG1000G-BF-B",
        "AG1000G-BF-C",
    ),
    sample_query="taxon == 'coluzzii'",
)

In [None]:
# Display the gene CNV frequency table.
cyp6aap_cnv_freqs_df

In [None]:
# Heatmap of the gene CNV frequencies with default plot options.
ag3.plot_frequencies_heatmap(df=cyp6aap_cnv_freqs_df)

In [None]:
# Heatmap restricted to the "label" column plus one frequency column, i.e. a
# single-cohort input.
ag3.plot_frequencies_heatmap(
    df=cyp6aap_cnv_freqs_df[
        [
            "label",
            "frq_BF-09_colu_2014",
        ]
    ],
)

In [None]:
# Heatmap of the gene CNV frequencies with a custom title and the "Blues"
# continuous colour scale instead of the default.
ag3.plot_frequencies_heatmap(
    df=cyp6aap_cnv_freqs_df,
    title="Cyp6p/aa gene CNV frequencies",
    color_continuous_scale="Blues",
)