In [None]:
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
import matplotlib.pyplot as plt
import seaborn as sns

from jacksonii_analyses import vcf_parser, clustering
from jacksonii_analyses.plotting import *

In [None]:
# Genotype table produced from the filtered VCF by the project's vcf_parser helper.
geno_df = vcf_parser.vcf_to_geno_df("../data/var/filtered_variants.vcf.gz")

# Population assignments: turn the "sample" index level into an ordinary column,
# then add a "colormap" column looked up from each row's cleaned population label.
# NOTE(review): `map_colors` is not defined in the visible cells; presumably it is
# provided by the `jacksonii_analyses.plotting` star import — confirm.
pops = clustering.read_populations("../data/samples/populations.txt").reset_index("sample")
pops = pops.assign(colormap=pops["populations_clean"].map(map_colors))

# Table of individuals listed in the admixture output; its "sample" column is
# what the later filtering step matches against.
admixed_samples = pd.read_csv("../data/var/admixture/admixed_individuals.csv")

In [None]:
# Keep only the rows of `pops` whose sample is NOT listed in `admixed_samples`.
clean_pops = pops.loc[
    ~pops["sample"].isin(admixed_samples["sample"])
]

In [None]:
# Select the genotype rows by label, in the order given by clean_pops["sample"],
# so that the rows of X line up with the labels in y.
# NOTE(review): assumes geno_df is indexed by unique sample names — confirm.
geno_df = geno_df.loc[clean_pops["sample"]]

# X: genotype values; y: cleaned population label for each retained sample.
X, y = geno_df.values, clean_pops["populations_clean"].values

In [None]:
# DAPC step 1: project the genotype matrix onto its first 10 principal components.
# random_state is pinned because PCA's default svd_solver="auto" may pick the
# randomized solver for large inputs; without a fixed seed the PC scores (and the
# discriminant axes and figures derived from them) could differ between runs.
# The seed has no effect when an exact solver ends up being used.
pca = PCA(n_components=10, random_state=42)
X_pca = pca.fit_transform(X)

# DAPC step 2: linear discriminant analysis on the PC scores, supervised by the
# population labels in y, keeping the first two discriminant axes.
lda = LDA(n_components=2)
X_lda = lda.fit_transform(X_pca, y)

In [None]:
# Tidy table for plotting: the two discriminant coordinates of each sample plus
# its population label (y is in the same row order as X_lda).
dapc_df = pd.DataFrame(X_lda, columns=["DAPC1", "DAPC2"]).assign(population=y)

In [None]:
# Scatter of all samples on the two discriminant axes; colour and marker both
# encode the population label.
fig, ax = plt.subplots(figsize=(6, 4))
sns.scatterplot(
    data=dapc_df,
    x="DAPC1",
    y="DAPC2",
    hue="population",
    style="population",
    s=60,
    ax=ax,
)
ax.set_title("Discriminant Analysis of Principal Components (DAPC)")
fig.tight_layout()

# Legend is anchored outside the axes (to the right); bbox_inches='tight' on
# save expands the output so it is not cropped.
ax.legend(
    title="Population",
    bbox_to_anchor=(1.05, 1),
    loc="upper left",
    prop={"size": 8},
)
fig.savefig("../data/figs/dapc_plot.svg", bbox_inches="tight")
plt.show()

In [None]:
# Colour per population label, built from the (label, colour) pairs in clean_pops.
palette = dict(zip(clean_pops["populations_clean"], clean_pops["colormap"]))
# All population labels in order of first appearance. Passing the full list as
# both hue_order and style_order pins each population's colour and marker, so
# they do not shift when only a subset of populations is drawn.
style_order = clean_pops["populations_clean"].unique()

# Populations to show in the focused figure.
focus_pops = [
    "A. jacksonii",
    "A. sp. jack1",
    "A. sp. jack6",
    "A. sp. jack5",
    "A. sp. jack2",
]

# Rows of the DAPC table belonging to the focus populations only.
dapc_focus = dapc_df[dapc_df["population"].isin(focus_pops)]

fig, ax = plt.subplots(figsize=(6, 4))
sns.scatterplot(
    data=dapc_focus,
    x="DAPC1",
    y="DAPC2",
    hue="population",
    style="population",
    palette=palette,           # fixed colour per population
    hue_order=style_order,     # full order keeps colours stable
    style_order=style_order,   # full order keeps markers stable
    s=60,
    ax=ax,
)
ax.set_title("DAPC (Focused on 5 Populations)")
fig.tight_layout()

# Because hue_order/style_order list every population, the legend entries that
# seaborn registers can include populations that are not drawn in this figure.
# Keep only the entries for the focus populations so the legend matches the plot.
handles, labels = ax.get_legend_handles_labels()
focus_entries = [
    (handle, label) for handle, label in zip(handles, labels) if label in focus_pops
]
ax.legend(
    handles=[handle for handle, _ in focus_entries],
    labels=[label for _, label in focus_entries],
    title="Population",
    bbox_to_anchor=(1.05, 1),
    loc="upper left",
    prop={"size": 8},
)
fig.savefig("../data/figs/dapc_plot_5p.svg", bbox_inches="tight")
plt.show()