In [None]:
import pandas as pd
import os
import matplotlib.pyplot as plt
import seaborn as sns

from jacksonii_analyses import vcf_parser, clustering

In [None]:
from importlib import reload

# Re-import the project modules so edits made on disk are picked up
# without restarting the kernel (same order as they were imported).
for _module in (vcf_parser, clustering):
    reload(_module)

# Output directory for the figures saved further down; no error if present.
os.makedirs("../data/figs", exist_ok=True)

In [None]:
# Parse the filtered VCF with the project's parser.
# NOTE(review): presumably a samples-by-variants genotype table — confirm in vcf_parser.
variants = vcf_parser.vcf_to_geno_df("../data/var/filtered_variants.vcf.gz")

# Preview only the top-left 5x5 corner instead of dumping the whole table.
variants.iloc[0:5, 0:5]

In [None]:
# Group labels, listed in the order that colours are assigned to them.
groups = [
    "A. jacksonii",
    "A. sp. T31",
    "A. sp. jack6",
    "A. sp. jack5",
    "A. sp. jack3",
    "A. sp. jack2",
    "A. sp. jack1",
    "A. sp. F11",
]

# One hex colour per entry of `groups` (positionally paired).
palette = [
    "#1f77b4",  # blue
    "#ff7f0e",  # orange
    "#2ca02c",  # green
    "#d62728",  # red
    "#9467bd",  # purple
    "#8c564b",  # brown
    "#e377c2",  # pink
    "#7f7f7f",  # gray
]

# Lookup table: group label -> hex colour.
map_colors = {label: colour for label, colour in zip(groups, palette)}

# Sample/population table read by the project helper.
pops = clustering.read_populations("../data/samples/populations.txt")

# Attach a colour to every row; labels absent from `map_colors` become NaN.
pops["colormap"] = pops["populations_clean"].map(map_colors)
pops.head()

In [None]:
# Run the project's PCA helper on the genotype table, keeping two components
# and passing the population table along with it.
pca_kwargs = dict(geno_df=variants, n_components=2, pops=pops)
pc_df = clustering.calculate_pca(**pca_kwargs)

pc_df.head()

In [None]:
# Colour lookup for the plot: population label -> hex colour, taken from `pops`.
palette = dict(zip(pops["populations_clean"], pops["colormap"]))

# Compact figure; one axes, drawn through the explicit fig/ax interface.
fig, ax = plt.subplots(figsize=(5.5, 3.7))

# PC1 vs PC2, with each population distinguished by both colour and marker.
sns.scatterplot(
    data=pc_df,
    x="PC1",
    y="PC2",
    hue="populations",
    style="populations",
    palette=palette,
    s=60,
    edgecolor="black",
    linewidth=0.8,
    ax=ax,
)

# Dotted reference lines through the origin of the PC plane.
ax.axhline(0, color="black", linestyle="dotted", linewidth=1)
ax.axvline(0, color="black", linestyle="dotted", linewidth=1)

# Legend sits outside the axes, to the right of the plot area.
ax.legend(title="Population", bbox_to_anchor=(1.05, 1), loc="upper left")

fig.tight_layout()
fig.savefig("../data/figs/pca_plot_color.svg")  # vector output (SVG)
plt.show()

In [None]:
# Greyscale version of the PCA scatter: same layout as the colour figure,
# but populations are shaded from a grey colormap and told apart by marker.
fig, ax = plt.subplots(figsize=(5.5, 3.7))  # Smaller plot

sns.scatterplot(
    data=pc_df,
    x="PC1",
    y="PC2",
    hue="populations",
    style="populations",
    # 'gray' is Matplotlib's canonical name for this colormap. The 'grey'
    # spelling is only an alias registered by newer Matplotlib releases; where
    # it is missing, seaborn raises "'grey' is not a valid palette name".
    # Both names refer to the same colormap, so the shades are unchanged.
    palette="gray",
    s=60,
    edgecolor="black",
    linewidth=0.8,
    ax=ax,
)

# Dotted reference lines through the origin of the PC plane.
ax.axhline(0, color="black", linestyle="dotted", linewidth=1)
ax.axvline(0, color="black", linestyle="dotted", linewidth=1)

# Legend sits outside the axes, to the right of the plot area.
ax.legend(title="Population", bbox_to_anchor=(1.05, 1), loc="upper left")

fig.tight_layout()
fig.savefig("../data/figs/pca_plot_grey.svg")  # Save as SVG
plt.show()