# In [2]:
import pandas as pd
import plotly.express as px

# Summary: This script reads a CSV file containing PCA analysis results of Q matrix parameters of different substitution models and generates an interactive 3D PCA plot.
# Args:
#   file_path: str, path to the CSV file
# Returns:
#   None, displays an interactive 3D PCA plot

def plot_3d_pca(file_path: str) -> None:
    """Show an interactive 3D scatter plot of PCA results read from a CSV file.

    The CSV is loaded with ``pandas.read_csv`` and must provide the columns
    ``PC1``, ``PC2`` and ``PC3`` (plot axes), ``mean_gc_percentage`` (marker
    colour), ``mean_genome_size`` (marker size) and ``GTDB_Phylum`` (hover
    label).

    Args:
        file_path: Path to the CSV file.

    Returns:
        None. The figure is displayed with ``fig.show()``.

    Raises:
        KeyError: If any of the required columns is missing from the CSV.
    """
    required_columns = (
        'PC1', 'PC2', 'PC3',
        'mean_gc_percentage', 'mean_genome_size', 'GTDB_Phylum',
    )

    # Read the CSV file
    df = pd.read_csv(file_path)

    # Fail early with a single clear message rather than an error raised
    # from deep inside the plotting call.
    missing = [col for col in required_columns if col not in df.columns]
    if missing:
        raise KeyError(
            f"{file_path} is missing required column(s): {', '.join(missing)}"
        )

    # The raw genome-size column is passed as the marker size. An earlier
    # version also derived a max-normalised copy of it that was never used
    # by the plot, so that dead computation has been removed.
    fig = px.scatter_3d(
        df,
        x='PC1',
        y='PC2',
        z='PC3',
        color='mean_gc_percentage',
        size='mean_genome_size',
        hover_name='GTDB_Phylum',
        title='Interactive 3D PCA Plot of Substitution Models',
    )

    fig.update_layout(width=800, height=600)

    # Display the plot
    fig.show()

# Example usage: render the PCA plot for the trained-model statistics table.
file_path = (
    '/home/tim/project/GTDB_TREE/analysis/model_and_genome/'
    'genome_numeric_stat_with_PCA_trained.csv'
)
plot_3d_pca(file_path=file_path)
