# In [7]:
import pandas as pd
import plotly.express as px
from itertools import combinations
from pathlib import Path

# Load the per-folder aggregated summaries and stack them into `agg_summary`.
#
# Each folder listed in `folders` is expected to contain
# "Summaries/aggregated_summary_2014_2020.csv" below `base_dir`. Folders whose
# file is missing are reported and skipped.

# Base directory for all folders: "data" inside the parent of the current
# working directory.
base_dir = Path.cwd().parent / "data"

# Folders to process
folders = ["GFSISB", "GFSFALCS", "GFSMAB", "GFSSSUC"]

# One DataFrame per folder whose aggregated summary was found
data_frames = []

for folder in folders:
    summary_dir = base_dir / folder / "Summaries"
    aggregated_file = summary_dir / 'aggregated_summary_2014_2020.csv'

    # Guard clause: report the missing file and move on to the next folder.
    if not aggregated_file.exists():
        print(f"Aggregated file not found in {folder}.")
        continue

    df = pd.read_csv(aggregated_file)

    # Tag every row with its source folder; used later to color the plots.
    df['Folder'] = folder

    # GFSISB names its classification column differently; align it with the
    # column name used when plotting so the combined frame has a single
    # 'Classification Name' column.
    if folder == "GFSISB":
        df = df.rename(
            columns={'Stocks, Transactions, and Other Flows Name': 'Classification Name'}
        )

    data_frames.append(df)

# pd.concat raises an unhelpful "No objects to concatenate" ValueError on an
# empty list, so fail early with a message that says where we looked.
if not data_frames:
    raise FileNotFoundError(
        f"No aggregated summary files were found under {base_dir} "
        f"(folders checked: {', '.join(folders)})."
    )

# Combine all DataFrames into one, discarding the per-file row indices
agg_summary = pd.concat(data_frames, ignore_index=True)

# Columns of `agg_summary` that are plotted against each other
numeric_columns = ['Sum of Legitimate Entries', 'Number of Covered Countries']

# Build one scatter plot per unordered pair of numeric columns, keeping every
# figure in `scatter_plots` and displaying each one as soon as it is created.
scatter_plots = []
for x_col, y_col in combinations(numeric_columns, 2):
    # Collect the plot options first so the px.scatter call stays short.
    scatter_options = dict(
        x=x_col,
        y=y_col,
        color='Folder',  # one color per source folder
        hover_data=['Classification Name', 'Sector Name', 'Unit Name'],
        title=f"Scatter Plot of {x_col} vs {y_col}",
        labels={x_col: x_col, y_col: y_col},
    )
    fig = px.scatter(agg_summary, **scatter_options)
    scatter_plots.append(fig)
    fig.show()  # Display the plot

# Write every generated figure to its own HTML file under <base_dir>/All_Plots,
# creating the directory (and any missing parents) if it does not exist yet.
# Files are numbered from 1 in the order the plots were created.
output_dir = base_dir.joinpath("All_Plots")
output_dir.mkdir(parents=True, exist_ok=True)
for i, fig in enumerate(scatter_plots, start=1):
    html_path = output_dir / f"scatter_plot_{i}.html"
    fig.write_html(html_path)
