# File Change Matrix Visualizer

This notebook demonstrates the FileChangeMatrixVisualizer with proper pandas display configuration.

In [1]:
import os

import pandas as pd
from IPython.display import display

from tardisbase.testing.regression_comparison.run_tests import run_tests
from tardisbase.testing.regression_comparison.util import get_last_n_commits
from tardisbase.testing.regression_comparison.visualize_files import FileChangeMatrixVisualizer

In [2]:
# Configure pandas display options so wide/long frames (such as the file change
# matrix) are rendered without truncation. `pd` comes from the imports cell at the
# top of the notebook, so it is not re-imported here.
pd.set_option("display.max_colwidth", None)  # never shorten cell contents
pd.set_option("display.width", None)         # let pandas auto-detect the output width
pd.set_option("display.max_columns", None)   # show every column
pd.set_option("display.max_rows", None)      # show every row

In [3]:
# Configuration for the analysis.
#
# Both repository checkouts are expected to live under one workspace directory.
# Set the TARDIS_WORKSPACE environment variable to point at your own checkout
# root instead of editing the notebook; the fallback keeps the original location.
WORKSPACE_DIR = os.environ.get("TARDIS_WORKSPACE", "/home/riddhi/workspace/tardis-main")

config = {
    "tardis_repo_path": os.path.join(WORKSPACE_DIR, "tardis"),
    "regression_data_repo_path": os.path.join(WORKSPACE_DIR, "tardis-regression-data"),
    "branch": "master",
    # Number of commits; passed as `n=` in "Option 1 / Case 1".
    "n": 3,
    "target_file": "tardis/spectrum/tests/test_spectrum_solver/test_spectrum_solver/TestSpectrumSolver.h5",
    # Explicit commit hashes; passed as `commits_input=` in "Option 1 / Case 2".
    "commits": [
        "300e565e83112528faaa76e970057ffb1b13f743",
        "2a06fdfb60190bbd9b49ff572d78772607138660",
        "2d775dcd1c486227532f537fc41066e942000e56",
    ],
    # Alternative: n commits with a custom gap. NOTE(review): `commits` is not
    # imported in this notebook's import cell - import it before enabling this line.
    # "commits": commits.calculate_commits(n=10, gap=0, info=False),
    "use_conda": True,
    "conda_manager": "conda",
}

## Option 1: Run tests to generate regression data commits

### Case 1: Test latest N TARDIS commits

In [None]:
# Case 1: no explicit commit list is given; run_tests receives n=config["n"].
(
    processed_commits,
    regression_commits,
    original_head,
    target_file_path,
) = run_tests(
    config["tardis_repo_path"],
    config["regression_data_repo_path"],
    config["branch"],
    config["target_file"],
    n=config["n"],
    use_conda=config["use_conda"],
    conda_manager=config["conda_manager"],
)

### Case 2: Test specific TARDIS commits
<br>Use this case when `commits_input` is a list of commit hashes.

In [None]:
# Case 2: the explicit hashes in config["commits"] are passed as commits_input.
(
    processed_commits,
    regression_commits,
    original_head,
    target_file_path,
) = run_tests(
    config["tardis_repo_path"],
    config["regression_data_repo_path"],
    config["branch"],
    config["target_file"],
    commits_input=config["commits"],
    use_conda=config["use_conda"],
    conda_manager=config["conda_manager"],
)

## Option 2: Use existing regression data commits

### A.  Manual Commit Selection

In [4]:
# Hand-picked regression-data commit hashes to compare, in the order they are
# handed to the visualizer.
regression_commits = [
    "66a96a847c873544babb7bf934040c86433a5962",
    "d12d869bd2bb2038c9090852ee9ef998959f412d",
    "b008a7180440a697ad5b54a9f77b692d4f71b120",
    "a2a946a43d710c44bb3b08bcae69359fe13ed032",
    "9404dc594563d9457e3ba91fcaa8400cae231801",
]

### B.  Automatically fetch the most recent N commits from regression data repository

In [None]:
# Ask the regression-data repository for its 5 most recent commits, then echo
# the result as the cell output.
regression_commits = get_last_n_commits(
    repo_path=config["regression_data_repo_path"],
    n=5,
)
regression_commits

## Visualize File Changes
Create a visualizer object to analyze file changes across commits.

Case 1: Direct regression data commits (no TARDIS commits)

In [5]:
# Build the visualizer from regression-data commits only (no TARDIS commits).
visualizer = FileChangeMatrixVisualizer(
    commits=regression_commits,
    regression_repo_path=config["regression_data_repo_path"],
    # Optional filter - uncomment to restrict the analysis to specific files:
    # file_extensions=('.h5', '.hdf5'),
)


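Case 2: Regression data commits generated from TARDIS commits (requires `processed_commits` and `regression_commits` from running one of the Option 1 cells first)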

In [5]:
# Build the visualizer from regression-data commits and also hand it the TARDIS
# commits (processed_commits) together with the TARDIS repository path.
visualizer = FileChangeMatrixVisualizer(
    commits=regression_commits,
    regression_repo_path=config["regression_data_repo_path"],
    tardis_commits=processed_commits,
    tardis_repo_path=config["tardis_repo_path"],
    # Optional filter - uncomment to restrict the analysis to specific files:
    # file_extensions=('.h5', '.hdf5'),
)

In [6]:
# Analyze the commits given to the visualizer. As the captured output below
# shows, this prints one progress line per transition between consecutive
# commits, followed by the total number of files found.
visualizer.analyze_commits()

Analyzing 5 commits (4 transitions)...
Processing transition 1/4: d12d86-66a96a
Processing transition 2/4: b008a7-d12d86
Processing transition 3/4: a2a946-b008a7
Processing transition 4/4: 9404dc-a2a946
Found 307 total files across all transitions.


### Print all files found during analysis

In [None]:
# Collect every HDF5 path the analysis found, sorted alphabetically.
h5_files = sorted(
    path for path in visualizer.all_files if path.endswith(('.h5', '.hdf5'))
)

# Numbered listing (1-based), followed by a summary count.
for position, h5_path in enumerate(h5_files, start=1):
    print(f"{position:3d}. {h5_path}")
print(f"\nTotal: {len(h5_files)} .h5/.hdf5 files")

### Display the file change matrix 

In [9]:
# Render the file change matrix with all truncation limits lifted for this cell only.
# Jupyter auto-displays only a top-level last expression, so a bare call inside the
# `with` body would be silently discarded; the result is unpacked and passed to
# display() explicitly, while the option context is still active.
with pd.option_context(
    'display.max_colwidth', None,
    'display.width', None,
    'display.max_columns', None,
    'display.max_rows', None,
):
    commit_info, legend, matrix = visualizer.get_analysis_results()
    display(matrix)

In [10]:
# get_analysis_results() returns three values, unpacked here as commit_info,
# legend and matrix; only the matrix is rendered in this cell.
commit_info, legend, matrix = visualizer.get_analysis_results()
display(matrix)

Unnamed: 0,Files,d12d86-66a96a,b008a7-d12d86,a2a946-b008a7,9404dc-a2a946
0,.gitattributes,•,•,M,•
1,.github/actions/setup_env/action.yml,A,•,•,•
2,.github/workflows/run-notebook.yml,A,•,•,•
3,.github/workflows/trigger-lfs-cache.yml,A,•,•,•
4,.gitignore,•,•,•,•
5,LICENSE,•,•,•,•
6,__init__.py,A,•,•,•
7,arepo_data/arepo_snapshot.hdf5,•,•,•,•
8,arepo_data/arepo_snapshot.json,•,•,•,•
9,atom_data/chianti_He.h5,•,•,M,•
