In [None]:
# Environment setup used when this notebook was written:
#   1. Create a clean Python 3.10 virtual environment: `python -m venv env_qb_gsee_benchmark`
#   2. Activate it: `source env_qb_gsee_benchmark/bin/activate`
#   3. Install Jupyter: `python -m pip install notebook`
#   4. Open and run this notebook in VS Code.

# Install pip packages in the current Jupyter kernel
# (pattern from https://jakevdp.github.io/blog/2017/12/05/installing-python-packages-from-jupyter/).
# `sys.executable` is interpolated into the shell command so that pip is run through the
# interpreter backing this kernel. The extra index URL is passed to pip in addition to the
# default index; the package itself is installed straight from its git repository.
import sys
!{sys.executable} -m pip install --extra-index-url=https://block-hczhai.github.io/block2-preview/pypi/ git+https://github.com/jtcantin/dmrghandler
# Initial installation took 5 min

In [1]:
import numpy as np
import h5py
import dmrghandler.hdf5_io as hdf5_io
import dmrghandler.data_processing as dp
from pathlib import Path
import openpyxl as px
import openpyxl.chart as px_chart
import pandas as pd
import re
import io
import scipy as sp
import json
import jsonschema


This notebook collects the DMRG calculation data and then writes an Excel file with the data and plots (one sheet per calculation run), as well as CSV files that contain the performance-metric data (bond dimension, DMRG energy, time, discarded weights, etc.).

To run this notebook and collect the data, the following are needed:
- catalysis_instances.csv
    
    This contains the metadata for the catalyst instances.
    Put into `catalyst_meta_data_path`

- DMRG output datafolders (i.e. those that were output with the UUIDs)
    
    Put folder of these datafolders into `data_file_path`

- The run commands returned by the `prepare_calcs_*.py` files
    
    Put into `run_commands`

- SLURM emails (.eml or other text format should work)
    
    The notebook will automatically collect the run times from the email subjects.
    Put folder of these emails into `slurm_emails_folder`

- The FCIDUMP files
    
    Put the folder of these files into `fci_dump_folder`

- Excel output save folder
    
    Put into `excel_storage_path`
    
- name of output excel file
    
    Put into `excel_name`

- Performance metrics folder location
    
    Put into `csv_storage_path`

In [2]:


# Run commands as returned by the `prepare_calcs_*.py` scripts, pasted verbatim.
# The parsing cell further down reads this text line by line:
#   - lines starting with "### " are skipped (section headers and disabled calculations),
#   - "# fcidumps_benchmark/<name>" lines give the FCIDUMP name of the next calculation,
#   - "sbatch ../<uuid>/submit_<uuid>.sh" lines give the calculation UUID.
# To exclude a calculation from the collected results, prefix both of its lines with "### ".
run_commands = \
"""
### prepare_calcs_gsee_benchmark_coarse_set_run1.py
### ########################################################
# fcidumps_benchmark/FCIDUMP_L_4x4_Ut_2.0
sbatch ../c3b3913d-8fe0-4f08-ac1b-b9d4a62ee370/submit_c3b3913d-8fe0-4f08-ac1b-b9d4a62ee370.sh
### # fcidumps_benchmark/FCIDUMP_b_sto-3g
### sbatch ../2d6b0ad9-ace6-4c54-bfb8-8f6b74f73ca2/submit_2d6b0ad9-ace6-4c54-bfb8-8f6b74f73ca2.sh
# fcidumps_benchmark/FCIDUMP_d_1.68_b_sto-3g_ne_12
sbatch ../a9cf5764-946b-4f60-80a2-0b35434e1edd/submit_a9cf5764-946b-4f60-80a2-0b35434e1edd.sh
# fcidumps_benchmark/FCIDUMP_g_bent_b_sto-3g
sbatch ../2fdb5351-f05b-47af-af7f-a5d979aefbee/submit_2fdb5351-f05b-47af-af7f-a5d979aefbee.sh
# fcidumps_benchmark/FCIDUMP_g_ring_b_sto-3g
sbatch ../1c03f568-afd2-4673-83df-55f57a3b33b5/submit_1c03f568-afd2-4673-83df-55f57a3b33b5.sh
### prepare_calcs_gsee_benchmark_coarse_set_run2.py
### ########################################################
# fcidumps_benchmark/FCIDUMP_L_4x4_Ut_4.0
sbatch ../8a6294b8-e8bf-49e8-b76b-4dde7e589aac/submit_8a6294b8-e8bf-49e8-b76b-4dde7e589aac.sh
# fcidumps_benchmark/FCIDUMP_L_4x4_Ut_8.0
sbatch ../91df2a51-ec2d-48c1-a44d-e47321fd2dc6/submit_91df2a51-ec2d-48c1-a44d-e47321fd2dc6.sh
### # fcidumps_benchmark/FCIDUMP_L_6x6_Ut_2.0
### sbatch ../33ab744c-b8f9-4bf2-94a7-e9e959393dc1/submit_33ab744c-b8f9-4bf2-94a7-e9e959393dc1.sh
### # fcidumps_benchmark/FCIDUMP_L_6x6_Ut_4.0
### sbatch ../b80febb1-ee19-43bf-bd14-f94575dfc0ee/submit_b80febb1-ee19-43bf-bd14-f94575dfc0ee.sh
### # fcidumps_benchmark/FCIDUMP_L_6x6_Ut_8.0
### sbatch ../c40f5a7c-9c51-4a26-956a-e01ac591daf9/submit_c40f5a7c-9c51-4a26-956a-e01ac591daf9.sh
### # fcidumps_benchmark/FCIDUMP_b_cc-pvdz
### sbatch ../3ec57d84-4297-411b-a9e2-4dc8271a8925/submit_3ec57d84-4297-411b-a9e2-4dc8271a8925.sh
### # fcidumps_benchmark/FCIDUMP_b_sto-3g
### sbatch ../301f3dd5-58c8-45e7-8af1-27dece173fe2/submit_301f3dd5-58c8-45e7-8af1-27dece173fe2.sh
### # fcidumps_benchmark/FCIDUMP_d_1.68_b_cc-pvdz-dk_ne_12
### sbatch ../d77121f6-1c20-4d1f-a92a-9c3d997f775c/submit_d77121f6-1c20-4d1f-a92a-9c3d997f775c.sh
### # fcidumps_benchmark/FCIDUMP_d_1.68_b_cc-pvdz-dk_ne_28
### sbatch ../f872bf92-5f9b-4dcb-a69d-9c9b5b3a0644/submit_f872bf92-5f9b-4dcb-a69d-9c9b5b3a0644.sh
### # fcidumps_benchmark/FCIDUMP_d_1.68_b_sto-3g_ne_28
### sbatch ../2f32b0c6-5af1-4ae7-94f1-88a115e17395/submit_2f32b0c6-5af1-4ae7-94f1-88a115e17395.sh
### # fcidumps_benchmark/FCIDUMP_d_2.0_b_cc-pvdz-dk_ne_12
### sbatch ../caf517af-dfb9-4a5f-90b9-08e6f2c76b3b/submit_caf517af-dfb9-4a5f-90b9-08e6f2c76b3b.sh
### # fcidumps_benchmark/FCIDUMP_d_2.0_b_cc-pvdz-dk_ne_28
### sbatch ../ed6e21a0-81e2-4d5d-8e8a-620f0c6ee99e/submit_ed6e21a0-81e2-4d5d-8e8a-620f0c6ee99e.sh
### # fcidumps_benchmark/FCIDUMP_d_2.0_b_sto-3g_ne_12
### sbatch ../7f88f924-f572-4caf-a503-d699fb94e779/submit_7f88f924-f572-4caf-a503-d699fb94e779.sh
### # fcidumps_benchmark/FCIDUMP_d_2.0_b_sto-3g_ne_28
### sbatch ../5e9c375c-8e68-4942-a291-c7de82fe893e/submit_5e9c375c-8e68-4942-a291-c7de82fe893e.sh
### # fcidumps_benchmark/FCIDUMP_d_3.0_b_cc-pvdz-dk_ne_12
### sbatch ../27df35cb-a60f-41a8-a7ad-bdc97a4350c1/submit_27df35cb-a60f-41a8-a7ad-bdc97a4350c1.sh
### # fcidumps_benchmark/FCIDUMP_d_3.0_b_cc-pvdz-dk_ne_28
### sbatch ../74298482-34fd-47b8-90b9-75a3b792a4e0/submit_74298482-34fd-47b8-90b9-75a3b792a4e0.sh
### # fcidumps_benchmark/FCIDUMP_d_3.0_b_sto-3g_ne_12
### sbatch ../eb5136f2-c83e-4eec-8fcb-5c95f85ed2f2/submit_eb5136f2-c83e-4eec-8fcb-5c95f85ed2f2.sh
### # fcidumps_benchmark/FCIDUMP_d_3.0_b_sto-3g_ne_28
### sbatch ../dd18bb39-6f67-49d8-96b5-798a549a8380/submit_dd18bb39-6f67-49d8-96b5-798a549a8380.sh
# fcidumps_benchmark/FCIDUMP_g_bent_b_cc-pvdz
sbatch ../cd1e3451-84cf-4eab-9e17-da1473ba6c29/submit_cd1e3451-84cf-4eab-9e17-da1473ba6c29.sh
### # fcidumps_benchmark/FCIDUMP_g_bent_b_cc-pvtz
### sbatch ../3763f681-3cdc-4524-85e9-4d47ac2c0bc4/submit_3763f681-3cdc-4524-85e9-4d47ac2c0bc4.sh
### # fcidumps_benchmark/FCIDUMP_g_ring_b_cc-pvdz
### sbatch ../07d486cf-b7a5-4427-b21b-881a5bf981cd/submit_07d486cf-b7a5-4427-b21b-881a5bf981cd.sh
### # fcidumps_benchmark/FCIDUMP_g_ring_b_cc-pvtz
### sbatch ../90d20e14-7c18-4354-8de2-27e2a1360103/submit_90d20e14-7c18-4354-8de2-27e2a1360103.sh
### prepare_calcs_gsee_benchmark_coarse_set_run3.py
### ########################################################
# fcidumps_benchmark/fcidump.59_5_16_noncan_0.2_new
sbatch ../079889f3-1444-4015-9ab9-deb0a3384ba1/submit_079889f3-1444-4015-9ab9-deb0a3384ba1.sh
# fcidumps_benchmark/fcidump.60_5_16_noncan_0.2_new
sbatch ../1c5fb01c-b4a9-424d-a939-4408c96f63fe/submit_1c5fb01c-b4a9-424d-a939-4408c96f63fe.sh
# fcidumps_benchmark/fcidump.61_3_15_af_noncan_0.2_new
sbatch ../f0ce715c-0413-4cc4-b944-8e53d3b8a61b/submit_f0ce715c-0413-4cc4-b944-8e53d3b8a61b.sh
# fcidumps_benchmark/fcidump.62_3_15_af_noncan_0.2_new
sbatch ../7468274e-1270-4c7d-b92b-9898b33603a4/submit_7468274e-1270-4c7d-b92b-9898b33603a4.sh
# fcidumps_benchmark/fcidump.63_5_15_af_ts_noncan_0.2_new
sbatch ../a5089352-7a01-43f5-9640-cc4d173ab7e5/submit_a5089352-7a01-43f5-9640-cc4d173ab7e5.sh
# fcidumps_benchmark/fcidump.64_5_15_af_ts_noncan_0.2_new
sbatch ../07f969b9-648f-4e38-bc74-7436a2b1ebae/submit_07f969b9-648f-4e38-bc74-7436a2b1ebae.sh
# fcidumps_benchmark/fcidump.65_5_15_af_noncan_0.2_new
sbatch ../32fedc5d-c0e8-4346-8031-a0b7929800f8/submit_32fedc5d-c0e8-4346-8031-a0b7929800f8.sh
# fcidumps_benchmark/fcidump.66_5_15_af_noncan_0.2_new
sbatch ../0f454426-efd4-4fb5-be89-fb36b2ab7391/submit_0f454426-efd4-4fb5-be89-fb36b2ab7391.sh
### prepare_calcs_gsee_benchmark_coarse_set_run4.py
### ########################################################
# fcidumps_benchmark/FCIDUMP_L_6x6_Ut_2.0
sbatch ../e14e1a8a-5a8c-4efa-891e-a09fdc45efb8/submit_e14e1a8a-5a8c-4efa-891e-a09fdc45efb8.sh
# fcidumps_benchmark/FCIDUMP_L_6x6_Ut_4.0
sbatch ../82762dcb-3260-4772-9410-657cbfe80237/submit_82762dcb-3260-4772-9410-657cbfe80237.sh
# fcidumps_benchmark/FCIDUMP_L_6x6_Ut_8.0
sbatch ../3cc3c421-93d7-4d36-a717-96b85e5cf1fa/submit_3cc3c421-93d7-4d36-a717-96b85e5cf1fa.sh
# fcidumps_benchmark/FCIDUMP_b_cc-pvdz
sbatch ../df22f65d-ee8f-4f89-93e2-f5183df86052/submit_df22f65d-ee8f-4f89-93e2-f5183df86052.sh
# fcidumps_benchmark/FCIDUMP_b_sto-3g
sbatch ../2e810de4-9276-4551-9090-adff21bd3fdf/submit_2e810de4-9276-4551-9090-adff21bd3fdf.sh
# fcidumps_benchmark/FCIDUMP_d_1.68_b_cc-pvdz-dk_ne_12
sbatch ../8180f3d5-227a-4367-ab2a-d0b1a3bdbfa6/submit_8180f3d5-227a-4367-ab2a-d0b1a3bdbfa6.sh
# fcidumps_benchmark/FCIDUMP_d_1.68_b_cc-pvdz-dk_ne_28
sbatch ../e0a6f050-c351-4afc-87ee-630b134f7633/submit_e0a6f050-c351-4afc-87ee-630b134f7633.sh
# fcidumps_benchmark/FCIDUMP_d_1.68_b_sto-3g_ne_12
sbatch ../cddfd0fb-521a-4026-a6ed-e8d9ab857d3f/submit_cddfd0fb-521a-4026-a6ed-e8d9ab857d3f.sh
# fcidumps_benchmark/FCIDUMP_d_1.68_b_sto-3g_ne_28
sbatch ../5553b67e-70b1-4b00-8f58-b2e2f6e130b7/submit_5553b67e-70b1-4b00-8f58-b2e2f6e130b7.sh
# fcidumps_benchmark/FCIDUMP_d_2.0_b_cc-pvdz-dk_ne_12
sbatch ../d5d3afb5-c4fa-416c-958a-1758ba62cfdb/submit_d5d3afb5-c4fa-416c-958a-1758ba62cfdb.sh
# fcidumps_benchmark/FCIDUMP_d_2.0_b_cc-pvdz-dk_ne_28
sbatch ../c99a0469-be93-4f8a-949d-9e85d95e7f7d/submit_c99a0469-be93-4f8a-949d-9e85d95e7f7d.sh
# fcidumps_benchmark/FCIDUMP_d_2.0_b_sto-3g_ne_12
sbatch ../86da03bf-9604-403e-9794-7cbf2718e2ea/submit_86da03bf-9604-403e-9794-7cbf2718e2ea.sh
# fcidumps_benchmark/FCIDUMP_d_2.0_b_sto-3g_ne_28
sbatch ../e7592f67-97b5-4bb8-833c-b15f68ed5827/submit_e7592f67-97b5-4bb8-833c-b15f68ed5827.sh
# fcidumps_benchmark/FCIDUMP_d_3.0_b_cc-pvdz-dk_ne_12
sbatch ../f888c7a8-fa98-4618-bf65-5e22a437eb8d/submit_f888c7a8-fa98-4618-bf65-5e22a437eb8d.sh
# fcidumps_benchmark/FCIDUMP_d_3.0_b_cc-pvdz-dk_ne_28
sbatch ../ce96d925-667f-4f25-85d5-c619df4fb292/submit_ce96d925-667f-4f25-85d5-c619df4fb292.sh
# fcidumps_benchmark/FCIDUMP_d_3.0_b_sto-3g_ne_12
sbatch ../17e71761-7dab-4086-bc0e-f2c7c563c966/submit_17e71761-7dab-4086-bc0e-f2c7c563c966.sh
# fcidumps_benchmark/FCIDUMP_d_3.0_b_sto-3g_ne_28
sbatch ../79192bbd-cb8b-4ff3-986b-dc7bd61bab48/submit_79192bbd-cb8b-4ff3-986b-dc7bd61bab48.sh
# fcidumps_benchmark/FCIDUMP_g_bent_b_cc-pvtz
sbatch ../8f37335a-a9b0-44bd-8f8c-c46ad620199e/submit_8f37335a-a9b0-44bd-8f8c-c46ad620199e.sh
# fcidumps_benchmark/FCIDUMP_g_ring_b_cc-pvdz
sbatch ../4b4c321e-9d4f-4a77-b974-60f196600970/submit_4b4c321e-9d4f-4a77-b974-60f196600970.sh
# fcidumps_benchmark/FCIDUMP_g_ring_b_cc-pvtz
sbatch ../03d248fe-110a-495f-86ca-1822f33f6184/submit_03d248fe-110a-495f-86ca-1822f33f6184.sh
### prepare_calcs_gsee_benchmark_coarse_set_run5.py
### ########################################################
# fcidumps_benchmark/FCIDUMP_L_6x6_Ut_2.0
sbatch ../258d7586-e8cb-4ecf-9266-487f8f5ab7ac/submit_258d7586-e8cb-4ecf-9266-487f8f5ab7ac.sh
# fcidumps_benchmark/FCIDUMP_L_6x6_Ut_4.0
sbatch ../247d84b9-5c7c-49f7-83bc-ddcaa2868314/submit_247d84b9-5c7c-49f7-83bc-ddcaa2868314.sh
# fcidumps_benchmark/FCIDUMP_b_sto-3g
sbatch ../78d3b2f0-7d9d-4ca6-9afb-048c320a520d/submit_78d3b2f0-7d9d-4ca6-9afb-048c320a520d.sh
# fcidumps_benchmark/FCIDUMP_d_1.68_b_sto-3g_ne_12
sbatch ../77e8718a-17e7-4710-9273-bf57c39f8abe/submit_77e8718a-17e7-4710-9273-bf57c39f8abe.sh
# fcidumps_benchmark/FCIDUMP_d_1.68_b_sto-3g_ne_28
sbatch ../2bee219c-de13-46d0-9e0b-55ed8c05a61d/submit_2bee219c-de13-46d0-9e0b-55ed8c05a61d.sh
# fcidumps_benchmark/FCIDUMP_d_2.0_b_sto-3g_ne_12
sbatch ../5c07263f-9f70-428c-901c-ccd1b841f422/submit_5c07263f-9f70-428c-901c-ccd1b841f422.sh
# fcidumps_benchmark/FCIDUMP_d_2.0_b_sto-3g_ne_28
sbatch ../13215551-94fc-4813-b4bf-ce5d459ef306/submit_13215551-94fc-4813-b4bf-ce5d459ef306.sh
# fcidumps_benchmark/FCIDUMP_d_3.0_b_sto-3g_ne_12
sbatch ../0e74f7aa-31f0-494c-b9d2-c83ff654074d/submit_0e74f7aa-31f0-494c-b9d2-c83ff654074d.sh
# fcidumps_benchmark/FCIDUMP_d_3.0_b_sto-3g_ne_28
sbatch ../6b3a675b-80b8-4d05-919b-33d1124fd119/submit_6b3a675b-80b8-4d05-919b-33d1124fd119.sh
# fcidumps_benchmark/FCIDUMP_g_ring_b_cc-pvdz
sbatch ../be3636dd-1f57-4bde-abaf-0055c4dcf78e/submit_be3636dd-1f57-4bde-abaf-0055c4dcf78e.sh
"""

In [3]:
# Extrapolation results keyed by calculation UUID (the UUIDs used in `run_commands`).
# Each value is either None or a dict with the keys "energy", "energy_95_ci",
# "extrapolated_bond_dimension", "extrapolated_bond_dimension_lower_bound" and
# "extrapolated_bond_dimension_upper_bound".
# NOTE(review): the numbers are pasted in from a separate analysis; presumably None marks a
# calculation for which no extrapolation is reported — confirm against the consuming cell.
# Every key must appear only once: Python silently keeps the LAST value of a duplicated key.
extrapolation_dict = {
    "c3b3913d-8fe0-4f08-ac1b-b9d4a62ee370": { # FCIDUMP_L_4x4_Ut_2.0
        "energy": -15.535596724273375,
        "energy_95_ci": 0.000018545210676,
        "extrapolated_bond_dimension": 1184,
        "extrapolated_bond_dimension_lower_bound": 780,
        "extrapolated_bond_dimension_upper_bound": 1840,
        # "problem_instance_uuid": ,
        # "instance_data_object_uuid": ,
    },
    "2d6b0ad9-ace6-4c54-bfb8-8f6b74f73ca2": None,
    # "2d6b0ad9-ace6-4c54-bfb8-8f6b74f73ca2_trimmed": None,
    "a9cf5764-946b-4f60-80a2-0b35434e1edd": None,
    "2fdb5351-f05b-47af-af7f-a5d979aefbee": { # FCIDUMP_g_bent_b_sto-3g
        "energy": -221.55895092155683,
        "energy_95_ci": 0.000030277540098,
        "extrapolated_bond_dimension": 29,
        "extrapolated_bond_dimension_lower_bound": 19,
        "extrapolated_bond_dimension_upper_bound": 46,
    },
    "1c03f568-afd2-4673-83df-55f57a3b33b5": { # FCIDUMP_g_ring_b_sto-3g
        "energy": -221.57897267925446,
        "energy_95_ci": 0.000707209581446,
        "extrapolated_bond_dimension": 31,
        "extrapolated_bond_dimension_lower_bound": 20,
        "extrapolated_bond_dimension_upper_bound": 52,
    },
    # "1c03f568-afd2-4673-83df-55f57a3b33b5_trimmed": None,
    "8a6294b8-e8bf-49e8-b76b-4dde7e589aac": { # FCIDUMP_L_4x4_Ut_4.0
        "energy": -1.124615506169888e01,
        "energy_95_ci": 2.075261710211621e-05,
        "extrapolated_bond_dimension": 837,
        "extrapolated_bond_dimension_lower_bound": 796,
        "extrapolated_bond_dimension_upper_bound": 881,
    },
    "91df2a51-ec2d-48c1-a44d-e47321fd2dc6": { # FCIDUMP_L_4x4_Ut_8.0
        "energy": -6.808432968693086e00,
        "energy_95_ci": 2.275661115890902e-05,
        "extrapolated_bond_dimension": 327,
        "extrapolated_bond_dimension_lower_bound": 313,
        "extrapolated_bond_dimension_upper_bound": 342,
    },
    # NOTE: this UUID was listed twice in the original literal. Because the last duplicate
    # wins, the values below (from the second listing) are the ones that were already in
    # effect; they are kept at the position of the first listing so that the dictionary's
    # iteration order is unchanged. The superseded values of the first listing were:
    #     "energy": -2.249131938640924e02,
    #     "energy_95_ci": 1.336501171758497e-03,
    #     "extrapolated_bond_dimension": 7728,
    #     "extrapolated_bond_dimension_lower_bound": 7142,
    #     "extrapolated_bond_dimension_upper_bound": 8374,
    # TODO(review): confirm that the second set of values is the intended one.
    "cd1e3451-84cf-4eab-9e17-da1473ba6c29": {  # FCIDUMP_g_bent_b_cc-pvdz
        "energy": -2.249124066670465e02,
        "energy_95_ci": 1.199101553129490e-03,
        "extrapolated_bond_dimension": 6649,
        "extrapolated_bond_dimension_lower_bound": 5519,
        "extrapolated_bond_dimension_upper_bound": 8066,
    },
    "079889f3-1444-4015-9ab9-deb0a3384ba1": { # fcidump.59_5_16_noncan_0.2_new
        "energy": -1.321593612370238e03,
        "energy_95_ci": 3.189041808771883e-05,
        "extrapolated_bond_dimension": 71,
        "extrapolated_bond_dimension_lower_bound": 65,
        "extrapolated_bond_dimension_upper_bound": 77,
    },
    "1c5fb01c-b4a9-424d-a939-4408c96f63fe": { # fcidump.60_5_16_noncan_0.2_new
        "energy": -1.322107039844683e03,
        "energy_95_ci": 1.848453038429909e-05,
        "extrapolated_bond_dimension": 617,
        "extrapolated_bond_dimension_lower_bound": 545,
        "extrapolated_bond_dimension_upper_bound": 700,
    },
    "f0ce715c-0413-4cc4-b944-8e53d3b8a61b": { # fcidump.61_3_15_af_noncan_0.2_new
        "energy": -1.322582234109127e03,
        "energy_95_ci": 4.804661015269527e-05,
        "extrapolated_bond_dimension": 188,
        "extrapolated_bond_dimension_lower_bound": 171,
        "extrapolated_bond_dimension_upper_bound": 206,
    },
    "7468274e-1270-4c7d-b92b-9898b33603a4": { # fcidump.62_3_15_af_noncan_0.2_new
        "energy": -1.322661962118007e03,
        "energy_95_ci": 2.459384851006630e-05,
        "extrapolated_bond_dimension": 723,
        "extrapolated_bond_dimension_lower_bound": 695,
        "extrapolated_bond_dimension_upper_bound": 752,
    },
    "a5089352-7a01-43f5-9640-cc4d173ab7e5": { # fcidump.63_5_15_af_ts_noncan_0.2_new
        "energy": -1.322594438035404e03,
        "energy_95_ci": 2.843419017733956e-05,
        "extrapolated_bond_dimension": 75,
        "extrapolated_bond_dimension_lower_bound": 71,
        "extrapolated_bond_dimension_upper_bound": 80,
    },
    "07f969b9-648f-4e38-bc74-7436a2b1ebae": { # fcidump.64_5_15_af_ts_noncan_0.2_new
        "energy": -1.322651998080473e03,
        "energy_95_ci": 3.204475191489623e-05,
        "extrapolated_bond_dimension": 340,
        "extrapolated_bond_dimension_lower_bound": 311,
        "extrapolated_bond_dimension_upper_bound": 373,
    },
    "32fedc5d-c0e8-4346-8031-a0b7929800f8": { # fcidump.65_5_15_af_noncan_0.2_new
        "energy": -1.322557395506740e03,
        "energy_95_ci": 2.755012857708406e-05,
        "extrapolated_bond_dimension": 73,
        "extrapolated_bond_dimension_lower_bound": 65,
        "extrapolated_bond_dimension_upper_bound": 81,
    },
    "0f454426-efd4-4fb5-be89-fb36b2ab7391": { # fcidump.66_5_15_af_noncan_0.2_new
        "energy": -1.322656928960710e03,
        "energy_95_ci": 1.334577522488482e-05,
        "extrapolated_bond_dimension": 237,
        "extrapolated_bond_dimension_lower_bound": 218,
        "extrapolated_bond_dimension_upper_bound": 258,
    },
    "3cc3c421-93d7-4d36-a717-96b85e5cf1fa": {  # FCIDUMP_L_6x6_Ut_8.0
        "energy": -1.648626412075126e01,
        "energy_95_ci": 6.752100901260352e-04,
        "extrapolated_bond_dimension": 12231,
        "extrapolated_bond_dimension_lower_bound": 11164,
        "extrapolated_bond_dimension_upper_bound": 13419,
    },
    "4b4c321e-9d4f-4a77-b974-60f196600970": {  # FCIDUMP_g_ring_b_cc-pvdz
        "energy": -2.248762049443819e02,
        "energy_95_ci": 2.961311473403899e-03,
        "extrapolated_bond_dimension": 37535,
        "extrapolated_bond_dimension_lower_bound": 13927,
        "extrapolated_bond_dimension_upper_bound": 126037,
    },
    "17e71761-7dab-4086-bc0e-f2c7c563c966": {  # FCIDUMP_d_3.0_b_sto-3g_ne_12
        "energy": -1.567739602011705e02,
        "energy_95_ci": 1.746224484355911e-06,
        "extrapolated_bond_dimension": 89,
        "extrapolated_bond_dimension_lower_bound": 7,
        "extrapolated_bond_dimension_upper_bound": 2814,
    },
    "f888c7a8-fa98-4618-bf65-5e22a437eb8d": {  # FCIDUMP_d_3.0_b_cc-pvdz-dk_ne_12
        "energy": -2.138693831873935e02,
        "energy_95_ci": 3.194943990275421e-03,
        "extrapolated_bond_dimension": 735,
        "extrapolated_bond_dimension_lower_bound": 67,
        "extrapolated_bond_dimension_upper_bound": 62010,
    },
    "e7592f67-97b5-4bb8-833c-b15f68ed5827": {  # FCIDUMP_d_2.0_b_sto-3g_ne_28
        "energy": -4.707423613702206e02,
        "energy_95_ci": 1.355138309888761e-02,
        "extrapolated_bond_dimension": 5198,
        "extrapolated_bond_dimension_lower_bound": 3861,
        "extrapolated_bond_dimension_upper_bound": 7116,
    },
    "86da03bf-9604-403e-9794-7cbf2718e2ea": {  # FCIDUMP_d_2.0_b_sto-3g_ne_12
        "energy": -1.293398458823723e02,
        "energy_95_ci": 9.373647729860256e-05,
        "extrapolated_bond_dimension": 84,
        "extrapolated_bond_dimension_lower_bound": 68,
        "extrapolated_bond_dimension_upper_bound": 104,
    },
    "13215551-94fc-4813-b4bf-ce5d459ef306": {  # FCIDUMP_d_2.0_b_sto-3g_ne_28
        "energy": -4.707185128567409e02,
        "energy_95_ci": 6.130541128628579e-04,
        "extrapolated_bond_dimension": 3087,
        "extrapolated_bond_dimension_lower_bound": 2366,
        "extrapolated_bond_dimension_upper_bound": 4078,
    },
    # The run-5 entries below are disabled; they are kept for reference only.
    # "258d7586-e8cb-4ecf-9266-487f8f5ab7ac" : { # FCIDUMP_L_6x6_Ut_2.0
    #     "energy": -3.817621853655661e+01,
    #     "energy_95_ci": 7.391859913538647e-02,
    #     "extrapolated_bond_dimension": 65077478,
    #     "extrapolated_bond_dimension_lower_bound": 58582229,
    #     "extrapolated_bond_dimension_upper_bound": 72401863,
    # },
    # "247d84b9-5c7c-49f7-83bc-ddcaa2868314" : { # FCIDUMP_L_6x6_Ut_4.0
    #     "energy": -2.722403873581588e+01,
    #     "energy_95_ci": 1.062988116650764e-02,
    #     "extrapolated_bond_dimension": 70757,
    #     "extrapolated_bond_dimension_lower_bound": 66382,
    #     "extrapolated_bond_dimension_upper_bound": 75470,
    # },
    # "78d3b2f0-7d9d-4ca6-9afb-048c320a520d" : { # FCIDUMP_b_sto-3g
    #     "energy": -6.441841060520885e+01,
    #     "energy_95_ci": 1.185295040913657e-02,
    #     "extrapolated_bond_dimension": 381305,
    #     "extrapolated_bond_dimension_lower_bound": 278933,
    #     "extrapolated_bond_dimension_upper_bound": 530154,
    # },
    # "0e74f7aa-31f0-494c-b9d2-c83ff654074d" : { # FCIDUMP_d_3.0_b_sto-3g_ne_12
    #     "energy": -1.567017706878111e+02,
    #     "energy_95_ci": 4.558106048901648e-06,
    #     "extrapolated_bond_dimension": 18,
    #     "extrapolated_bond_dimension_lower_bound": 36,
    #     "extrapolated_bond_dimension_upper_bound": 443,
    # },
    # "be3636dd-1f57-4bde-abaf-0055c4dcf78e" : { # FCIDUMP_g_ring_b_cc-pvdz
    #     "energy": -2.248752929145347e+02,
    #     "energy_95_ci": 2.336423006677821e-02,
    #     "extrapolated_bond_dimension": 27140,
    #     "extrapolated_bond_dimension_lower_bound": 7655,
    #     "extrapolated_bond_dimension_upper_bound": 139999,
    # },
}

In [4]:
# Short name and problem-instance UUID for each problem family.
short_name_mn_mono, problem_instance_uuid_mn_mono = (
    "mn_mono",
    "cb40f3f7-ffe8-40e8-4544-f26aad5a8bd8",
)
short_name_benzene, problem_instance_uuid_benzene = (
    "benzene",
    "bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb",
)
short_name_chromium_dimer, problem_instance_uuid_chromium_dimer = (
    "chromium_dimer",
    "cccccccc-cccc-cccc-cccc-cccccccccccc",
)
short_name_ozone, problem_instance_uuid_ozone = (
    "ozone",
    "dddddddd-dddd-dddd-dddd-dddddddddddd",
)
short_name_fermi_hubbard_1d, problem_instance_uuid_fermi_hubbard_1d = (
    "fermi_hubbard_1d",
    "eeeeeeee-eeee-eeee-eeee-eeeeeeeeeeee",
)
short_name_fermi_hubbard_2d, problem_instance_uuid_fermi_hubbard_2d = (
    "fermi_hubbard_2d",
    "ffffffff-ffff-ffff-ffff-ffffffffffff",
)

# TODO: coarse instances names
# One row per calculation:
# (calculation UUID, instance data object UUID, problem-instance UUID, short name)
_instance_rows = (
    # fcidump.59_5_16_noncan_0.2_new
    ("079889f3-1444-4015-9ab9-deb0a3384ba1", "68af0b80-3d27-4aba-84f9-bcdd30a9255b",
     problem_instance_uuid_mn_mono, short_name_mn_mono),
    # fcidump.60_5_16_noncan_0.2_new
    ("1c5fb01c-b4a9-424d-a939-4408c96f63fe", "28a7820f-63fe-4920-aeec-a7ffe7e55d83",
     problem_instance_uuid_mn_mono, short_name_mn_mono),
    # fcidump.61_3_15_af_noncan_0.2_new
    ("f0ce715c-0413-4cc4-b944-8e53d3b8a61b", "f738fcd6-7ddc-4d70-8ff9-4019e3718b04",
     problem_instance_uuid_mn_mono, short_name_mn_mono),
    # fcidump.62_3_15_af_noncan_0.2_new
    ("7468274e-1270-4c7d-b92b-9898b33603a4", "6e2bf415-6a69-4b36-ba0f-780a11cb7c0b",
     problem_instance_uuid_mn_mono, short_name_mn_mono),
    # fcidump.63_5_15_af_ts_noncan_0.2_new
    ("a5089352-7a01-43f5-9640-cc4d173ab7e5", "027490ba-34f9-4340-89ab-27fd110d2821",
     problem_instance_uuid_mn_mono, short_name_mn_mono),
    # fcidump.64_5_15_af_ts_noncan_0.2_new
    ("07f969b9-648f-4e38-bc74-7436a2b1ebae", "bae2da57-6a69-483e-95bc-b77f72ebfba8",
     problem_instance_uuid_mn_mono, short_name_mn_mono),
    # fcidump.65_5_15_af_noncan_0.2_new
    ("32fedc5d-c0e8-4346-8031-a0b7929800f8", "72343006-774e-4192-b481-fa840ed25573",
     problem_instance_uuid_mn_mono, short_name_mn_mono),
    # fcidump.66_5_15_af_noncan_0.2_new
    ("0f454426-efd4-4fb5-be89-fb36b2ab7391", "ea55abec-8253-445d-85fa-914948b5e5a5",
     problem_instance_uuid_mn_mono, short_name_mn_mono),
    ("c3b3913d-8fe0-4f08-ac1b-b9d4a62ee370", "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaa1",
     problem_instance_uuid_fermi_hubbard_2d, short_name_fermi_hubbard_2d),
    ("2fdb5351-f05b-47af-af7f-a5d979aefbee", "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
     problem_instance_uuid_ozone, short_name_ozone),
    ("1c03f568-afd2-4673-83df-55f57a3b33b5", "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
     problem_instance_uuid_ozone, short_name_ozone),
    ("8a6294b8-e8bf-49e8-b76b-4dde7e589aac", "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaa2",
     problem_instance_uuid_fermi_hubbard_2d, short_name_fermi_hubbard_2d),
    ("91df2a51-ec2d-48c1-a44d-e47321fd2dc6", "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaa3",
     problem_instance_uuid_fermi_hubbard_2d, short_name_fermi_hubbard_2d),
    ("cd1e3451-84cf-4eab-9e17-da1473ba6c29", "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
     problem_instance_uuid_ozone, short_name_ozone),
)

# Maps each calculation UUID to the identifiers of the problem instance it belongs to.
instance_dict = {
    calc_id: {
        "problem_instance_uuid": family_uuid,
        "instance_data_object_uuid": data_object_id,
        "short_name": family_name,
    }
    for calc_id, data_object_id, family_uuid, family_name in _instance_rows
}

In [5]:
# Disabled for this data set: the name is bound to None and the table below is commented out.
# NOTE(review): the commented-out table maps FCIDUMP file names to integers — presumably
# extrapolated bond dimensions from an earlier set of calculations; confirm the expected
# key format with the consuming code before re-enabling it.
bd_extrapolation_dict = None
# bd_extrapolation_dict = {
# "fcidump.30_4a_{'Mo'_ 'def2-SVP', 'I'_ 'def2-SVP', 'Cl'_ 'def2-SVP', 'default'_ '6-311+G(d,p)'}":23,
# "fcidump.31_4a_{'Mo'_ 'def2-SVP', 'I'_ 'def2-SVP', 'Cl'_ 'def2-SVP', 'default'_ '6-311+G(d,p)'}":1906,
# "fcidump.20_rc_{'Mo'_ 'def2-SVP', 'I'_ 'def2-SVP', 'Cl'_ 'def2-SVP', 'default'_ '6-311+G(d,p)'}":299,
# "fcidump.21_rc_{'Mo'_ 'def2-SVP', 'I'_ 'def2-SVP', 'Cl'_ 'def2-SVP', 'default'_ '6-311+G(d,p)'}":4126,
# "fcidump.24_ts_1over2_{'Mo'_ 'def2-SVP', 'I'_ 'def2-SVP', 'Cl'_ 'def2-SVP', 'default'_ '6-311+G(d,p)":92,
# "fcidump.25_ts_1over2_{'Mo'_ 'def2-SVP', 'I'_ 'def2-SVP', 'Cl'_ 'def2-SVP', 'default'_ '6-311+G(d,p)":4076,
# "fcidump.26_pc_{'Mo'_ 'def2-SVP', 'I'_ 'def2-SVP', 'Cl'_ 'def2-SVP', 'default'_ '6-311+G(d,p)'}":332,
# "fcidump.27_pc_{'Mo'_ 'def2-SVP', 'I'_ 'def2-SVP', 'Cl'_ 'def2-SVP', 'default'_ '6-311+G(d,p)'}":4888,
# "fcidump.28_2_{'Mo'_ 'def2-SVP', 'I'_ 'def2-SVP', 'Cl'_ 'def2-SVP', 'default'_ '6-311+G(d,p)'}":583,
# "fcidump.29_2_{'Mo'_ 'def2-SVP', 'I'_ 'def2-SVP', 'Cl'_ 'def2-SVP', 'default'_ '6-311+G(d,p)'}":6434,
# # "fcidump.9_mo_n2-_{'Mo'_ 'def2-TZVP', 'default'_ 'def2-SVP'}":,
# # "fcidump.10_fecp2+_s0.5_def2-tzvp":,
# # "fcidump.11_fecp2_s0_def2-tzvp":,
# # "fcidump.12_mo_n2_{'Mo'_ 'def2-TZVP', 'default'_ 'def2-SVP'}":,
# }

In [6]:
# Machine-specific root directories are not hard-coded in the notebook: each one is read
# from a small text file in the working directory (surrounding whitespace is stripped).
private_path_1_file = "private_path_1.txt"
private_path_2_file = "private_path_2.txt"
private_path_1 = Path(private_path_1_file).read_text().strip()
private_path_2 = Path(private_path_2_file).read_text().strip()

# --- Inputs -------------------------------------------------------------------------------
# Folder holding the SLURM notification emails.
slurm_emails_folder = Path(private_path_1, 'zapata_data/benchmark_coarse_set_slurm_emails')

# Folder holding the FCIDUMP files, and the instance metadata CSV.
fci_dump_folder = Path(private_path_2, "zapata/gsee_benchmark_private/fcidumps_benchmark")
catalyst_meta_data_path = Path(
    private_path_2, 'zapata/gsee_benchmark_private/benchmark_coarse_set_metadata.csv'
)

# One DMRG output folder per prepare_calcs run, in run order.
data_file_path = [
    Path(private_path_1, run_folder)
    for run_folder in (
        "zapata_data/dmrg_data/data_storage_20240809_prepare_calcs_gsee_benchmark_coarse_set_run1",
        "zapata_data/dmrg_data/data_storage_20240814_prepare_calcs_gsee_benchmark_coarse_set_run2",
        "zapata_data/dmrg_data/data_storage_20240823_prepare_calcs_gsee_benchmark_coarse_set_run3",
        "zapata_data/dmrg_data/data_storage_20240831_prepare_calcs_gsee_benchmark_coarse_set_run4",
        "zapata_data/dmrg_data/hdf5_20240910_prepare_calcs_gsee_benchmark_coarse_set_run5_hdf5",
    )
]

# --- Outputs ------------------------------------------------------------------------------
excel_storage_path = Path(private_path_2, "zapata/gsee_benchmark_results/coarse_set/")
csv_storage_path = excel_storage_path / "performance_metrics_store"

excel_name = Path('dmrg_results_gsee_benchmark_coarse_set.xlsx')
memory_summary_csv_filename = excel_storage_path / 'gsee_benchmark_coarse_set_memory_summary.csv'

# File name of the JSON schema for solution files.
schema_file = "solution.schema.0.0.1.json"


In [7]:
# Read the instance metadata CSV (location given by `catalyst_meta_data_path`) into a DataFrame.
catalyst_meta_data = pd.read_csv(filepath_or_buffer=catalyst_meta_data_path)

In [None]:

def _parse_slurm_email(lines, source="<slurm email>"):
    """Extract the run time and memory usage reported in one SLURM notification email.

    Parameters
    ----------
    lines : list of str
        The lines of the email, as returned by ``readlines()``.
    source : object, optional
        Label (typically the file path) used in error messages.

    Returns
    -------
    dict
        ``{uuid: run_time, f"{uuid}_virtual_mem": int, f"{uuid}_rss_mem": int}`` where
        ``uuid`` is taken from the ``Command=../<uuid>/...`` line and ``run_time`` is the
        ``HH:MM:SS`` string found after "Run time " in the subject. The memory entries are
        only present when the job's ``<job_id>.ba+`` accounting line is found; they are the
        reported "<digits>K" values multiplied by 1024.

    Raises
    ------
    ValueError
        If the subject carries no run time or job id, if a ``Command=`` or accounting line
        appears before the information it depends on, or if the accounting line holds
        fewer than two "<digits>K" fields.
    """
    parsed = {}
    run_time = None
    job_id = None
    # Tracked per email so that memory figures can never be filed under a UUID left over
    # from a previously parsed email.
    calc_uuid = None

    for idx, raw_line in enumerate(lines):
        if raw_line.startswith('Subject: '):
            # Example (the subject can be folded over several physical lines):
            # "Subject: Niagara slurm Job_id=12645129 Name=dmrg_thresholding Failed, Run
            #  time 06:42:25, FAILED, ExitCode 1"
            # Join the subject line with up to two following lines, failed or not;
            # slicing copes with a subject that sits at the very end of the file.
            subject = " ".join(part.strip() for part in lines[idx:idx + 3])
            run_time_match = re.search(r'(?<=Run time )\d+:\d+:\d+', subject)
            if run_time_match is None:
                raise ValueError(f"{source}: no 'Run time HH:MM:SS' found in subject: {subject!r}")
            run_time = run_time_match.group()

            subject_first_line = raw_line.strip()
            if 'Job_id=' not in subject_first_line:
                raise ValueError(f"{source}: no 'Job_id=' found in subject line: {subject_first_line!r}")
            job_id = subject_first_line.split('Job_id=')[1].split(' ')[0]
        elif raw_line.startswith('   Command=../'):
            if run_time is None:
                raise ValueError(f"{source}: Run time not yet obtained")
            # "Command=../<uuid>/submit_<uuid>.sh" -> second path component is the UUID.
            calc_uuid = raw_line.strip().split('/')[1]
            parsed[calc_uuid] = run_time
        elif job_id is not None and raw_line.startswith(f"{job_id}.ba+"):
            # 12645129.ba+      batch rrg-izmay+   06:42:25 219926900K  30851368K   04:03:30 8-12:22:49      1:0
            # The first "<digits>K" field is stored as the virtual memory usage and the
            # second one as the RSS memory usage.
            if calc_uuid is None:
                raise ValueError(
                    f"{source}: memory usage line for job {job_id} found before any 'Command=../' line"
                )
            memory_fields = re.findall(r'\d+K', raw_line)
            if len(memory_fields) < 2:
                raise ValueError(
                    f"{source}: expected two '<digits>K' memory fields, got {memory_fields!r}"
                )
            memory_usage_virtual = int(memory_fields[0][:-1]) * 1024
            memory_usage_rss = int(memory_fields[1][:-1]) * 1024
            assert memory_usage_rss <= memory_usage_virtual, 'RSS memory usage is greater than virtual memory usage'
            parsed[f"{calc_uuid}_virtual_mem"] = memory_usage_virtual
            parsed[f"{calc_uuid}_rss_mem"] = memory_usage_rss

    return parsed


# Maps calculation UUID -> run time string, plus "<uuid>_virtual_mem" / "<uuid>_rss_mem"
# -> memory usage, collected from every SLURM email in the folder.
cc_wall_time_dict = {}
# Get all eml files in the folder
slurm_email_files = list(slurm_emails_folder.glob('*.eml'))
print(slurm_email_files)
for email_file in slurm_email_files:
    print(email_file)
    with open(email_file, 'r') as f:
        email_lines = f.readlines()
    cc_wall_time_dict.update(_parse_slurm_email(email_lines, source=email_file))

print(cc_wall_time_dict)

In [None]:
# Gather the base names of every FCIDUMP file found in `fci_dump_folder`.
# Names matching the upper-case pattern are listed first, followed by the
# names matching the lower-case pattern.
fcidump_files = [
    fcidump_path.name
    for glob_pattern in ('FCIDUMP*', 'fcidump*')
    for fcidump_path in fci_dump_folder.glob(glob_pattern)
]
fcidump_files


In [None]:
# Parse the `run_commands` text into one record per submitted calculation.
#
# Line formats handled by the branches below:
#   "### ..."                          -> skipped
#   "# fcidumps_benchmark/<fcidump>"   -> starts the record for a new FCIDUMP
#   "sbatch ../<uuid>/..."             -> one submitted job for that FCIDUMP
#
# Each record combines the matching local FCIDUMP file name with the wall time
# and memory figures stored in `cc_wall_time_dict` under the job's UUID.
fcidump_folder_name = "fcidumps_benchmark"
buf = io.StringIO(run_commands)
orig_data_dict_list = []
dict_entry = None  # Fields of the most recently seen FCIDUMP header line.
for line in buf.readlines():
    print(line)
    if line.startswith("### "):
        continue
    if line.startswith(f"# {fcidump_folder_name}/"):
        fcidump_name_temp = line.split(f"{fcidump_folder_name}/")[1].strip()
        print("New Dict Entry")
        fcidump_test_string = fcidump_name_temp
        # Find the first local FCIDUMP file whose name starts with the name
        # given in the run commands. Fail loudly when nothing matches: a plain
        # for/break search would silently keep the last file examined (or a
        # stale name from a previous header) and mislabel the calculation.
        fcidump_name = next(
            (
                candidate
                for candidate in fcidump_files
                if candidate.startswith(fcidump_test_string)
            ),
            None,
        )
        if fcidump_name is None:
            raise ValueError(
                f"No FCIDUMP file starting with {fcidump_test_string!r} "
                "was found in fcidump_files"
            )
        dict_entry = {
            "fcidump": fcidump_name,
            "fcidump_test_string": fcidump_test_string,
        }
    elif line.startswith("sbatch ../"):
        if dict_entry is None:
            raise ValueError(
                "Found an sbatch line before any "
                f"'# {fcidump_folder_name}/...' header line: {line.strip()!r}"
            )
        calc_uuid = line.split("sbatch ../")[1].split("/")[0]
        print(calc_uuid)
        # Work on a copy so that several sbatch lines under the same header
        # yield independent records instead of one shared, overwritten dict.
        job_entry = dict(dict_entry)
        job_entry["Calc UUID"] = calc_uuid
        job_entry["CC Wall Time"] = cc_wall_time_dict[calc_uuid]
        job_entry["Attempt Result"] = ""
        job_entry["Virtual Memory Usage"] = cc_wall_time_dict[
            f"{calc_uuid}_virtual_mem"
        ]
        job_entry["RSS Memory Usage"] = cc_wall_time_dict[f"{calc_uuid}_rss_mem"]
        orig_data_dict_list.append(job_entry)

print(orig_data_dict_list)

In [11]:
# catalyst_meta_data.columns

In [12]:
# Combine every run record from `orig_data_dict_list` with the matching row of
# `catalyst_meta_data` to build the records stored in `data_dict_list`.
data_dict_list = []
for data_dict in orig_data_dict_list:
    new_data_dict = {}
    fcidump_test_string = data_dict['fcidump_test_string']
    # Get metadata row.
    # regex=False: the FCIDUMP name is matched literally, so characters such
    #   as '.', '+' or '(' in the name are not interpreted as a pattern.
    # na=False: rows with a missing value in the column count as "no match"
    #   instead of producing a mask that cannot be used for indexing.
    data_row = catalyst_meta_data[
        catalyst_meta_data['mean_field_obejct_from_fcidump'].str.contains(
            fcidump_test_string, regex=False, na=False
        )
    ]
    if data_row.empty:
        raise ValueError(
            f"No catalyst metadata row references FCIDUMP {fcidump_test_string!r}"
        )
    # When several rows match, the first one is used.
    new_data_dict['instance ID'] = data_row['cat_tag'].values[0]
    new_data_dict['molecule'] = data_row['molecule'].values[0]
    new_data_dict['charge'] = data_row['charge'].values[0]
    new_data_dict['multiplicity'] = data_row['multiplicity'].values[0]
    new_data_dict['fcidump'] = data_dict['fcidump']
    new_data_dict['num_orbitals'] = data_row['norb'].values[0]
    new_data_dict['num_electrons'] = data_row['ne'].values[0]
    # log10 of C(2 * num_orbitals, num_electrons).
    new_data_dict['log10_hilbert_space_size'] = np.log10(sp.special.comb(2*new_data_dict['num_orbitals'], new_data_dict['num_electrons']))
    new_data_dict['Attempt Result'] = data_dict['Attempt Result']
    new_data_dict['Calc UUID'] = data_dict['Calc UUID']
    new_data_dict['CC Wall Time'] = data_dict['CC Wall Time']
    # Divide by 1024**3 for the "(GiB)" columns
    # (assumes the stored values are in bytes — TODO confirm).
    new_data_dict['Virtual Memory Usage (GiB)'] = data_dict['Virtual Memory Usage']/1024/1024/1024
    new_data_dict['RSS Memory Usage (GiB)'] = data_dict['RSS Memory Usage']/1024/1024/1024
    # Optional fields: copied only when the run record carries them.
    if 'Calc UUID Small BD' in data_dict:
        new_data_dict['Calc UUID Small BD'] = data_dict['Calc UUID Small BD']
        new_data_dict['CC Wall Time Small BD'] = data_dict['CC Wall Time Small BD']
    data_dict_list.append(new_data_dict)



    



    

In [None]:
# Show the assembled per-calculation records as the cell output for inspection.
data_dict_list

In [None]:
# Create an empty workbook, hand it to `dp.setup_workbook` together with the
# run records and storage locations, then save it under
# `excel_storage_path / excel_name`.
# NOTE(review): presumably `setup_workbook` fills the sheets and writes the
# CSV files — confirm against dmrghandler.data_processing.
wb = px.Workbook()

setup_workbook_kwargs = dict(
    data_file_path=data_file_path,
    data_dict_list=data_dict_list,
    workbook=wb,
    csv_storage_path=csv_storage_path,
    bd_extrapolation_dict=bd_extrapolation_dict,
    memory_summary_csv_filename=memory_summary_csv_filename,
    csv_uuid=True,
)
dp.setup_workbook(**setup_workbook_kwargs)

excel_output_path = excel_storage_path / excel_name
wb.save(excel_output_path)

In [15]:
# Contact person(s) passed on to the solution JSON generation.
primary_contact = dict(
    name="Joshua T. Cantin",
    email="joshua.cantin@utoronto.ca",
    institution="University of Toronto at Scarborough",
)
contact_info = [primary_contact]

# Description of the hardware the calculations were run on.
compute_details = dict(
    Machine="Niagara Cluster, Compute Canada",
    CPU='40 Intel "Skylake" cores at 2.4 GHz or 40 Intel "CascadeLake" cores at 2.5 GHz',
    RAM="202 GB (188 GiB)",
)

In [None]:
# Produce the solution JSON files for the records in `data_dict_list`.
# Note that the JSON files go to the same folder as the CSV output
# (`json_storage_path` is set to `csv_storage_path`).
solution_json_kwargs = dict(
    data_file_path=data_file_path,
    data_dict_list=data_dict_list,
    json_storage_path=csv_storage_path,
    extrapolation_dict=extrapolation_dict,
    memory_summary_csv_filename=memory_summary_csv_filename,
    csv_uuid=True,
    contact_info=contact_info,
    compute_details=compute_details,
    instance_dict=instance_dict,
)
json_filename_list = dp.produce_set_of_solution_json_files(**solution_json_kwargs)

In [None]:
# Validate the JSON files
# The schema is loaded once; every generated solution file is then checked
# against it. Files are opened in `with` blocks so each handle is closed as
# soon as its content has been parsed, and are read as UTF-8 regardless of the
# platform's default locale encoding.
with open(schema_file, encoding="utf-8") as schema_handle:
    schema = json.load(schema_handle)

for filename in json_filename_list:
    print(filename)
    with open(filename, encoding="utf-8") as solution_handle:
        solution = json.load(solution_handle)
    # Raises on the first file that does not conform to the schema.
    jsonschema.validate(solution, schema)


In [None]:
# Report how many solution JSON files are listed in `json_filename_list`.
print("Num json files: ", len(json_filename_list))