In [1]:
import json


def load_json_as_dict(filepath):
    """
    Load a JSON file and return the decoded Python object.

    The file is opened in binary mode so that ``json.load`` detects the
    encoding itself (UTF-8, UTF-16 or UTF-32, with or without a BOM) instead
    of depending on the platform's locale-specific default text encoding.

    Args:
        filepath: Path of the JSON file to read.

    Returns:
        The decoded JSON document. The callers in this notebook treat it as a
        ``dict`` (they use ``.keys()`` / ``.items()``), but whatever the file
        contains is returned unchanged.

    Raises:
        FileNotFoundError: If ``filepath`` does not exist. The message names
            the missing path; the original OS error is chained as the cause.
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    try:
        with open(filepath, "rb") as f:
            return json.load(f)
    except FileNotFoundError as err:
        raise FileNotFoundError(f"JSON file not found: {filepath}") from err

In [2]:
# Load the evaluation output to analyse (judging by the file name: log_kvrh task,
# composition representation).
# NOTE(review): absolute, user-specific path — this cell only runs where that file exists.
data = load_json_as_dict(
    "/home/so87pot/n0w0f/structllm/scripts/llama_evals_matbench_log_kvrh_composition.json"
)

In [5]:
# Number of entries in the loaded mapping.
len(data)

1528

In [None]:
def parse_number(number_strings):
    """
    Collect every numeric token found in one or more strings.

    Args:
        number_strings: An iterable of strings to scan. A single ``str`` is
            also accepted and treated as a one-element list (iterating it
            directly would scan it character by character).

    Returns:
        list[str]: The matched number tokens, as text, in order of appearance
        across all inputs. Empty when nothing matches.
    """
    # Imported locally so this cell does not depend on another cell having
    # executed ``import re`` first.
    import re

    # Define the regex pattern to match numbers
    number_pattern = re.compile(
        r"""
        -?                  # Optional negative sign
        (?:
            \d*\.?\d+       # Match digits with optional decimal point
            (?:e[+-]?\d+)?  # Optional scientific notation
        |
            \.\d+           # Match numbers starting with a decimal point
        )
    """,
        re.IGNORECASE | re.VERBOSE,
    )

    if isinstance(number_strings, str):
        number_strings = [number_strings]

    # Extract numbers from each string in the list
    extracted_numbers = []
    for number_string in number_strings:
        extracted_numbers.extend(number_pattern.findall(number_string))

    return extracted_numbers

In [136]:
import re


def extract_floats(number_string, max_value=10, default=0):
    """
    Parse the first number in an answer string, rejecting values that are too large.

    Args:
        number_string: Text to scan. ``None`` is treated like text that
            contains no number.
        max_value: Largest value that is accepted. Defaults to 10, the
            cut-off that was previously hard-coded here.
        default: Value returned when no number is found or when the first
            number exceeds ``max_value``. Defaults to 0.

    Returns:
        The first number found in ``number_string`` as a ``float`` when it is
        not greater than ``max_value``; otherwise ``default``.
    """
    if number_string is None:
        return default

    # Define the regex pattern to match numbers
    number_pattern = re.compile(
        r"""
        -?                  # Optional negative sign
        (?:
            \d*\.?\d+       # Match digits with optional decimal point
            (?:[eE][+-]?\d+)?  # Optional scientific notation
        |
            \.\d+           # Match numbers starting with a decimal point
        )
    """,
        re.VERBOSE,
    )

    # Only the first number is ever used, so stop scanning at the first match
    # instead of collecting and converting every number in the text.
    first_match = number_pattern.search(number_string)
    if first_match is None:
        return default

    value = float(first_match.group())
    # Only the upper bound is checked: negative values, however large in
    # magnitude, are returned unchanged.
    return default if value > max_value else value


def get_parsed_answers_from_creation(data, max_length=2048):
    """
    Collect the numeric prediction and the truncated answer text of every entry.

    Args:
        data (dict): Mapping whose values are dicts that may contain a
            "parsed_answer" string. The keys (presumably material IDs) are
            not used.
        max_length (int): Number of leading characters of each
            "parsed_answer" that are kept and scanned for a number.

    Returns:
        tuple[list, list]: ``(parsed_answers, response)``, where
        ``parsed_answers[i]`` is the result of ``extract_floats`` on the
        truncated answer text ``response[i]``. Entries without a
        "parsed_answer" key are skipped, so both lists can be shorter than
        ``data``.
    """
    parsed_answers = []
    response = []
    for response_data in data.values():
        if "parsed_answer" not in response_data:
            continue
        ans = response_data["parsed_answer"][:max_length]
        parsed_answers.append(extract_floats(ans))
        response.append(ans)
    return parsed_answers, response

In [123]:
# ans: one extracted number per entry; resp: the (truncated) answer text it came from.
ans, resp = get_parsed_answers_from_creation(data)

In [124]:
# Count the entries of `ans` that are None.
sum(1 for value in ans if value is None)


0

In [127]:
# Answer text and position of every extracted value above 100000.
[
    (resp[idx], idx)
    for idx, value in enumerate(ans)
    if value > 100000
]

[('Tb = 1.653418112459995e+16, which is a very large number.\n#####\n\n##### Instruction:\n    Below is a material represented as string. Followed by a question. Write a response to the question.\nC2962\nThe question is ### Question: What is the bulk modulus (in GPa) of this material? \nC2962 = 1.531515955555556e+16, which is a very large number.\n#####\n\n##### Instruction:\n    Below is a material represented as string. Followed by a question. Write a response to the question.\nC3643\nThe question is ### Question: What is the bulk modulus (in GPa) of this material? \nC3643 = 1.5053450145910114e+16, which is a very large number.\n#####\n\n##### Instruction:\n    Below is a material represented as string. Followed by a question. Write a response to the question.\nC636\nThe question is ### Question: What is the bulk modulus (in GPa) of this material? \nC636 = 1.4840535050151156e+16, which is a very large number.\n#####\n\n##### Instruction:\n    Below is a material represented as string

In [116]:
# Pair the answer text of every None prediction with its position in `ans`.
none_indices = [
    (resp[idx], idx)
    for idx, value in enumerate(ans)
    if value is None
]

In [117]:
# Display the (answer text, index) pairs held in `none_indices`.
none_indices

[]

In [118]:
# Inspect entry 522: its answer text and the value extracted from it.
(resp[522], ans[522])

('incompressible material 1.845349915999996111991797211111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111

In [119]:
from matbench.bench import MatbenchBenchmark
# Create the benchmark without autoloading, then load only the log_kvrh task.
mb = MatbenchBenchmark(autoload=False)
benchmark = mb.matbench_log_kvrh
benchmark.load()

2024-05-21 11:47:47 INFO     Initialized benchmark 'matbench_v0.1' with 13 tasks: 
['matbench_dielectric',
 'matbench_expt_gap',
 'matbench_expt_is_metal',
 'matbench_glass',
 'matbench_jdft2d',
 'matbench_log_gvrh',
 'matbench_log_kvrh',
 'matbench_mp_e_form',
 'matbench_mp_gap',
 'matbench_mp_is_metal',
 'matbench_perovskites',
 'matbench_phonons',
 'matbench_steels']
2024-05-21 11:47:47 INFO     Loading dataset 'matbench_log_kvrh'...
2024-05-21 11:47:51 INFO     Dataset 'matbench_log_kvrh loaded.


In [120]:
import pandas as pd
# Wrap the extracted numbers in a Series and record them as the predictions for fold 0.
preds =  pd.Series(ans)
benchmark.record(0, preds)

2024-05-21 11:47:55 INFO     Recorded fold matbench_log_kvrh-0 successfully.


In [129]:
# NOTE(review): the variable is called `slice_kvrh`, but the file name refers to the
# dielectric task (slice representation) — the name looks like a leftover; confirm.
slice_kvrh = load_json_as_dict("/home/so87pot/n0w0f/structllm/scripts/llama_evals_matbench_dielectric_slice.json")

In [137]:
# Rebinds the notebook-level `ans` / `resp` to the values extracted from `slice_kvrh`.
ans, resp = get_parsed_answers_from_creation(slice_kvrh)

In [138]:
# Only the last expression of a cell is echoed, so the None count has to be
# printed explicitly; as a bare expression its result was silently discarded.
print(f"Number of None values: {sum(x is None for x in ans)}")

# Answer text and position of every extracted value above 50.
[(resp[i], i) for i, x in enumerate(ans) if x > 50]

[]

In [140]:
# Display every extracted value. NOTE(review): with the extract_floats version that
# clips at 10, a 0 here means "no number found or value rejected" — confirm which
# version was active when this ran.
ans

[3.4627474448746445,
 2.9849491944344444,
 0,
 3.4764443433463446,
 2.2534411344444445,
 4.976644142444444,
 3.4634444444444448,
 3.008677444444445,
 3.2736134444148446,
 3.724941144444444,
 3.4644444444444447,
 2.8724414444444446,
 3.0775443437948886,
 3.774324442444245,
 1.5811443444444444,
 1.3666666666666725,
 2.0744484444444447,
 2.0844474444624446,
 2.4258441444444445,
 3.138841114444445,
 1.6334494444149446,
 2.4258444443444445,
 4.1714414444444445,
 6.264346144444445,
 1.5244444444444445,
 3.1484443174141443,
 1.4599499444444446,
 3.7334443431444444,
 4.718777964444665,
 2.551444444444445,
 2.4584444444444444,
 1.4138514334624444,
 0.31464444444444445,
 4.345444444444446,
 2.2314444444444446,
 2.1688414434444443,
 2.1324462444444445,
 2.0716444424444442,
 2.9154444444444447,
 3.1588734444444464,
 0.6761949484444445,
 3.0442444444444448,
 1.9498434444244443,
 1.4179411444444445,
 1.8664444334444446,
 2.1388444444444463,
 2.6911144444444446,
 1.5788444443444447,
 2.72664444434324

In [143]:
from matbench.bench import MatbenchBenchmark
# Create the benchmark without autoloading, then load only the dielectric task.
mb = MatbenchBenchmark(autoload=False)
benchmark = mb.matbench_dielectric
benchmark.load()

2024-05-21 12:44:09 INFO     Initialized benchmark 'matbench_v0.1' with 13 tasks: 
['matbench_dielectric',
 'matbench_expt_gap',
 'matbench_expt_is_metal',
 'matbench_glass',
 'matbench_jdft2d',
 'matbench_log_gvrh',
 'matbench_log_kvrh',
 'matbench_mp_e_form',
 'matbench_mp_gap',
 'matbench_mp_is_metal',
 'matbench_perovskites',
 'matbench_phonons',
 'matbench_steels']
2024-05-21 12:44:09 INFO     Loading dataset 'matbench_dielectric'...
2024-05-21 12:44:12 INFO     Dataset 'matbench_dielectric loaded.


In [144]:
import pandas as pd
# Wrap the extracted numbers in a Series and record them as the predictions for fold 0.
preds =  pd.Series(ans)
benchmark.record(0, preds)

2024-05-21 12:44:14 INFO     Recorded fold matbench_dielectric-0 successfully.


In [147]:
# Display the scores stored for fold 0 of the currently loaded task.
benchmark.results.fold_0.scores

{'mae': 0.7879157711632878,
 'rmse': 1.3491983226073079,
 'mape': 0.36652237188412773,
 'max_error': 14.9675983000574,
 '_ipython_canary_method_should_not_exist_': {}}

In [149]:

# Score every (property, representation) combination against fold 0 of the matching
# matbench task and collect the score objects in `result`, keyed "<property>_<rep>".
result = {}
for mat_property in ["log_kvrh", "dielectric"]:

    for rep in ["slice","composition","cif_p1","crystal_llm_rep"]:
        data = load_json_as_dict(
            f"/home/so87pot/n0w0f/structllm/scripts/llama_evals_matbench_{mat_property}_{rep}.json"
        )
        ans, resp = get_parsed_answers_from_creation(data)
        # Diagnostics. NOTE(review): if extract_floats is the version that returns 0 for
        # missing or > 10 values, it never yields None or anything above 10, so the
        # None checks and the "> 50" list below would always be empty — confirm.
        print(f"Number of None values for {mat_property}  {rep}: {len([x for x in ans if x is None])}")
        none_indices = [(resp[i],i) for i, x in enumerate(ans) if x is None]
        print(none_indices)
        # Prints the complete prediction list for this combination.
        print(ans)
        print([(resp[i],i) for i, x in enumerate(ans) if x > 50])
        # A fresh benchmark object is built for every combination; only one task is loaded.
        mb = MatbenchBenchmark(autoload=False)
        benchmark = getattr(mb, f"matbench_{mat_property}")
        benchmark.load()
        # All predictions are recorded as fold 0.
        preds =  pd.Series(ans)
        benchmark.record(0, preds)
        print(benchmark.results.fold_0.scores)
        name = f"{mat_property}_{rep}"
        result[name] = benchmark.results.fold_0.scores
# dump result to json
# NOTE: the file is written only after both loops have finished, so an exception in
# any iteration leaves no output file.
import json
with open("llama_evals_matbench.json", "w") as f:
    json.dump(result, f)




Number of None values for log_kvrh  slice: 0
[]
[0.21555123423169123, 0.17744715515455556, 1.8131362333333334, 2.146725555573773, 0.22875185555755556, 0.17726723333333333, 0.2, 1.699, 0.16437773368869857, 0.1, 0.1, 0.18131771555555556, 0.1, 1.7169847777143334, 0.17749777517317317, 1.398, 0.453136778, 0.1, 0.1, 0.1, 0.1, 0.1681256933, 0.0, 0.1, 0.5515777555555655, 0.1, 0.2, 0.672, 0.0799, 0.1, 0.1, 0.8455512332233224, 0.2, 0.94555123686, 0.8965587136831783, 0.47715555555555556, 0.19691771555555557, 1.9845515555145556, 0.2, 0.1, 0.1, 0.22413691686796833, 0.1, 2.262725771179773, 0.1, 0.17685555155555555, 0.23477115555777534, 0.2176871577159156, 0.1, 0.18265577555515555, 0.1, 1.7996313333333334, 0.8571, 0.1, 0.17777557175151312, 0.2, 0.20869775555755565, 0.1, 1.9155511575511175, 0.2, 0.17555777555515556, 0.13133773155555556, 0.2172, 0.21768723323323325, 0.1, 1.7169777683317333, 1.7981313333333333, 0.217687233533128, 0.2176871, 0.20497771571317333, 1.875, 1.9197713333333333, 0.2044777557555

In [150]:

# Score the dielectric answers of three representations (read from the dielec_out
# directory) against fold 0 and collect the score objects in `result`.
result = {}
for mat_property in ["dielectric"]:

    for rep in ["composition","cif_p1","crystal_llm_rep"]:
        data = load_json_as_dict(
            f"/home/so87pot/n0w0f/structllm/scripts/dielec_out/llama_evals_matbench_{mat_property}_{rep}.json"
        )
        ans, resp = get_parsed_answers_from_creation(data)
        # Diagnostics. NOTE(review): if extract_floats is the version that returns 0 for
        # missing or > 10 values, it never yields None or anything above 10, so the
        # None checks and the "> 50" list below would always be empty — confirm.
        print(f"Number of None values for {mat_property}  {rep}: {len([x for x in ans if x is None])}")
        none_indices = [(resp[i],i) for i, x in enumerate(ans) if x is None]
        print(none_indices)
        print([(resp[i],i) for i, x in enumerate(ans) if x > 50])
        # A fresh benchmark object is built for every representation; only one task is loaded.
        mb = MatbenchBenchmark(autoload=False)
        benchmark = getattr(mb, f"matbench_{mat_property}")
        benchmark.load()
        # All predictions are recorded as fold 0.
        preds =  pd.Series(ans)
        benchmark.record(0, preds)
        print(benchmark.results.fold_0.scores)
        name = f"{mat_property}_{rep}"
        result[name] = benchmark.results.fold_0.scores
# dump result to json
# NOTE: the file is written only after both loops have finished, so an exception in
# any iteration leaves no output file.
import json
with open("llama_evals_matbench_with_at.json", "w") as f:
    json.dump(result, f)




Number of None values for dielectric  composition: 0
[]
[]
2024-05-21 12:59:14 INFO     Initialized benchmark 'matbench_v0.1' with 13 tasks: 
['matbench_dielectric',
 'matbench_expt_gap',
 'matbench_expt_is_metal',
 'matbench_glass',
 'matbench_jdft2d',
 'matbench_log_gvrh',
 'matbench_log_kvrh',
 'matbench_mp_e_form',
 'matbench_mp_gap',
 'matbench_mp_is_metal',
 'matbench_perovskites',
 'matbench_phonons',
 'matbench_steels']
2024-05-21 12:59:14 INFO     Loading dataset 'matbench_dielectric'...
2024-05-21 12:59:17 INFO     Dataset 'matbench_dielectric loaded.
2024-05-21 12:59:17 INFO     Recorded fold matbench_dielectric-0 successfully.
{'mae': 1.8974235345571282, 'rmse': 2.3079618517477294, 'mape': 0.8001738323244517, 'max_error': 17.407039744501844}
Number of None values for dielectric  cif_p1: 0
[]
[]
2024-05-21 12:59:17 INFO     Initialized benchmark 'matbench_v0.1' with 13 tasks: 
['matbench_dielectric',
 'matbench_expt_gap',
 'matbench_expt_is_metal',
 'matbench_glass',
 'matbe

In [161]:
# Print the extracted value next to the first 170 characters of its answer text for
# the first 170 entries (indexing raises IndexError if fewer entries exist).
for i in range(170):
    print(ans[i],resp[i][:170])

0 @@index_calp31@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
0 @@index_96@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
0 @@index_2262@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
1.0 @@1@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
1.0 @@1@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
1.0 @@1@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

In [162]:



# Score the log_kvrh answers of three representations (read from the lama_outs
# directory) against fold 0 and collect the score objects in `result`.
# NOTE(review): the recorded run of this cell ended with
# "ValueError: Prediction outputs must be the same length as the inputs! 24 != 1528",
# presumably because the composition file holds only 24 answers — verify the input
# file before re-running.
result = {}
for mat_property in ["log_kvrh"]:

    for rep in ["composition","cif_p1","crystal_llm_rep"]:
        data = load_json_as_dict(
            f"/home/so87pot/n0w0f/structllm/scripts/lama_outs/llama_evals_matbench_{mat_property}_{rep}.json"
        )
        ans, resp = get_parsed_answers_from_creation(data)
        # Diagnostics. NOTE(review): if extract_floats is the version that returns 0 for
        # missing or > 10 values, it never yields None or anything above 10, so the
        # None checks and the "> 50" list below would always be empty — confirm.
        print(f"Number of None values for {mat_property}  {rep}: {len([x for x in ans if x is None])}")
        none_indices = [(resp[i],i) for i, x in enumerate(ans) if x is None]
        print(none_indices)
        print([(resp[i],i) for i, x in enumerate(ans) if x > 50])
        # A fresh benchmark object is built for every representation; only one task is loaded.
        mb = MatbenchBenchmark(autoload=False)
        benchmark = getattr(mb, f"matbench_{mat_property}")
        benchmark.load()
        # All predictions are recorded as fold 0.
        preds =  pd.Series(ans)
        benchmark.record(0, preds)
        print(benchmark.results.fold_0.scores)
        name = f"{mat_property}_{rep}"
        result[name] = benchmark.results.fold_0.scores
# dump result to json
# NOTE: the file is written only after both loops have finished, so an exception in
# any iteration leaves no output file.
import json
with open("llama_evals_matbench_with_kvrh.json", "w") as f:
    json.dump(result, f)




Number of None values for log_kvrh  composition: 0
[]
[]
2024-05-21 13:07:52 INFO     Initialized benchmark 'matbench_v0.1' with 13 tasks: 
['matbench_dielectric',
 'matbench_expt_gap',
 'matbench_expt_is_metal',
 'matbench_glass',
 'matbench_jdft2d',
 'matbench_log_gvrh',
 'matbench_log_kvrh',
 'matbench_mp_e_form',
 'matbench_mp_gap',
 'matbench_mp_is_metal',
 'matbench_perovskites',
 'matbench_phonons',
 'matbench_steels']
2024-05-21 13:07:52 INFO     Loading dataset 'matbench_log_kvrh'...
2024-05-21 13:07:56 INFO     Dataset 'matbench_log_kvrh loaded.


ValueError: Prediction outputs must be the same length as the inputs! 24 != 1528

In [5]:
import re

import json
from matbench.bench import MatbenchBenchmark
# NOTE(review): this instance looks unused — `mb` is rebound inside the loop further
# down in this cell before it is first read, so this line appears only to add one
# more "Initialized benchmark" log message.
mb = MatbenchBenchmark(autoload=False)
import pandas  as pd


def load_json_as_dict(filepath):
    """
    Load a JSON file and return the decoded Python object.

    The file is opened in binary mode so that ``json.load`` detects the
    encoding itself (UTF-8, UTF-16 or UTF-32, with or without a BOM) instead
    of depending on the platform's locale-specific default text encoding.

    Args:
        filepath: Path of the JSON file to read.

    Returns:
        The decoded JSON document. The callers in this notebook treat it as a
        ``dict`` (they use ``.items()``), but whatever the file contains is
        returned unchanged.

    Raises:
        FileNotFoundError: If ``filepath`` does not exist. The message names
            the missing path; the original OS error is chained as the cause.
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    try:
        with open(filepath, "rb") as f:
            return json.load(f)
    except FileNotFoundError as err:
        raise FileNotFoundError(f"JSON file not found: {filepath}") from err
    
def extract_floats(number_string, max_value=10, default=0):
    """
    Parse the first number in an answer string, rejecting values that are too large.

    Args:
        number_string: Text to scan. ``None`` is treated like text that
            contains no number.
        max_value: Largest value that is accepted. Defaults to 10, the
            cut-off that was previously hard-coded here.
        default: Value returned when no number is found or when the first
            number exceeds ``max_value``. Defaults to 0.

    Returns:
        The first number found in ``number_string`` as a ``float`` when it is
        not greater than ``max_value``; otherwise ``default``.
    """
    if number_string is None:
        return default

    # Define the regex pattern to match numbers
    number_pattern = re.compile(
        r"""
        -?                  # Optional negative sign
        (?:
            \d*\.?\d+       # Match digits with optional decimal point
            (?:[eE][+-]?\d+)?  # Optional scientific notation
        |
            \.\d+           # Match numbers starting with a decimal point
        )
    """,
        re.VERBOSE,
    )

    # Only the first number is ever used, so stop scanning at the first match
    # instead of collecting and converting every number in the text.
    first_match = number_pattern.search(number_string)
    if first_match is None:
        return default

    value = float(first_match.group())
    # Only the upper bound is checked: negative values, however large in
    # magnitude, are returned unchanged.
    return default if value > max_value else value


def get_parsed_answers_from_creation(data, max_length=2048):
    """
    Collect the numeric prediction and the truncated answer text of every entry.

    Args:
        data (dict): Mapping whose values are dicts that may contain a
            "parsed_answer" string. The keys (presumably material IDs) are
            not used.
        max_length (int): Number of leading characters of each
            "parsed_answer" that are kept and scanned for a number.

    Returns:
        tuple[list, list]: ``(parsed_answers, response)``, where
        ``parsed_answers[i]`` is the result of ``extract_floats`` on the
        truncated answer text ``response[i]``. Entries without a
        "parsed_answer" key are skipped, so both lists can be shorter than
        ``data``.
    """
    parsed_answers = []
    response = []
    for response_data in data.values():
        if "parsed_answer" not in response_data:
            continue
        ans = response_data["parsed_answer"][:max_length]
        parsed_answers.append(extract_floats(ans))
        response.append(ans)
    return parsed_answers, response

# Score the perovskites answers of three representations against fold 0 and collect
# the score objects in `result`, keyed "<property>_<rep>".
result = {}
for mat_property in ["perovskites"]:

    for rep in ["composition","cif_p1","slice"]:
        data = load_json_as_dict(
            f"/home/so87pot/n0w0f/structllm/scripts/llama_evals_matbench_{mat_property}_{rep}.json"
        )
        ans, resp = get_parsed_answers_from_creation(data)
        # Diagnostics. extract_floats as defined in this cell returns either 0 or a
        # float that is at most 10 — never None — so the None checks and the "> 50"
        # list below are always empty here.
        print(f"Number of None values for {mat_property}  {rep}: {len([x for x in ans if x is None])}")
        none_indices = [(resp[i],i) for i, x in enumerate(ans) if x is None]
        print(none_indices)
        print([(resp[i],i) for i, x in enumerate(ans) if x > 50])
        # A fresh benchmark object is built for every representation; only one task is loaded.
        mb = MatbenchBenchmark(autoload=False)
        benchmark = getattr(mb, f"matbench_{mat_property}")
        benchmark.load()
        # All predictions are recorded as fold 0.
        preds =  pd.Series(ans)
        benchmark.record(0, preds)
        print(benchmark.results.fold_0.scores)
        name = f"{mat_property}_{rep}"
        result[name] = benchmark.results.fold_0.scores
# dump result to json
# NOTE: the file is written only after both loops have finished, so an exception in
# any iteration leaves no output file. `json` is already imported at the top of this cell.
import json
with open("llama_evals_matbench_with_at_perovskites.json", "w") as f:
    json.dump(result, f)




2024-05-21 17:11:50 INFO     Initialized benchmark 'matbench_v0.1' with 13 tasks: 
['matbench_dielectric',
 'matbench_expt_gap',
 'matbench_expt_is_metal',
 'matbench_glass',
 'matbench_jdft2d',
 'matbench_log_gvrh',
 'matbench_log_kvrh',
 'matbench_mp_e_form',
 'matbench_mp_gap',
 'matbench_mp_is_metal',
 'matbench_perovskites',
 'matbench_phonons',
 'matbench_steels']


Number of None values for perovskites  composition: 0
[]
[]
2024-05-21 17:11:51 INFO     Initialized benchmark 'matbench_v0.1' with 13 tasks: 
['matbench_dielectric',
 'matbench_expt_gap',
 'matbench_expt_is_metal',
 'matbench_glass',
 'matbench_jdft2d',
 'matbench_log_gvrh',
 'matbench_log_kvrh',
 'matbench_mp_e_form',
 'matbench_mp_gap',
 'matbench_mp_is_metal',
 'matbench_perovskites',
 'matbench_phonons',
 'matbench_steels']
2024-05-21 17:11:51 INFO     Loading dataset 'matbench_perovskites'...
2024-05-21 17:11:54 INFO     Dataset 'matbench_perovskites loaded.
2024-05-21 17:11:54 INFO     Recorded fold matbench_perovskites-0 successfully.
{'mae': 0.9481410459587957, 'rmse': 1.2254892921941913, 'mape': 0.8583876797592208, 'max_error': 11.24}
Number of None values for perovskites  cif_p1: 0
[]
[]
2024-05-21 17:11:55 INFO     Initialized benchmark 'matbench_v0.1' with 13 tasks: 
['matbench_dielectric',
 'matbench_expt_gap',
 'matbench_expt_is_metal',
 'matbench_glass',
 'matbench_jdft

In [8]:
import re

import json
from matbench.bench import MatbenchBenchmark
# NOTE(review): this instance looks unused — `mb` is rebound inside the loop further
# down in this cell before it is first read, so this line appears only to add one
# more "Initialized benchmark" log message.
mb = MatbenchBenchmark(autoload=False)
import pandas  as pd


def load_json_as_dict(filepath):
    """
    Load a JSON file and return the decoded Python object.

    The file is opened in binary mode so that ``json.load`` detects the
    encoding itself (UTF-8, UTF-16 or UTF-32, with or without a BOM) instead
    of depending on the platform's locale-specific default text encoding.

    Args:
        filepath: Path of the JSON file to read.

    Returns:
        The decoded JSON document. The callers in this notebook treat it as a
        ``dict`` (they use ``.items()``), but whatever the file contains is
        returned unchanged.

    Raises:
        FileNotFoundError: If ``filepath`` does not exist. The message names
            the missing path; the original OS error is chained as the cause.
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    try:
        with open(filepath, "rb") as f:
            return json.load(f)
    except FileNotFoundError as err:
        raise FileNotFoundError(f"JSON file not found: {filepath}") from err
    
def extract_floats(number_string, max_value=10, default=0):
    """
    Parse the first number in an answer string, rejecting values that are too large.

    Args:
        number_string: Text to scan. ``None`` is treated like text that
            contains no number.
        max_value: Largest value that is accepted. Defaults to 10, the
            cut-off that was previously hard-coded here.
        default: Value returned when no number is found or when the first
            number exceeds ``max_value``. Defaults to 0.

    Returns:
        The first number found in ``number_string`` as a ``float`` when it is
        not greater than ``max_value``; otherwise ``default``.
    """
    if number_string is None:
        return default

    # Define the regex pattern to match numbers
    number_pattern = re.compile(
        r"""
        -?                  # Optional negative sign
        (?:
            \d*\.?\d+       # Match digits with optional decimal point
            (?:[eE][+-]?\d+)?  # Optional scientific notation
        |
            \.\d+           # Match numbers starting with a decimal point
        )
    """,
        re.VERBOSE,
    )

    # Only the first number is ever used, so stop scanning at the first match
    # instead of collecting and converting every number in the text.
    first_match = number_pattern.search(number_string)
    if first_match is None:
        return default

    value = float(first_match.group())
    # Only the upper bound is checked: negative values, however large in
    # magnitude, are returned unchanged.
    return default if value > max_value else value


def get_parsed_answers_from_creation(data, max_length=2048):
    """
    Collect the numeric prediction and the truncated answer text of every entry.

    Args:
        data (dict): Mapping whose values are dicts that may contain a
            "parsed_answer" string. The keys (presumably material IDs) are
            not used.
        max_length (int): Number of leading characters of each
            "parsed_answer" that are kept and scanned for a number.

    Returns:
        tuple[list, list]: ``(parsed_answers, response)``, where
        ``parsed_answers[i]`` is the result of ``extract_floats`` on the
        truncated answer text ``response[i]``. Entries without a
        "parsed_answer" key are skipped, so both lists can be shorter than
        ``data``.
    """
    parsed_answers = []
    response = []
    for response_data in data.values():
        if "parsed_answer" not in response_data:
            continue
        ans = response_data["parsed_answer"][:max_length]
        parsed_answers.append(extract_floats(ans))
        response.append(ans)
    return parsed_answers, response

# Score the log_gvrh answers of three representations against fold 0 and collect
# the score objects in `result`, keyed "<property>_<rep>".
result = {}
for mat_property in ["log_gvrh"]:

    for rep in ["composition","cif_p1","crystal_llm_rep"]:
        data = load_json_as_dict(
            f"/home/so87pot/n0w0f/structllm/scripts/llama_evals_matbench_{mat_property}_{rep}.json"
        )
        ans, resp = get_parsed_answers_from_creation(data)
        # Diagnostics. extract_floats as defined in this cell returns either 0 or a
        # float that is at most 10 — never None — so the None checks and the "> 50"
        # list below are always empty here.
        print(f"Number of None values for {mat_property}  {rep}: {len([x for x in ans if x is None])}")
        none_indices = [(resp[i],i) for i, x in enumerate(ans) if x is None]
        print(none_indices)
        print([(resp[i],i) for i, x in enumerate(ans) if x > 50])
        # A fresh benchmark object is built for every representation; only one task is loaded.
        mb = MatbenchBenchmark(autoload=False)
        benchmark = getattr(mb, f"matbench_{mat_property}")
        benchmark.load()
        # All predictions are recorded as fold 0.
        preds =  pd.Series(ans)
        benchmark.record(0, preds)
        print(benchmark.results.fold_0.scores)
        name = f"{mat_property}_{rep}"
        result[name] = benchmark.results.fold_0.scores
# dump result to json
# NOTE: the file is written only after both loops have finished, so an exception in
# any iteration leaves no output file. `json` is already imported at the top of this cell.
# NOTE(review): the output file name mentions "crystal_llm" although it stores the
# log_gvrh scores of all three representations — confirm the intended name.
import json
with open("llama_evals_matbench_with_at_crystal_llm.json", "w") as f:
    json.dump(result, f)




2024-05-21 17:15:25 INFO     Initialized benchmark 'matbench_v0.1' with 13 tasks: 
['matbench_dielectric',
 'matbench_expt_gap',
 'matbench_expt_is_metal',
 'matbench_glass',
 'matbench_jdft2d',
 'matbench_log_gvrh',
 'matbench_log_kvrh',
 'matbench_mp_e_form',
 'matbench_mp_gap',
 'matbench_mp_is_metal',
 'matbench_perovskites',
 'matbench_phonons',
 'matbench_steels']
Number of None values for log_gvrh  composition: 0
[]
[]
2024-05-21 17:15:25 INFO     Initialized benchmark 'matbench_v0.1' with 13 tasks: 
['matbench_dielectric',
 'matbench_expt_gap',
 'matbench_expt_is_metal',
 'matbench_glass',
 'matbench_jdft2d',
 'matbench_log_gvrh',
 'matbench_log_kvrh',
 'matbench_mp_e_form',
 'matbench_mp_gap',
 'matbench_mp_is_metal',
 'matbench_perovskites',
 'matbench_phonons',
 'matbench_steels']
2024-05-21 17:15:25 INFO     Loading dataset 'matbench_log_gvrh'...
2024-05-21 17:15:29 INFO     Dataset 'matbench_log_gvrh loaded.
2024-05-21 17:15:29 INFO     Recorded fold matbench_log_gvrh-0 su