In [9]:
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import os
from dotenv import load_dotenv
import json
from litellm import completion

In [None]:
model_name = "llama-3.2-3b-it-Perovskite-PaperExtractor"
# model_name = "meta-llama/Llama-3.2-3B-Instruct"
device = "cuda"

model = AutoModelForCausalLM.from_pretrained(model_name, ).to(device)
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.model_max_length = 60000
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    temperature=0.1,
    device=0
)

Loading checkpoint shards: 100%|██████████| 2/2 [00:05<00:00,  2.81s/it]


In [None]:
PREFIX = """
You are a helpful scientific assistant. Your task is to extract relevant scientific data from the provided text about perovskite solar cells and passivating molecules. If the data is not available in the text, return null for the respective fields. Output the information in JSON format with the following fields:
- `control_pce`: Power conversion efficiency for control perovskite (numeric).
- `control_voc`: Open-circuit voltage for control perovskite (numeric).
- `treated_pce`: Best Power conversion efficiency for treated perovskite (numeric).
- `treated_voc`: Best Open-circuit voltage for treated perovskite (numeric).
- `passivating_molecule`: Name of the champion passivating molecule tested.
- `perovskite_composition`: Chemical formula of the perovskite (string).
- `electron_transport_layer`: Material used as the electron transport layer (string).
- `hole_transport_layer`: Material used as the hole transport layer (string).
- Stability tests: Include any stability tests mentioned. Stability tests can be done in dark storage, light-soaking, thermal cycling, light cycling, and solar-thermal cycling. If none of these types are tested, do not include a JSON object for them.
For each test, the value should follow this format:
```json
{
  "test_name": null,
  "temperature": null,
  "time": null,
  "humidity": null,
  "control_efficiency": null,
  "treatment_efficiency": null
}

The JSON structure must follow this exact format:
{
  "control_pce": null,
  "control_voc": null,
  "treated_pce": null,
  "treated_voc": null,
  "passivating_molecule": null,
  "perovskite_composition": null,
  "electron_transport_layer": null,
  "hole_transport_layer": null,
  "stability_tests": [
    {
      "test_name": null,
      "temperature": null,
      "time": null,
      "humidity": null,
      "control_efficiency": null,
      "treatment_efficiency": null
    },
  ]
}
Be concise and accurate. Include only information explicitly present in the text.
Don't return ranges for any values, as this will cause the JSON to not parse correctly. If a range is presented, return the range as a string.
Only return JSON. The text is below:
"""
SUFFIX = """\n\n{sample}\n\n"""
def create_prompt(system, user):
    tokens = tokenizer.encode(user, max_length=25000, truncation=True)
    truncated_user = tokenizer.decode(tokens)
    return [
    {"role": "system", "content": system},
    {"role": "user", "content": truncated_user}, ]

In [None]:
contents = contents # {}

In [24]:
txt_dir = "../../data/txts"
# contents = {}
for filename in os.listdir(txt_dir):
    if filename.endswith(".txt") == False:
        continue
    filepath = os.path.join(txt_dir, filename)
    paper_id = os.path.splitext(filename)[0]
    if paper_id in contents:
        continue
    print(paper_id)

    with open(filepath, "r", encoding="utf-8") as file:
        text = file.read()
        instruction = create_prompt(PREFIX, text)
        json_string = pipe(instruction, max_new_tokens=1024)[0]["generated_text"][-1]['content']
        clean_json_string = json_string.strip("```json").strip("```").strip()
        try:
            parsed_data = json.loads(clean_json_string)
            contents[paper_id] = parsed_data
        except json.JSONDecodeError as e:
            print("Error creating JSON", e)
            print(clean_json_string)
        

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


132


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Error creating JSON Expecting value: line 1 column 1 (char 0)
Here is the extracted data in JSON format:

```
{
  "control_pce": 8.7,
  "control_voc": 1.13,
  "treated_pce": 23.1,
  "treated_voc": 1.177,
  "passivating_molecule": "1-dodecanethiol (DDT)",
  "perovskite_composition": "Cs0.05 FA x MA 1 - x PbI 3",
  "electron_transport_layer": "Spiro-OMeTAD",
  "hole_transport_layer": "Spiro-OMeTAD",
  "stability_tests": [
    {
      "test_name": "Wetting stability",
      "temperature": null,
      "time": 530,
      "humidity": 50-65,
      "control_efficiency": 19.7,
      "treatment_efficiency": 19.6
    },
    {
      "test_name": "Thermal stability",
      "temperature": 50-85,
      "time": 144,
      "humidity": null,
      "control_efficiency": 20.2,
      "treatment_efficiency": 20.3
    },
    {
      "test_name": "Illumination stability",
      "temperature": 30-35,
      "time": 2000,
      "humidity": null,
      "control_efficiency": 19.0,
      "treatment_efficiency": 20.

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


140


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


147


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


2


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


35


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


98


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


49


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Error creating JSON Expecting ',' delimiter: line 15 column 22 (char 411)
{
  "control_pce": null,
  "control_voc": null,
  "treated_pce": 16.25,
  "treated_voc": 1.31,
  "passivating_molecule": "n-butylamine acetate (BAAc)",
  "perovskite_composition": "BA2MAn-1PbI3n+1",
  "electron_transport_layer": "SnO2",
  "hole_transport_layer": "spiro-OMeTAD",
  "stability_tests": [
    {
      "test_name": "Dark storage",
      "temperature": null,
      "time": 4680,
      "humidity": 65 ± 10%,
      "control_efficiency": null,
      "treatment_efficiency": 93.8%
    },
    {
      "test_name": "Light-soaking",
      "temperature": null,
      "time": 1100,
      "humidity": null,
      "control_efficiency": null,
      "treatment_efficiency": 96.2%
    },
    {
      "test_name": "Thermal cycling",
      "temperature": 85 °C,
      "time": 558,
      "humidity": null,
      "control_efficiency": null,
      "treatment_efficiency": 95.7%
    },
    {
      "test_name": "Light cycling",
      "

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


32


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


91


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


40


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


107


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


96


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


47


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


100


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


125


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Error creating JSON Expecting ',' delimiter: line 21 column 25 (char 559)
{
  "control_pce": null,
  "control_voc": null,
  "treated_pce": 23.7,
  "treated_voc": 1.14,
  "passivating_molecule": "selenophene",
  "perovskite_composition": "Cs1-xFAxPbI3",
  "electron_transport_layer": "SnO2",
  "hole_transport_layer": "PTAA/Spiro-OMeTAD",
  "stability_tests": [
    {
      "test_name": "Dark storage stability",
      "temperature": null,
      "time": null,
      "humidity": null,
      "control_efficiency": null,
      "treatment_efficiency": 91
    },
    {
      "test_name": "Light-soaking stability",
      "temperature": 45 ± 5°C,
      "time": 24 hours,
      "humidity": 30~50% RH,
      "control_efficiency": null,
      "treatment_efficiency": 80
    },
    {
      "test_name": "Thermal cycling stability",
      "temperature": 85 ± 5°C,
      "time": null,
      "humidity": null,
      "control_efficiency": null,
      "treatment_efficiency": 80
    },
    {
      "test_name": "Ligh

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


122


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


65


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


19


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


17


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


10


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


25


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


59


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


88
Error creating JSON Expecting ',' delimiter: line 31 column 21 (char 965)
{
  "control_pce": 19.03,
  "control_voc": 0.92,
  "treated_pce": 21.46,
  "treated_voc": 0.94,
  "passivating_molecule": "NaF",
  "perovskite_composition": "(Cs 0.05 FA 0.54 MA 0.41 )Pb(I 0.98 Br 0.02 ) 3",
  "electron_transport_layer": "SnO2",
  "hole_transport_layer": "Spiro-OMeTAD",
  "stability_tests": [
    {
      "test_name": "Continuous one-sun illumination in a nitrogen atmosphere",
      "temperature": null,
      "time": 1000,
      "humidity": null,
      "control_efficiency": 19.03,
      "treatment_efficiency": 21.46
    },
    {
      "test_name": "Thermal stress at 85 °C in a nitrogen atmosphere",
      "temperature": 85,
      "time": 1000,
      "humidity": null,
      "control_efficiency": 19.03,
      "treatment_efficiency": 21.46
    },
    {
      "test_name": "Storage in ambient air with a relative humidity of about 25%-45% and a temperature of about 25-40 °C",
      "temperature": null

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


22


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


117


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


50


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


81


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


110


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


57
86


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


97


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


46


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


101


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


90


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


41


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Error creating JSON Expecting ',' delimiter: line 13 column 25 (char 487)
{
  "control_pce": 21.94,
  "control_voc": 1.135,
  "treated_pce": 23.92,
  "treated_voc": 1.157,
  "passivating_molecule": "ortho-(phenylene)di(ethylammonium) iodide (o-PDEAI2)",
  "perovskite_composition": "PbI2: PbBr2: FAI: MAI: CsI: MACl = 0.98: 0.02: 0.81: 0.04: 0.05: 0.20",
  "electron_transport_layer": "fluorine-doped tin oxide (FTO)",
  "hole_transport_layer": "spiro-OMeTAD",
  "stability_tests": [
    {
      "test_name": "Long-term stability test",
      "temperature": 85 °C,
      "time": 1000 h,
      "humidity": 40-50% RH,
      "control_efficiency": 58%,
      "treatment_efficiency": 85%
    },
    {
      "test_name": "Thermal stability test",
      "temperature": 85 °C,
      "time": 1000 h,
      "humidity": 0% RH,
      "control_efficiency": 49%,
      "treatment_efficiency": 25%
    }
  ]
}
106


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


108


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Error creating JSON Expecting ',' delimiter: line 15 column 22 (char 391)
{
  "control_pce": null,
  "control_voc": null,
  "treated_pce": 24.48,
  "treated_voc": 1.138,
  "passivating_molecule": "PEAI",
  "perovskite_composition": "FAPbI3",
  "electron_transport_layer": "mp-TiO2",
  "hole_transport_layer": "Spiro-OMeTAD",
  "stability_tests": [
    {
      "test_name": "Thermal stability test",
      "temperature": 85,
      "time": 1300,
      "humidity": 15 to 25% RH,
      "control_efficiency": 80%,
      "treatment_efficiency": 80%
    },
    {
      "test_name": "Thermal stability test at 150°C",
      "temperature": 150,
      "time": 20,
      "humidity": 15 to 25% RH,
      "control_efficiency": 60%,
      "treatment_efficiency": 80%
    },
    {
      "test_name": "Long-term stability test",
      "temperature": 25°C,
      "time": 400,
      "humidity": 25°C,
      "control_efficiency": null,
      "treatment_efficiency": null
    }
  ]
}
33


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


34


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


99


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


48


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


146


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Error creating JSON Expecting ',' delimiter: line 13 column 25 (char 667)
{
  "control_pce": 21.5,
  "control_voc": 1.128,
  "treated_pce": 23.05,
  "treated_voc": 1.170,
  "passivating_molecule": "1,2-bis(4-phenyl)-1,2-dicarba-closo-decaborane (CB-ph) dodecahydro-arachno-bis(acatonitrile)decaborane (CB-ph) and 1,2-bis(4-nitrobenzene)-1,2-dicarba-closo-dodecaborane (CB-NO2) and 1,2-bis(4-phenyl)-1,2-dicarba-closo-decaborane (CB-NH2) decorated with phenylamino groups",
  "perovskite_composition": "Cs0.05 (MA0.05 FA0.95)0.95 Pb(I0.95 Br0.05)3",
  "electron_transport_layer": "C60",
  "hole_transport_layer": "BCP",
  "stability_tests": [
    {
      "test_name": "Maximum Power Point Tracking (MPP) in glovebox",
      "temperature": 26 °C,
      "time": 400 h,
      "humidity": 30% RH,
      "control_efficiency": 21.10%,
      "treatment_efficiency": 23.04%
    },
    {
      "test_name": "MPP tracking under 1 sun illumination in air (30% RH) and at 40 °C",
      "temperature": 40 °C,
     

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


4


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


141


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


74


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


133


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


148


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


73


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


134


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


93


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


42


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


105


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


39


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


94


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Error creating JSON Expecting ',' delimiter: line 15 column 21 (char 372)
{
  "control_pce": null,
  "control_voc": null,
  "treated_pce": 20.6,
  "treated_voc": 1.129,
  "passivating_molecule": "P4VP",
  "perovskite_composition": null,
  "electron_transport_layer": null,
  "hole_transport_layer": null,
  "stability_tests": [
    {
      "test_name": "Dark storage stability",
      "temperature": null,
      "time": 6000,
      "humidity": 25-50% relative humidity,
      "control_efficiency": null,
      "treatment_efficiency": 80%
    },
    {
      "test_name": "Light-soaking stability",
      "temperature": null,
      "time": null,
      "humidity": 25-50% relative humidity,
      "control_efficiency": null,
      "treatment_efficiency": null
    },
    {
      "test_name": "Thermal cycling stability",
      "temperature": 85 C,
      "time": 300,
      "humidity": null,
      "control_efficiency": null,
      "treatment_efficiency": 80%
    },
    {
      "test_name": "Light cycli

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Error creating JSON Extra data: line 21 column 1 (char 434)
{
  "control_pce": null,
  "control_voc": null,
  "treated_pce": 25.1,
  "treated_voc": null,
  "passivating_molecule": null,
  "perovskite_composition": null,
  "electron_transport_layer": null,
  "hole_transport_layer": null,
  "stability_tests": [
    {
      "test_name": null,
      "temperature": null,
      "time": null,
      "humidity": null,
      "control_efficiency": null,
      "treatment_efficiency": null
    }
  ]
}
```

Note: The provided text does not contain specific data on power conversion efficiency (PCE), open-circuit voltage (VOC), passivating molecules, electron transport layers, hole transport layers, or stability tests. Therefore, the corresponding fields are set to null.
102


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


37


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


30
Error creating JSON Expecting value: line 1 column 1 (char 0)
Here is the extracted data in JSON format:

```
{
  "control_pce": null,
  "control_voc": null,
  "treated_pce": 23.21,
  "treated_voc": 1.16,
  "passivating_molecule": "Ammonia",
  "perovskite_composition": "α-FAPbI3",
  "electron_transport_layer": null,
  "hole_transport_layer": null,
  "stability_tests": [
    {
      "test_name": "Dark storage",
      "temperature": null,
      "time": null,
      "humidity": null,
      "control_efficiency": null,
      "treatment_efficiency": null
    },
    {
      "test_name": "Light-soaking",
      "temperature": null,
      "time": null,
      "humidity": null,
      "control_efficiency": null,
      "treatment_efficiency": null
    },
    {
      "test_name": "Thermal cycling",
      "temperature": null,
      "time": null,
      "humidity": null,
      "control_efficiency": null,
      "treatment_efficiency": null
    },
    {
      "test_name": "Light cycling",
      "tempera

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Error creating JSON Extra data: line 21 column 1 (char 434)
{
  "control_pce": null,
  "control_voc": null,
  "treated_pce": null,
  "treated_voc": null,
  "passivating_molecule": null,
  "perovskite_composition": null,
  "electron_transport_layer": null,
  "hole_transport_layer": null,
  "stability_tests": [
    {
      "test_name": null,
      "temperature": null,
      "time": null,
      "humidity": null,
      "control_efficiency": null,
      "treatment_efficiency": null
    }
  ]
}
```

Note: The provided text does not contain specific data on the power conversion efficiency (PCE), open-circuit voltage (VOC), treated PCE, and treated VOC for the control and treated perovskites. Therefore, these fields are set to null.
79


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


142


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Error creating JSON Expecting ',' delimiter: line 14 column 18 (char 428)
{
  "control_pce": null,
  "control_voc": null,
  "treated_pce": 16.13,
  "treated_voc": 1.08,
  "passivating_molecule": "Formamidinium iodide (FAI)",
  "perovskite_composition": "MAPbI3-(BA)2(MA)nÀ1Pb(nI)3n+1",
  "electron_transport_layer": "2D perovskite layer",
  "hole_transport_layer": "2D perovskite layer",
  "stability_tests": [
    {
      "test_name": "Humidity stability",
      "temperature": null,
      "time": 50 days,
      "humidity": 65% G 5%,
      "control_efficiency": null,
      "treatment_efficiency": 71%
    },
    {
      "test_name": "Thermal stability",
      "temperature": 85 C G 3 C,
      "time": 30 days,
      "humidity": null,
      "control_efficiency": null,
      "treatment_efficiency": 74%
    },
    {
      "test_name": "Illumination stability",
      "temperature": null,
      "time": 24 hr,
      "humidity": null,
      "control_efficiency": null,
      "treatment_efficiency": 9

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Error creating JSON Expecting ',' delimiter: line 15 column 21 (char 389)
{
  "control_pce": null,
  "control_voc": null,
  "treated_pce": 20.48,
  "treated_voc": 1.13,
  "passivating_molecule": "4-fluoroaniline (FAL)",
  "perovskite_composition": "MAPbI3",
  "electron_transport_layer": null,
  "hole_transport_layer": null,
  "stability_tests": [
    {
      "test_name": "Humidity stability",
      "temperature": null,
      "time": null,
      "humidity": 85-95%",
      "control_efficiency": null,
      "treatment_efficiency": "retained 75% of initial efficiency"
    },
    {
      "test_name": "Photostability",
      "temperature": 85 °C,
      "time": 100 h,
      "humidity": null,
      "control_efficiency": null,
      "treatment_efficiency": "retained 75% of initial efficiency"
    }
  ]
}
0


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Error creating JSON Expecting ',' delimiter: line 13 column 24 (char 431)
{
  "control_pce": 25.7,
  "control_voc": 1.17,
  "treated_pce": 23.77,
  "treated_voc": 1.145,
  "passivating_molecule": "Cyclohexylmethylammonium iodide (CMAI)",
  "perovskite_composition": "α-formamidinium lead triiodide (FAPbI3)",
  "electron_transport_layer": "C60",
  "hole_transport_layer": "SAMs (self-assembled monolayers)",
  "stability_tests": [
    {
      "test_name": "Dark storage stability",
      "temperature": 85°C,
      "time": 1500 hours,
      "humidity": 50 ± 10% RH,
      "control_efficiency": 23.2%,
      "treatment_efficiency": 19.9%
    },
    {
      "test_name": "Light-soaking stability",
      "temperature": 65°C,
      "time": 1200 hours,
      "humidity": 50 ± 10% RH,
      "control_efficiency": 14.2%,
      "treatment_efficiency": 19.9%
    },
    {
      "test_name": "Thermal cycling stability",
      "temperature": 65°C,
      "time": 500 hours,
      "humidity": 50 ± 10% RH,
     

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


70


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


137


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


9


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


77


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Error creating JSON Expecting ',' delimiter: line 13 column 24 (char 361)
{
  "control_pce": 21.2,
  "control_voc": 1.07,
  "treated_pce": 24.1,
  "treated_voc": 1.14,
  "passivating_molecule": "PEAMA +",
  "perovskite_composition": "FA 0.9 Cs 0.1 PbI 3",
  "electron_transport_layer": null,
  "hole_transport_layer": null,
  "stability_tests": [
    {
      "test_name": "High-temperature photostability test",
      "temperature": 85°C,
      "time": 150 hours,
      "humidity": null,
      "control_efficiency": 90%,
      "treatment_efficiency": 90%
    },
    {
      "test_name": "High-temperature photostability test",
      "temperature": 100°C,
      "time": 80 hours,
      "humidity": null,
      "control_efficiency": null,
      "treatment_efficiency": null
    }
  ]
}
130


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


121


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


66


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


126


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


61


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Error creating JSON Expecting ',' delimiter: line 23 column 21 (char 581)
{
  "control_pce": null,
  "control_voc": null,
  "treated_pce": 23.1,
  "treated_voc": 1.23,
  "passivating_molecule": "Butylamine",
  "perovskite_composition": "MAPbI3",
  "electron_transport_layer": "Spiro-OMeTAD",
  "hole_transport_layer": "SnO2",
  "stability_tests": [
    {
      "test_name": "Thermal Stability",
      "temperature": 85,
      "time": 125,
      "humidity": null,
      "control_efficiency": null,
      "treatment_efficiency": 60
    },
    {
      "test_name": "Environmental Stability",
      "temperature": null,
      "time": 600,
      "humidity": 30-40,
      "control_efficiency": null,
      "treatment_efficiency": 92
    }
  ]
}
13


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


128
68


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


14


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


21


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


26


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


28


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


113


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


54


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


85


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


114


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Error creating JSON Extra data: line 21 column 1 (char 457)
{
  "control_pce": null,
  "control_voc": null,
  "treated_pce": 23.37,
  "treated_voc": 1.17,
  "passivating_molecule": "Piperazinium iodide (PI)",
  "perovskite_composition": null,
  "electron_transport_layer": null,
  "hole_transport_layer": null,
  "stability_tests": [
    {
      "test_name": null,
      "temperature": null,
      "time": null,
      "humidity": null,
      "control_efficiency": null,
      "treatment_efficiency": null
    }
  ]
}
```

Note: The provided text does not contain specific data on the control perovskite's power conversion efficiency (PCE) and open-circuit voltage (VOC). Therefore, the fields `control_pce` and `control_voc` are set to `null`.
53


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


82


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


8


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


76


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


131


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Error creating JSON Expecting ',' delimiter: line 21 column 24 (char 568)
{
  "control_pce": 22.2,
  "control_voc": 1.15,
  "treated_pce": 25.5,
  "treated_voc": 1.208,
  "passivating_molecule": "Al2O3",
  "perovskite_composition": "FA0.95MA0.05Pb(I0.95Br0.05)3",
  "electron_transport_layer": "PTAA",
  "hole_transport_layer": "Me-4PACz",
  "stability_tests": [
    {
      "test_name": "Accelerated lifetime test",
      "temperature": 85,
      "time": 1000,
      "humidity": null,
      "control_efficiency": 22.2,
      "treatment_efficiency": 24.7
    },
    {
      "test_name": "Maximum-power-point tracking",
      "temperature": 35-40,
      "time": 300,
      "humidity": null,
      "control_efficiency": 22.2,
      "treatment_efficiency": 24.71
    }
  ]
}
71


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


136


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


144


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


1


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


138


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Error creating JSON Expecting ',' delimiter: line 13 column 24 (char 372)
{
  "control_pce": 24.6,
  "control_voc": null,
  "treated_pce": 25.3,
  "treated_voc": null,
  "passivating_molecule": "3-mercaptopropionic acid (3-MPA)",
  "perovskite_composition": "Cs0.05MA0.10FA0.85PbI3",
  "electron_transport_layer": "FTO",
  "hole_transport_layer": "FTO",
  "stability_tests": [
    {
      "test_name": "ISOS-L-3 protocol",
      "temperature": 65°C,
      "time": 1075 h,
      "humidity": "~50% RH",
      "control_efficiency": 24.6%,
      "treatment_efficiency": 25.3%
    },
    {
      "test_name": "ISOS-L-3 protocol",
      "temperature": 85°C,
      "time": 1560 h,
      "humidity": "~50% RH",
      "control_efficiency": null,
      "treatment_efficiency": null
    },
    {
      "test_name": "ISOS-L-3 protocol",
      "temperature": 65°C,
      "time": 500 h,
      "humidity": "~85% RH",
      "control_efficiency": 23.1%,
      "treatment_efficiency": 25.0%
    },
    {
      "test_na

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


78


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


143


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


31


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


36


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


95


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


44


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


103


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


92


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


43


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


104


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


115


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


52


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


83


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


29


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


112


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


55


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


84


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


27


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


20


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


69


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


15


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


12


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


129


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


127


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


60


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


120


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


67


In [25]:
json_path = "../../data/finetuned_llama_output.json"
with open(json_path, "w") as file:
    json.dump(contents, file, indent=4)

{'control_pce': None,
 'control_voc': None,
 'treated_pce': 23.9,
 'treated_voc': None,
 'passivating_molecule': 'isopropylammonium chloride',
 'perovskite_composition': 'FAPbI3',
 'electron_transport_layer': None,
 'hole_transport_layer': None,
 'stability_tests': [{'test_name': 'dark storage',
   'temperature': None,
   'time': None,
   'humidity': None,
   'control_efficiency': None,
   'treatment_efficiency': None},
  {'test_name': 'light-soaking',
   'temperature': None,
   'time': None,
   'humidity': None,
   'control_efficiency': None,
   'treatment_efficiency': None},
  {'test_name': 'thermal cycling',
   'temperature': None,
   'time': None,
   'humidity': None,
   'control_efficiency': None,
   'treatment_efficiency': None},
  {'test_name': 'light cycling',
   'temperature': None,
   'time': None,
   'humidity': None,
   'control_efficiency': None,
   'treatment_efficiency': None},
  {'test_name': 'solar-thermal cycling',
   'temperature': None,
   'time': None,
   'humidity