# This file compares the teamtat annotation with Extraction performed (Json)

In [1]:
from sklearn.metrics import precision_score, recall_score, f1_score
from difflib import SequenceMatcher
import numpy as np
import json
import os
import xml.etree.ElementTree as ET 
import pandas as pd
import re
from sklearn.metrics.pairwise import cosine_similarity

## File Preparation

In [2]:
def str_toJson(string):
    ##The json output from annotation dataframe was not in correct json format
    # We will change the None to null
    json_string = string.replace("None", "null")

    try:
        # Try to load the JSON string
        json_object = json.loads(json_string)
        return json_object
    except json.JSONDecodeError as e:
        # Catch JSONDecodeError if the string is not valid JSON
        print(f"Error decoding JSON: {e}")
        return None
    except Exception as e:
        # Catch any other exceptions
        print(f"An error occurred: {e}")
        return None

In [3]:
def include_passivating(dictionary):
    ##In extraction json, realized that some extraction has passivating molecule that is NOT included in its stability testing. 
    ## Since passivating molecule (if exist) needs to be in stability testing (nexted dictionary), we will transfer the information and spit out a cleaned dictionary. 
    if "passivating_molecule" in dictionary.keys():
        passivating = dictionary['passivating_molecule']
        del dictionary['passivating_molecule']
        
        for entity in dictionary.keys():
            if entity.startswith('test'):
                # print(i['entity'])
                if type(dictionary[entity]) == dict:
                    if 'passivating_molecule' in dictionary[entity].keys():
                        continue
                    else:
                        # print("Have to include passivating molecule in tests")
                        dictionary[entity]['passivating_molecule'] = passivating
        
    return dictionary

In [4]:
## Convert all numerical data into float for both

#### WORK ON NUMERICAL DATA THAT INCLUDES RANGE "-" 
def convert_numeric(dictionary):
    numerical_key = ['time', 'efficiency_cont', 'efficiency_tret', 'control_pce', 'treated_pce', 'control_voc', 'treated_voc']
    exception_numeric = ['humidity', 'temperature']

    translation_table = str.maketrans('', '', 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!@#$%^&*()')
    for key in dictionary.keys():
        if (key.startswith('test')) & (type(dictionary[key]) == dict):
            for entity in dictionary[key].keys():
                if entity in numerical_key:
                    # print(dictionary[key][entity])
                    if isinstance(dictionary[key][entity], str): 
                        substitute = re.sub(r'[^0-9.]', '', dictionary[key][entity][:4])
                        if len(substitute) != 0:
                            numerical_value = float(substitute)
                            dictionary[key][entity] = numerical_value
                        else:
                            dictionary[key][entity] = None
                elif entity in exception_numeric:
                    if isinstance(dictionary[key][entity], str): 
                        if "-" not in dictionary[key][entity]:
                            # print("regular_case",dictionary[key][entity])
                            substitute = re.sub(r'[^0-9.]', '', dictionary[key][entity][:4])
                            if len(substitute) != 0:
                                numerical_value = float(substitute)
                                dictionary[key][entity] = numerical_value
                            else:
                                dictionary[key][entity] = None
                        # else:
                            
                        #     print(dictionary[key][entity])


    return dictionary

In [5]:
def convert_efficiency(dictionary):
    entity_decimal = ['efficiency_cont','efficiency_tret']
    for key in dictionary.keys():
        if (key.startswith('test')) & (type(dictionary[key]) == dict):
            for entity in dictionary[key].keys():
                if (entity in entity_decimal) and (dictionary[key][entity] != None):
                    if dictionary[key][entity] == dictionary[key][entity] > 1:
                        dictionary[key][entity] = dictionary[key][entity] / 100
    return dictionary


    

#### Analyzing these outputs

Annotation notes: 
- THE 4 basic variable that is to compare is PEROVSKITE COMPOSITION, ETL, HTL, STRUCTURE
- Stability entity: efficiency_control is wrong, All value is None, so ignore. 
- Common entity: ['stability_type', 'passivating_molecule', 'humidity', 'temperature', 'time', 'control_pce', 'treated_pce', 'control_voc', 'treated_voc', 'efficiency_tret'] 
    - 'efficiency_cont' are included or not

- the efficiency in extracted data need to be converted to decimals since it is in percentage SOMETIMES
    - 'control_efficiency' and 'treatment_efficiency can be ignored


Extraction notes:
- some extraction has passivating molecule that is NOT included in its stability testing. 

### Loading Teamtat Annotation as dataframe

In [6]:
pd.read_csv("../data/150_papers_json_update.csv")

Unnamed: 0,first_num,id,text,memory,output,second_num
0,0,0_54,\t\t\t of 5 Downloaded from https://www.scienc...,"{""perovskite_composition"": ""Cs0.05FA0.85MA0.1P...","{""perovskite_composition"": ""Cs0.05FA0.85MA0.1P...",54
1,1,1_22,\t\t\t NAture PhotoNiCS | VOL 13 | JULY 2019 |...,"{""perovskite_composition"": null, ""electron_tra...","{""perovskite_composition"": null, ""electron_tra...",22
2,2,2_75,\t\t\t Nature eNerGY | VOL 6 | JANUARY 2021 | ...,"{""perovskite_composition"": ""(FAPbI3)0.95(MAPbB...","{""perovskite_composition"": ""(FAPbI3)0.95(MAPbB...",75
3,3,3_52,\t\t\t of 6 RESEARCH | REPORT Downloaded from ...,"{""perovskite_composition"": ""Cs0.05(MA0.10FA0.8...","{""perovskite_composition"": ""Cs0.05(MA0.10FA0.8...",52
4,4,4_26,"Proppe 1,2,10 , Andrew Johnston 2,10 , Sam T...","{""perovskite_composition"": null, ""electron_tra...","{""perovskite_composition"": ""(MAPbBr3)0.05(FAPb...",26
...,...,...,...,...,...,...
143,145,145_31,\t\t\t www.advancedsciencenews.com© 2018 WILEY...,"{""perovskite_composition"": ""Cs0.05(MA0.17FA0.8...","{""perovskite_composition"": ""Cs0.05(MA0.17FA0.8...",31
144,146,146_36,\t\t\t https://doi.org/10.1038/s41467-022-3420...,"{""perovskite_composition"": ""Cs0.05(MA0.05FA0.9...","{""perovskite_composition"": ""Cs0.05(MA0.05FA0.9...",36
145,147,147_41,Table 1 | PV parameters of the best-performed...,"{""perovskite_composition"": ""FAPbI 3"", ""electro...","{""perovskite_composition"": ""FAPbI 3"", ""electro...",41
146,148,148_26,"\t\t\t 15214095, 2020, 12, Downloaded from htt...","{""perovskite_composition"": ""Cs0.05FA0.85MA0.10...","{""perovskite_composition"": ""Cs0.05FA0.85MA0.10...",26


In [7]:
#Teamtat Annotation
annotation_df = pd.read_csv("../data/150_papers_json_update.csv")[["id", "first_num", "output"]]
annotation_df = annotation_df.sort_values(by = ['first_num'])

In [8]:
##Change the format (minor) to be converted to json
annotation_df['output'] = annotation_df['output'].apply(str_toJson)
annotation_df

Unnamed: 0,id,first_num,output
0,0_54,0,{'perovskite_composition': 'Cs0.05FA0.85MA0.1P...
1,1_22,1,"{'perovskite_composition': None, 'electron_tra..."
2,2_75,2,{'perovskite_composition': '(FAPbI3)0.95(MAPbB...
3,3_52,3,{'perovskite_composition': 'Cs0.05(MA0.10FA0.8...
4,4_26,4,{'perovskite_composition': '(MAPbBr3)0.05(FAPb...
...,...,...,...
143,145_31,145,{'perovskite_composition': 'Cs0.05(MA0.17FA0.8...
144,146_36,146,{'perovskite_composition': 'Cs0.05(MA0.05FA0.9...
145,147_41,147,"{'perovskite_composition': 'FAPbI 3', 'electro..."
146,148_26,148,{'perovskite_composition': 'Cs0.05FA0.85MA0.10...


In [9]:
len(annotation_df['first_num'].unique())

148

In [10]:
annotation_df['output'][7]

{'perovskite_composition': 'Cs0.05(MA)0.16(FA)0.79Pb(I0.83Br0.17 )3',
 'electron_transport_layer': 'C60',
 'hole_transport_layer': None,
 'structure_pin_nip': 'NIP',
 'test_1': {'stability_type': 'ISOSD1',
  'passivating_molecule': '4-tert-butyl-benzylammonium',
  'humidity': '85',
  'temperature': None,
  'time': '100',
  'control_pce': None,
  'treated_pce': None,
  'control_voc': '1.09',
  'treated_voc': '1.13',
  'efficiency_cont': '60',
  'efficiency_tret': '90'},
 'test_2_2': {'stability_type': 'ISOSL3',
  'passivating_molecule': 'phenethylammonium iodide',
  'humidity': '50',
  'temperature': '90',
  'time': '100',
  'control_pce': '8.9',
  'treated_pce': '5.94',
  'control_voc': None,
  'treated_voc': None,
  'efficiency_cont': '0',
  'efficiency_tret': '95'},
 'test_2': {'stability_type': 'ISOSL1',
  'passivating_molecule': '(ethylenedioxy)bis(ethylammonium) lead iodide',
  'humidity': '65',
  'temperature': None,
  'time': '1000',
  'control_pce': None,
  'treated_pce': '21.0

In [11]:
annotation_df['output'] = annotation_df['output'].apply(convert_numeric)

In [12]:
# # Exporting annotation
# annotation_df.to_csv('annotation.csv', index=False)

### Loading in JSON extraction

In [41]:
def convert_efficiency_key(dict):
    for key, item in dict.items():
        if 'test' in key:
            if 'retained_proportion_cont' in dict[key]:
                dict[key]['efficiency_cont'] = dict[key].pop('retained_proportion_cont')
            if 'retained_proportion_tret' in dict[key]:
                dict[key]['efficiency_tret'] = dict[key].pop('retained_proportion_tret')
    return dict
            

In [145]:
## extraction performed by basemodel
# Read JSON from a file
with open("../data/output1.json", 'r') as f:
    extraction = json.load(f)

extraction_base = pd.DataFrame(list(extraction.items()), columns=['paper_num', 'output'])
extraction_base['paper_num'] = pd.to_numeric(extraction_base['paper_num'])
extraction_base = extraction_base.sort_values('paper_num')
extraction_base['output'] = extraction_base['output'].apply(include_passivating)
extraction_base['output'] = extraction_base['output'].apply(convert_numeric)
extraction_base['output'] = extraction_base['output'].apply(convert_efficiency)
extraction_base

Unnamed: 0,paper_num,output
77,0,"{'perovskite_composition': 'FAPbI3', 'electron..."
124,1,"{'perovskite_composition': 'FA1-x MAx PbI3', '..."
7,2,"{'perovskite_composition': '(BA)2PbI4', 'elect..."
34,3,{'perovskite_composition': 'Cs0.05 (MA0.10FA0....
30,4,{'perovskite_composition': '(MAPbBr3)0.05(FAPb...
...,...,...
40,145,{'perovskite_composition': 'Cs0.05(MA0.17FA0.8...
87,146,{'perovskite_composition': 'Cs0.05(MA0.05FA0.9...
120,147,"{'perovskite_composition': 'FAPbI3', 'electron..."
83,148,{'perovskite_composition': 'Cs 0.05 FA 0.85 MA...


In [146]:
## extraction performed by finetuned deepseek
# Read JSON from a file
with open("../data/deepseek_finetuned_formatted.json", 'r') as f:
    extraction = json.load(f)

extraction_train = pd.DataFrame(list(extraction.items()), columns=['paper_num', 'output'])
extraction_train['paper_num'] = pd.to_numeric(extraction_train['paper_num'])
extraction_train = extraction_train.sort_values('paper_num')
extraction_train['output'] = extraction_train['output'].apply(include_passivating)
extraction_train['output'] = extraction_train['output'].apply(convert_numeric)
extraction_train['output'] = extraction_train['output'].apply(convert_efficiency)
extraction_train['output'] = extraction_train['output'].apply(convert_efficiency_key)
extraction_train

Unnamed: 0,paper_num,output
111,0,"{'perovskite_composition': 'FAPbI3', 'electron..."
67,1,{'perovskite_composition': 'FA1-x MAx PbI3 (FA...
46,2,{'perovskite_composition': 'FA0.85Cs0.15PbI2.8...
30,3,{'perovskite_composition': 'Cs0.05(MA0.10FA0.8...
27,4,"{'perovskite_composition': None, 'electron_tra..."
...,...,...
52,145,{'perovskite_composition': 'Cs0.05(MA0.17FA0.8...
73,146,{'perovskite_composition': 'Cs0.05(FA0.05MA0.9...
102,147,"{'perovskite_composition': 'FAPbI3', 'electron..."
80,148,{'perovskite_composition': 'Cs0.05 FA0.85 MA0....


In [147]:
extraction_train.iloc[4]['output']

{'perovskite_composition': None,
 'electron_transport_layer': None,
 'hole_transport_layer': None,
 'structure_pin_nip': None,
 'test_1': {'stability_type': 'ISOS-L',
  'passivating_molecule': '4-vinylbenzylammonium bromide',
  'humidity': None,
  'temperature': None,
  'time': None,
  'control_pce': 19.93,
  'treated_pce': 21.06,
  'control_voc': 1.06,
  'treated_voc': 1.1,
  'efficiency_cont': None,
  'efficiency_tret': 21.9}}

## Merging dataframe

In [148]:
evaluate_df_base = annotation_df.merge(extraction_base, left_on='first_num', right_on='paper_num')[["paper_num", "output_x",'output_y']]
evaluate_df_base.columns = ['paper_num', 'annotation', 'extracted']
evaluate_df_base

Unnamed: 0,paper_num,annotation,extracted
0,0,{'perovskite_composition': 'Cs0.05FA0.85MA0.1P...,"{'perovskite_composition': 'FAPbI3', 'electron..."
1,1,"{'perovskite_composition': None, 'electron_tra...","{'perovskite_composition': 'FA1-x MAx PbI3', '..."
2,2,{'perovskite_composition': '(FAPbI3)0.95(MAPbB...,"{'perovskite_composition': '(BA)2PbI4', 'elect..."
3,3,{'perovskite_composition': 'Cs0.05(MA0.10FA0.8...,{'perovskite_composition': 'Cs0.05 (MA0.10FA0....
4,4,{'perovskite_composition': '(MAPbBr3)0.05(FAPb...,{'perovskite_composition': '(MAPbBr3)0.05(FAPb...
...,...,...,...
121,145,{'perovskite_composition': 'Cs0.05(MA0.17FA0.8...,{'perovskite_composition': 'Cs0.05(MA0.17FA0.8...
122,146,{'perovskite_composition': 'Cs0.05(MA0.05FA0.9...,{'perovskite_composition': 'Cs0.05(MA0.05FA0.9...
123,147,"{'perovskite_composition': 'FAPbI 3', 'electro...","{'perovskite_composition': 'FAPbI3', 'electron..."
124,148,{'perovskite_composition': 'Cs0.05FA0.85MA0.10...,{'perovskite_composition': 'Cs 0.05 FA 0.85 MA...


In [149]:
evaluate_df_train = annotation_df.merge(extraction_train, left_on='first_num', right_on='paper_num')[["paper_num", "output_x",'output_y']]
evaluate_df_train.columns = ['paper_num', 'annotation', 'extracted']
evaluate_df_train

Unnamed: 0,paper_num,annotation,extracted
0,0,{'perovskite_composition': 'Cs0.05FA0.85MA0.1P...,"{'perovskite_composition': 'FAPbI3', 'electron..."
1,1,"{'perovskite_composition': None, 'electron_tra...",{'perovskite_composition': 'FA1-x MAx PbI3 (FA...
2,2,{'perovskite_composition': '(FAPbI3)0.95(MAPbB...,{'perovskite_composition': 'FA0.85Cs0.15PbI2.8...
3,3,{'perovskite_composition': 'Cs0.05(MA0.10FA0.8...,{'perovskite_composition': 'Cs0.05(MA0.10FA0.8...
4,4,{'perovskite_composition': '(MAPbBr3)0.05(FAPb...,"{'perovskite_composition': None, 'electron_tra..."
...,...,...,...
121,145,{'perovskite_composition': 'Cs0.05(MA0.17FA0.8...,{'perovskite_composition': 'Cs0.05(MA0.17FA0.8...
122,146,{'perovskite_composition': 'Cs0.05(MA0.05FA0.9...,{'perovskite_composition': 'Cs0.05(FA0.05MA0.9...
123,147,"{'perovskite_composition': 'FAPbI 3', 'electro...","{'perovskite_composition': 'FAPbI3', 'electron..."
124,148,{'perovskite_composition': 'Cs0.05FA0.85MA0.10...,{'perovskite_composition': 'Cs0.05 FA0.85 MA0....


In [150]:
# evaluate_df.to_csv('merged.csv', index=False)

In [151]:
for row in evaluate_df_train.itertuples():
    label_value = row.annotation
    extracted_value = row.extracted

    print(label_value)
    print(extracted_value)

{'perovskite_composition': 'Cs0.05FA0.85MA0.1PbI3', 'electron_transport_layer': 'C60', 'hole_transport_layer': '2PACz', 'structure_pin_nip': 'PIN', 'test_1': {'stability_type': 'ISOSL3', 'passivating_molecule': '4-chlorobenzenesulfonate', 'humidity': 50.0, 'temperature': 65.0, 'time': 1200.0, 'control_pce': 24.0, 'treated_pce': 26.9, 'control_voc': 0, 'treated_voc': 1.18, 'efficiency_cont': 0, 'efficiency_tret': 95.0}, 'test_1_2': {'stability_type': 'ISOSL3', 'passivating_molecule': '4-chlorobenzenesulfonate', 'humidity': 50.0, 'temperature': 85.0, 'time': 540.0, 'control_pce': 24.0, 'treated_pce': 26.9, 'control_voc': 0, 'treated_voc': 0, 'efficiency_cont': 0, 'efficiency_tret': 87.0}, 'test_2': {'stability_type': 'ISOSD2', 'passivating_molecule': '4-chlorobenzenesulfonate', 'humidity': 0, 'temperature': 85.0, 'time': 1500.0, 'control_pce': 24.0, 'treated_pce': 26.9, 'control_voc': 0, 'treated_voc': 0, 'efficiency_cont': 0, 'efficiency_tret': 95.0}}
{'perovskite_composition': 'FAPbI3'

In [152]:
evaluate_df["annotation"][6]

NameError: name 'evaluate_df' is not defined

In [153]:
evaluate_df["extracted"][1]

NameError: name 'evaluate_df' is not defined

## Evaluation

- We need precision and recall for EACH variable
- For each variable, calculate the F1 score - There is F1 score for each variable
- Take a weighted average ***For now, just take the average.

In [154]:
def tests_comparison(stability_annotated, label_dict, stability_extracted, extract_dict):
    # print(stability_annotated, label_dict, stability_extracted, extract_dict)
    stability_entity_annotated = ['stability_type', 'passivating_molecule', 'temperature', 'time', 'humidity', 'efficiency_cont', 'efficiency_tret', 'control_pce', 'treated_pce', 'control_voc', 'treated_voc']
    stability_entity_extracted = ['test_name', 'passivating_molecule', 'temperature', 'time', 'humidity', 'control_efficiency', 'treatment_efficiency', 'control_pce', 'treated_pce', 'control_voc', 'treated_voc']
    
    # print(f"stability_annotated{stability_annotated}")
    # print(f"label_dict{label_dict}")
    # print(f"stability_extracted{stability_extracted}")
    # print(f"extract_dict{extract_dict}")



    compared_metric = []
    numeric_data_annotated = []
    numeric_data_extracted = []
    for entity_i in range(len(stability_entity_annotated)):
        if entity_i <= 1:
            if stability_entity_extracted[entity_i] not in extract_dict.keys():
                extract_dict[stability_entity_extracted[entity_i]] = None

            if (label_dict[stability_entity_annotated[entity_i]] == None) | (extract_dict[stability_entity_extracted[entity_i]] == None):
                compared_metric.append(None)
            else:
                ##Text entity, perform Sequence Matcher 
                compared = SequenceMatcher(None, label_dict[stability_entity_annotated[entity_i]], extract_dict[stability_entity_extracted[entity_i]]).ratio()
                # print(compared)
                if entity_i == 0:
                    if compared > 0.9:
                        compared_metric.append(1)
                    else:
                        compared_metric.append(0)
                else:
                    compared_metric.append(compared)
        else:
            if stability_entity_extracted[entity_i] not in extract_dict.keys():
                extract_dict[stability_entity_extracted[entity_i]] = 0
            elif extract_dict[stability_entity_extracted[entity_i]] == None:
                extract_dict[stability_entity_extracted[entity_i]] = 0

            if stability_entity_annotated[entity_i] not in label_dict.keys():
                label_dict[stability_entity_annotated[entity_i]] = 0
            elif label_dict[stability_entity_annotated[entity_i]] == None:
                label_dict[stability_entity_annotated[entity_i]] = 0

                
            numeric_data_annotated.append(label_dict[stability_entity_annotated[entity_i]])
            numeric_data_extracted.append(extract_dict[stability_entity_extracted[entity_i]])

    if isinstance(numeric_data_extracted[0], list):
        ##There was one column with two temperature recorded as a list (probably thermal cycling)
        numeric_data_extracted[0] = numeric_data_extracted[0][1]

    # print(numeric_data_annotated, numeric_data_extracted)

    numeric_annotated_clean = []
    numeric_extracted_clean = []
    ##Clean the numeric data to skip any strings
    for i in range(len(numeric_data_annotated)):
        if (type(numeric_data_annotated[i]) == str) | (type(numeric_data_extracted[i]) == str):
            continue
        else:
            numeric_annotated_clean.append(numeric_data_annotated[i])
            numeric_extracted_clean.append(numeric_data_extracted[i])

    cos_sim = cosine_similarity([numeric_annotated_clean], [numeric_extracted_clean])
    compared_metric.append(cos_sim[0][0])
    
    return compared_metric    

In [155]:
def entity_comparison(entity, label, extracted_dict, text_similarity_threshold = 0.75, numerical_tolerance = 0.027):
    '''
    The tolarance of 2.7% was what was reasonable looking at the absolute difference
    treated_voc 1.18, 1.149, absolute difference 0.026271186440677895

    The text similarity were set to 75% due to the structure example
    FP, NIP, n-i-p, 0.75
    This should be positive
    
    '''
    text_entity = ['stability_type', 'passivating_molecule']
    numerical_entity = ['time', 'efficiency_cont', 'efficiency_tret', 'control_pce', 'treated_pce', 'control_voc', 'treated_voc']
    numerical_exception = ['temperature', 'humidity']

    if entity in text_entity:
        key_to_check = "test_name" if entity == "stability_type" else entity

        # If the key is missing in the extracted annotation, return False Negative
        if (label[entity]!=None) & (extracted_dict[key_to_check]==None):
            # print(f"FN, {label_annotation[id]}, {extraction_annotation[key_to_check]}")
            return "FN"
        elif (label[entity]==None) & (extracted_dict[key_to_check]!=None):
            # print(f"TN, {label_annotation[id]}, {extraction_annotation[key_to_check]}")
            return "TN"

        label_data = label.get(entity, "")
        extract_data = extracted_dict.get(key_to_check, "")

        # Convert lists to strings if necessary
        if isinstance(label_data, list):
            label_data = " ".join(map(str, label_data))  # Convert list to string
        if isinstance(extract_data, list):
            extract_data = " ".join(map(str, extract_data))  # Convert list to string

        # Ensure values are strings
        if not isinstance(label_data, str) or not isinstance(extract_data, str):
            # print(f"FP, {label_annotation[id]}, {extraction_annotation[key_to_check]}")
            return "FP"  # If data is still not a string, return False Positive

        # Compute similarity score
        similarity = SequenceMatcher(None, label_data.lower(), extract_data.lower()).ratio()

        if similarity > text_similarity_threshold:
            # print(f"TP,{entity} {label_data}, {extract_data}")
            return 'TP'
        else:
            # print(f"FP,{entity} {label_data}, {extract_data}, {similarity}")
            return "FP"
    elif entity in numerical_entity:
        # key_to_check = "control_efficiency" if entity == "efficiency_cont" else ("treatment_efficiency" if entity == "efficiency_tret" else entity)

        # print(f"annotated{label[entity]}")
        # print(f"extracted{extracted_dict[entity]}")
        if extracted_dict[entity] == None:
            extracted_dict[entity] = 0

        # If the key is missing in the extracted annotation, return False Negative
        if (label[entity]!=0) & ((extracted_dict[entity]==0) | (entity not in extracted_dict.keys())):
            # print(f"FN, {label_annotation[id]}, {extraction_annotation[entity]}")
            return "FN"
        elif (label[entity]==0) & (extracted_dict[entity]!=0):
            # print(f"TN, {label_annotation[id]}, {extraction_annotation[entity]}")
            return "TN"
        elif (label[entity]==0) & (extracted_dict[entity]==0):
            # print(f"TN, {label_annotation[id]}, {extraction_annotation[entity]}")
            return "TN"


        if isinstance(extracted_dict[entity], list):
            ##There was one column with two temperature recorded as a list (probably thermal cycling)
            extracted_dict[entity] = extracted_dict[entity][1]

        # Apply numerical tolerance check
        if (abs(label[entity] - extracted_dict[entity])) / (abs(label[entity]) )<= numerical_tolerance:

            # print(f"Numerical differences matched: {entity} {label[entity]}, {extracted_dict[entity]}, absolute difference {(abs(label[entity] - extracted_dict[entity])) / (abs(label[entity]) )}")
            return "TP"  # True Positive: Correct numerical extraction
        else:

            # print(f"Numerical differences no match: {entity}, {label[entity]}, {extracted_dict[entity]}, absolute difference {(abs(label[entity] - extracted_dict[entity])) / (abs(label[entity]) )}")
            return "FP"  # False Positive: Incorrect numerical extraction    
    else: 
        if isinstance(label[entity], (float, int)):
            if extracted_dict[entity] == None:
                extracted_dict[entity] = 0

            # If the key is missing in the extracted annotation, return False Negative
            if (label[entity]!=0) & ((extracted_dict[entity]==0) | (entity not in extracted_dict.keys())):
                # print(f"FN, {label_annotation[id]}, {extraction_annotation[entity]}")
                return "FN"
            elif (label[entity]==0) & (extracted_dict[entity]!=0):
                # print(f"TN, {label_annotation[id]}, {extraction_annotation[entity]}")
                return "TN"
            elif (label[entity]==0) & (extracted_dict[entity]==0):
                # print(f"TN, {label_annotation[id]}, {extraction_annotation[entity]}")
                return "TN"


            if isinstance(extracted_dict[entity], list):
                ##There was one column with two temperature recorded as a list (probably thermal cycling)
                extracted_dict[entity] = extracted_dict[entity][1]

            # Apply numerical tolerance check
            if (abs(label[entity] - extracted_dict[entity])) / (abs(label[entity]) )<= numerical_tolerance:

                # print(f"Numerical differences matched: {entity} {label[entity]}, {extracted_dict[entity]}, absolute difference {(abs(label[entity] - extracted_dict[entity])) / (abs(label[entity]) )}")
                return "TP"  # True Positive: Correct numerical extraction
            else:

                # print(f"Numerical differences no match: {entity}, {label[entity]}, {extracted_dict[entity]}, absolute difference {(abs(label[entity] - extracted_dict[entity])) / (abs(label[entity]) )}")
                return "FP"  # False Positive: Incorrect numerical extraction    
        else:
            # print(label[entity], type(label[entity]))
            if extracted_dict[entity] == None:
                extracted_dict[entity] = 0
            
            if ((extracted_dict[entity]==0) | (entity not in extracted_dict.keys())):
                # print(f"FN, {label_annotation[id]}, {extraction_annotation[entity]}")
                return "FN"

            if isinstance(extracted_dict[entity], list):
                ##There was one column with two temperature recorded as a list (probably thermal cycling)
                extracted_dict[entity] = extracted_dict[entity][1]
            
            if isinstance(extracted_dict[entity], str):
                ##Label is str, extraction is str, so perform text similarity
                similarity = SequenceMatcher(None, label[entity].lower(), extracted_dict[entity].lower()).ratio()
                if similarity > text_similarity_threshold:
                    # print(f"TP, {label_data}, {extract_data}, {similarity}")
                    return 'TP'
                else:
                    # print(f"FP, {label_data}, {extract_data}, {similarity}")
                    return "FP"
            else:
                if "+" in label[entity]:
                    # print(label[entity].split("+-"))
                    value = float(label[entity].split("+-")[0])
                    margin_error = float(label[entity].split("+-")[1])
                    range = (value-margin_error, value-margin_error)
                    if (range[0]<= extracted_dict[entity]) & (extracted_dict[entity]<=range[1]):
                        # print(f"TP, {label_data}, {extract_data}, {similarity}")
                        return 'TP'
                    else:
                        # print(f"FP, {label_data}, {extract_data}, {similarity}")
                        return "FP"
                else:
                    lower = float(label[entity].split("-")[0])
                    upper = float(label[entity].split("-")[1])
                    if (lower<= extracted_dict[entity]) & (extracted_dict[entity]<=upper):
                        # print(f"TP, {label_data}, {extract_data}, {similarity}")
                        return 'TP'
                    else:
                        # print(f"FP, {label_data}, {extract_data}, {similarity}")
                        return "FP"


In [156]:
def safe_division(numerator, denominator):
    """Returns division result, or 0 if the denominator is zero."""
    return numerator / denominator if denominator != 0 else 0

In [157]:
def text_comparison(id, label_annotation, extraction_annotation, text_similarity_threshold=0.8):
    """Compares text values using string similarity matching.
    - THE 4 basic variable that is to compare is PEROVSKITE COMPOSITION, ETL, HTL, STRUCTURE
    """

    # Handle special case for structure_pin_nip
    # key_to_check = "pin_nip_structure" if id == "structure_pin_nip" else id

    # If the key is missing in the extracted annotation, return False Negative
    if (label_annotation[id]!=None) & (extraction_annotation[id]==None):
        # print(f"FN, {label_annotation[id]}, {extraction_annotation[key_to_check]}")
        return "FN"
    elif (label_annotation[id]==None) & (extraction_annotation[id]!=None):
        # print(f"TN, {label_annotation[id]}, {extraction_annotation[key_to_check]}")
        return "TN"

    label_data = label_annotation.get(id, "")
    if id == 'electron_transport_layer' and label_data == "buckminsterfullerene":
        label_data = 'C60'
    extract_data = extraction_annotation.get(id, "")

    # Convert lists to strings if necessary
    if isinstance(label_data, list):
        label_data = " ".join(map(str, label_data))  # Convert list to string
    if isinstance(extract_data, list):
        extract_data = " ".join(map(str, extract_data))  # Convert list to string

    # Ensure values are strings
    if not isinstance(label_data, str) or not isinstance(extract_data, str):
        # print(f"FP, {label_annotation[id]}, {extraction_annotation[id]}")
        return "FP"  # If data is still not a string, return False Positive

    # Compute similarity score
    similarity = SequenceMatcher(None, label_data.lower(), extract_data.lower()).ratio()

    if similarity > text_similarity_threshold:
        # print(f"TP, {label_data}, {extract_data}, {similarity}")
        return 'TP'
    else:
        # print(f"FP, {label_data}, {extract_data}, {similarity}")
        return "FP"


In [158]:
def compare_json(df):
    """
    Compare labeled and extracted JSON data for correctness.

    TP: Correct value extracted by LLM.
    FN: LLM didn't extract this variable.
    FP: LLM extracted a value, but it was incorrect.
    TN: LLM halucinated and returned value that was not extracted
    """
    
    text_variables = ['perovskite_composition', 'electron_transport_layer', 'hole_transport_layer', 'structure_pin_nip']

    
    stability_entity_annotated = ['stability_type', 'temperature', 'time', 'humidity', 'passivating_molecule', 'efficiency_cont', 'efficiency_tret', 'control_pce', 'treated_pce', 'control_voc', 'treated_voc']
    stability_entity_extracted = ['test_name', 'temperature', 'time', 'humidity', 'passivating_molecule','control_efficiency', 'treatment_efficiency', 'control_pce', 'treated_pce', 'control_voc', 'treated_voc']
    
    # Initialize comparison dictionaries
    text_dict = {var: {"TP": 0, "FP": 0, "FN": 0, "TN": 0} for var in text_variables}
    stability_dict = {var: {"TP": 0, "FP": 0, "FN": 0, "TN": 0} for var in stability_entity_annotated}

    for row in df.itertuples():       
        label_value = row.annotation
        extracted_value = row.extracted

        # print(label_value)
        # print(extracted_value)

        for id, label in label_value.items():
            if ('test' in id) and (isinstance(label_value[id], dict)):
                ##Plan for stability test evaluation
                '''
                For each stability condition in annotation, 
                    Pair them with stability condition in extracted
                        With stability of annotation and extraction, use function tests_comparison that returns how similar 2 stabilities are
                    
                Once all the pair is calculated, find the stability name of extraction that was closest to annotation stability. 

                Using this dictionary, we will increment FN, FP, TN, TP for each element of the entity.
                '''
                matched = 0
                stability_match = {}
                for extract_id, extract_label in extracted_value.items():
                    if ('test' in extract_id) and (isinstance(extracted_value[extract_id], dict)):
                        matched += 1
                        match_list = tests_comparison(id, label, extract_id, extract_label)
                        match_list = [0 if item is None else item for item in match_list]
                        # print(extracted_value[extract_id])
                        # print(match_list)
                        stability_match[extract_id] = match_list
        
                if matched == 0:
                    #No stability were extracted, we will add stability_unmatched
                        ##We need to account for if there was NO stability extracted. 
                    for key in stability_dict:
                        if 'FN' in stability_dict[key]:
                            stability_dict[key]['FN'] += 1
                else:
                    stability_match_mean = {stability: np.mean(lis) for stability, lis in stability_match.items()}
                    max_key = max(stability_match_mean, key=stability_match_mean.get)  
                    # print(extracted_value[max_key])
                    ##Now, I need to compare each entity in that found max_key and fill in that FN, dictionary.
                    for entity in label_value[id].keys():
                        if entity == 'efficiency_control':
                            continue
                        if entity == 'perovskite_molecule':
                            continue
                        entity_result = entity_comparison(entity, label, extracted_value[max_key])
                        stability_dict[entity][entity_result] += 1  
            else:  
                result = text_comparison(id, label_value, extracted_value)
                text_dict[id][result] += 1


    # Merge all results
    combined_dict = {**text_dict, **stability_dict}
    # print("Performance for each variable in dictionary:", combined_dict)

    # Compute precision, recall, and F1-score
    variable_list, precision_list, recall_list, f1_list = [], [], [], []
    for variable, performance in combined_dict.items():
        TP, FP, FN = performance["TP"], performance["FP"], performance["FN"]
        
        precision = safe_division(TP, TP + FP)
        recall = safe_division(TP, TP + FN)
        f1 = safe_division(2 * precision * recall, precision + recall)

        variable_list.append(variable)
        precision_list.append(precision)
        recall_list.append(recall)
        f1_list.append(f1)

    return combined_dict, variable_list, precision_list, recall_list, f1_list

In [159]:
dict_result_base, variables_base, precisions_base, recalls_base, f1s_base = compare_json(evaluate_df_base)

In [160]:
dict_result_train, variables_train, precisions_train, recalls_train, f1s_train = compare_json(evaluate_df_train)

In [161]:
dict_result_train

{'perovskite_composition': {'TP': 42, 'FP': 50, 'FN': 9, 'TN': 25},
 'electron_transport_layer': {'TP': 20, 'FP': 63, 'FN': 1, 'TN': 42},
 'hole_transport_layer': {'TP': 41, 'FP': 45, 'FN': 2, 'TN': 38},
 'structure_pin_nip': {'TP': 27, 'FP': 56, 'FN': 10, 'TN': 33},
 'stability_type': {'TP': 0, 'FP': 48, 'FN': 166, 'TN': 0},
 'temperature': {'TP': 52, 'FP': 48, 'FN': 0, 'TN': 114},
 'time': {'TP': 44, 'FP': 57, 'FN': 66, 'TN': 47},
 'humidity': {'TP': 30, 'FP': 16, 'FN': 29, 'TN': 139},
 'passivating_molecule': {'TP': 37, 'FP': 94, 'FN': 5, 'TN': 78},
 'efficiency_cont': {'TP': 7, 'FP': 58, 'FN': 29, 'TN': 120},
 'efficiency_tret': {'TP': 30, 'FP': 143, 'FN': 2, 'TN': 39},
 'control_pce': {'TP': 38, 'FP': 52, 'FN': 6, 'TN': 118},
 'treated_pce': {'TP': 71, 'FP': 80, 'FN': 2, 'TN': 61},
 'control_voc': {'TP': 44, 'FP': 10, 'FN': 7, 'TN': 153},
 'treated_voc': {'TP': 65, 'FP': 23, 'FN': 4, 'TN': 122}}

## Calculate Macro f1 score

In [162]:
def macro_f1(f1_list, weight = None):
    if weight == None:
        #If no weight given, do unweighted average of f1 score
        return sum(f1_list) / len(f1_list)
    total_f1 = 0
    for i in range(len(f1_list)):
        total_f1 += (f1_list[i] * weight[i])
    return total_f1 / sum(weight)
    


In [163]:
# Define column names
columns = ['Macro F1 score weight distribution', 'Base Deepseek 8bit', 'Fine-Tuned Deepseek 4 bit', 'Fine-Tuned Deepseek 8 bit', 'Llama 3 billion parameter']

# Create a DataFrame with NaN values
df_f1scores = pd.DataFrame(np.nan, index=[0, 1, 2, 3, 4, 5, 6], columns=columns)
df_f1scores

Unnamed: 0,Macro F1 score weight distribution,Base Deepseek 8bit,Fine-Tuned Deepseek 4 bit,Fine-Tuned Deepseek 8 bit,Llama 3 billion parameter
0,,,,,
1,,,,,
2,,,,,
3,,,,,
4,,,,,
5,,,,,
6,,,,,


In [164]:
## Unweighted
macro_train_0 = macro_f1(f1s_train)
macro_train_0


0.49710956767773384

In [165]:
macro_base_0 = macro_f1(f1s_base)
macro_base_0

0.4596853305153966

In [166]:
## unweighted row value
unweighted = ['Macro F1 score with equal weight', macro_base_0, macro_train_0, None, None]
df_f1scores.loc[0] = unweighted


In [167]:
variables_train

['perovskite_composition',
 'electron_transport_layer',
 'hole_transport_layer',
 'structure_pin_nip',
 'stability_type',
 'temperature',
 'time',
 'humidity',
 'passivating_molecule',
 'efficiency_cont',
 'efficiency_tret',
 'control_pce',
 'treated_pce',
 'control_voc',
 'treated_voc']

In [168]:
weights_1 = [1, 1, 1, 1, 2, 2, 2, 2, 1, 2, 2, 1, 1, 1, 1]

In [169]:
macro_train_1 = macro_f1(f1s_train, weight = weights_1)
macro_train_1

0.45526861964286586

In [170]:
macro_base_1 = macro_f1(f1s_base, weight = weights_1)
macro_base_1

0.41805507780696827

In [171]:
## Heavier weight on stability value
first_f1 = ['Heavier weight on stability', macro_base_1, macro_train_1, None, None]
df_f1scores.loc[1] = first_f1

In [172]:
variables_train

['perovskite_composition',
 'electron_transport_layer',
 'hole_transport_layer',
 'structure_pin_nip',
 'stability_type',
 'temperature',
 'time',
 'humidity',
 'passivating_molecule',
 'efficiency_cont',
 'efficiency_tret',
 'control_pce',
 'treated_pce',
 'control_voc',
 'treated_voc']

In [173]:
weights_2 = [2, 2, 2, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1]

In [174]:
macro_train_2 = macro_f1(f1s_train, weight = weights_2)
macro_train_2

0.49710380333312737

In [175]:
macro_base_2 = macro_f1(f1s_base, weight = weights_2)
macro_base_2

0.46313816953026504

In [176]:
## Heavier weight on perovskite structure
first_f2 = ['Heavier weight on perovskite structure', macro_base_2, macro_train_2, None, None]
df_f1scores.loc[2] = first_f2

In [177]:
variables_train

['perovskite_composition',
 'electron_transport_layer',
 'hole_transport_layer',
 'structure_pin_nip',
 'stability_type',
 'temperature',
 'time',
 'humidity',
 'passivating_molecule',
 'efficiency_cont',
 'efficiency_tret',
 'control_pce',
 'treated_pce',
 'control_voc',
 'treated_voc']

In [178]:
weights_3 = [1, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2]

In [179]:
macro_train_3 = macro_f1(f1s_train, weight = weights_3)
macro_train_3

0.5178272699514781

In [180]:
macro_base_3 = macro_f1(f1s_base, weight = weights_3)
macro_base_3

0.4759615201078144

In [181]:
## Heavier weight on numeric data
first_f3 = ['Heavier weight on numeric data', macro_base_3, macro_train_3, None, None]
df_f1scores.loc[3] = first_f3

In [182]:
variables_train

['perovskite_composition',
 'electron_transport_layer',
 'hole_transport_layer',
 'structure_pin_nip',
 'stability_type',
 'temperature',
 'time',
 'humidity',
 'passivating_molecule',
 'efficiency_cont',
 'efficiency_tret',
 'control_pce',
 'treated_pce',
 'control_voc',
 'treated_voc']

In [183]:
weights_4 = [1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1]

In [184]:
macro_train_4 = macro_f1(f1s_train, weight = weights_4)
macro_train_4

0.6192780753220124

In [185]:
macro_base_4 = macro_f1(f1s_base, weight = weights_4)
macro_base_4

0.6184904348164809

In [186]:
## Weight to perform prediction 1
first_f4 = ['Weight to perform prediction 1', macro_base_4, macro_train_4, None, None]
df_f1scores.loc[4] = first_f4

In [187]:
variables_train

['perovskite_composition',
 'electron_transport_layer',
 'hole_transport_layer',
 'structure_pin_nip',
 'stability_type',
 'temperature',
 'time',
 'humidity',
 'passivating_molecule',
 'efficiency_cont',
 'efficiency_tret',
 'control_pce',
 'treated_pce',
 'control_voc',
 'treated_voc']

In [188]:
weights_5 = [1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1]

In [189]:
macro_train_5 = macro_f1(f1s_train, weight = weights_5)
macro_train_5

0.6470619530812942

In [190]:
macro_base_5 = macro_f1(f1s_base, weight = weights_5)
macro_base_5

0.6117317184490826

In [191]:
## Weight to perform prediction 2
first_f5 = ['Weight to perform prediction 2', macro_base_5, macro_train_5, None, None]
df_f1scores.loc[5] = first_f5

In [192]:
variables_train

['perovskite_composition',
 'electron_transport_layer',
 'hole_transport_layer',
 'structure_pin_nip',
 'stability_type',
 'temperature',
 'time',
 'humidity',
 'passivating_molecule',
 'efficiency_cont',
 'efficiency_tret',
 'control_pce',
 'treated_pce',
 'control_voc',
 'treated_voc']

In [193]:
weights_6 = [1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0]

In [194]:
macro_train_6 = macro_f1(f1s_train, weight = weights_6)
macro_train_6

0.41700936899057983

In [195]:
macro_base_6 = macro_f1(f1s_base, weight = weights_6)
macro_base_6

0.39713474272079524

In [196]:
## Weight to perform prediction 3
first_f6 = ['Weight to perform prediction 3', macro_base_6, macro_train_6, None, None]
df_f1scores.loc[6] = first_f6

In [197]:
df_f1scores

Unnamed: 0,Macro F1 score weight distribution,Base Deepseek 8bit,Fine-Tuned Deepseek 4 bit,Fine-Tuned Deepseek 8 bit,Llama 3 billion parameter
0,Macro F1 score with equal weight,0.459685,0.49711,,
1,Heavier weight on stability,0.418055,0.455269,,
2,Heavier weight on perovskite structure,0.463138,0.497104,,
3,Heavier weight on numeric data,0.475962,0.517827,,
4,Weight to perform prediction 1,0.61849,0.619278,,
5,Weight to perform prediction 2,0.611732,0.647062,,
6,Weight to perform prediction 3,0.397135,0.417009,,


### Different weight to consider
- Unweight
- Heavier weight on stability
- Heavier weight on perovskite structure
- Heavier weight on numeric data
- Weight to perform prediction 1
- Weight to perform prediction 2
- Weight to perform prediction 3