# This file compares the teamtat annotation with Extraction performed (Json)

In [1]:
irrelevent_papers = [26,28,29,30,32,33,34,35,38,43,44,45,52,54,55,56,57,58,63,66,68,69,70,78,80,83,84,86,87,88,89,90,91,92,93,94,98,100,101,102,103,104,105,106,108,109,110,111,112,115,116,117,119,121,125,128,129,130,134,136,138,139, 140]
relevent_bad = [1, 18, 20, 25, 27, 41, 51, 61, 71, 76, 135, 141, 145]

relevent_good = [i for i in range(0, 150) if i not in irrelevent_papers and i not in relevent_bad]

In [2]:
len(relevent_good)

74

In [3]:
from sklearn.metrics import precision_score, recall_score, f1_score
from difflib import SequenceMatcher
import numpy as np
import json
import os
import xml.etree.ElementTree as ET 
import pandas as pd
import re
from sklearn.metrics.pairwise import cosine_similarity

In [4]:
#Evaluation schema 2
data = {
    'perovskite_composition': None,
    'electron_transport_layer': None,
    'hole_transport_layer': None,
    'structure_pin_nip': None,
    'passivating_molecule': None,
    'control_pce': None,
    'treated_pce': None,
    'control_voc': None,
    'treated_voc': None,
    'test_1': {
        'stability_type': None,
        'humidity': None,
        'temperature': None,
        'time': None,
        'efficiency_tret': None,
        'efficiency_cont': None
    }
}

data


{'perovskite_composition': None,
 'electron_transport_layer': None,
 'hole_transport_layer': None,
 'structure_pin_nip': None,
 'passivating_molecule': None,
 'control_pce': None,
 'treated_pce': None,
 'control_voc': None,
 'treated_voc': None,
 'test_1': {'stability_type': None,
  'humidity': None,
  'temperature': None,
  'time': None,
  'efficiency_tret': None,
  'efficiency_cont': None}}

## File Preparation

In [5]:
def str_toJson(string):
    ##The json output from annotation dataframe was not in correct json format
    # We will change the None to null
    json_string = string.replace("None", "null")

    try:
        # Try to load the JSON string
        json_object = json.loads(json_string)
        return json_object
    except json.JSONDecodeError as e:
        # Catch JSONDecodeError if the string is not valid JSON
        print(f"Error decoding JSON: {e}")
        return None
    except Exception as e:
        # Catch any other exceptions
        print(f"An error occurred: {e}")
        return None

In [6]:
def include_passivating(dictionary):
    ##In extraction json, realized that some extraction has passivating molecule that is NOT included in its stability testing. 
    ## Since passivating molecule (if exist) needs to be in stability testing (nexted dictionary), we will transfer the information and spit out a cleaned dictionary. 
    if "passivating_molecule" in dictionary.keys():
        passivating = dictionary['passivating_molecule']
        del dictionary['passivating_molecule']
        
        for entity in dictionary.keys():
            if entity.startswith('test'):
                # print(i['entity'])
                if type(dictionary[entity]) == dict:
                    if 'passivating_molecule' in dictionary[entity].keys():
                        continue
                    else:
                        # print("Have to include passivating molecule in tests")
                        dictionary[entity]['passivating_molecule'] = passivating
        
    return dictionary

In [7]:
def convert_numeric_outside(dictionary):
    key_list = ['control_pce', 'treated_pce', 'control_voc', 'treated_voc']
    for key in key_list:
        if dictionary[key] != None:
            dictionary[key] = float(dictionary[key])
    return dictionary

In [8]:
## Convert all numerical data into float for both

def convert_numeric(dictionary):
    if dictionary == None:
        return None
    numerical_key = ['time', 'efficiency_cont', 'efficiency_tret', 'control_pce', 'treated_pce', 'control_voc', 'treated_voc']
    exception_numeric = ['humidity', 'temperature']

    translation_table = str.maketrans('', '', 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!@#$%^&*()')
    for key in dictionary.keys():
        if (key.startswith('test')) & (type(dictionary[key]) == dict):
            for entity in dictionary[key].keys():
                if entity in numerical_key:
                    # print(dictionary[key][entity])
                    if isinstance(dictionary[key][entity], str): 
                        substitute = re.sub(r'[^0-9.]', '', dictionary[key][entity][:4])
                        if len(substitute) != 0:
                            numerical_value = float(substitute)
                            dictionary[key][entity] = numerical_value
                        else:
                            dictionary[key][entity] = None
                elif entity in exception_numeric:
                    if isinstance(dictionary[key][entity], str): 
                        if "-" not in dictionary[key][entity]:
                            # print("regular_case",dictionary[key][entity])
                            substitute = re.sub(r'[^0-9.]', '', dictionary[key][entity][:4])
                            if len(substitute) != 0:
                                numerical_value = float(substitute)
                                dictionary[key][entity] = numerical_value
                            else:
                                dictionary[key][entity] = None
                        # else:
                            
                        #     print(dictionary[key][entity])
        elif ('test_' in key) & (type(dictionary[key]) == dict):
            for entity in dictionary[key].keys():
                if entity in numerical_key:
                    # print(dictionary[key][entity])
                    if isinstance(dictionary[key][entity], str): 
                        substitute = re.sub(r'[^0-9.]', '', dictionary[key][entity][:4])
                        if len(substitute) != 0:
                            numerical_value = float(substitute)
                            dictionary[key][entity] = numerical_value
                        else:
                            dictionary[key][entity] = None
                elif entity in exception_numeric:
                    if isinstance(dictionary[key][entity], str): 
                        if "-" not in dictionary[key][entity]:
                            # print("regular_case",dictionary[key][entity])
                            substitute = re.sub(r'[^0-9.]', '', dictionary[key][entity][:4])
                            if len(substitute) != 0:
                                numerical_value = float(substitute)
                                dictionary[key][entity] = numerical_value
                            else:
                                dictionary[key][entity] = None
                        # else:
                            
                        #     print(dictionary[key][entity])
        elif key in numerical_key:
            if isinstance(dictionary[key], str): 
                substitute = re.sub(r'[^0-9.]', '', dictionary[key][:4])
                if len(substitute) != 0:
                    numerical_value = float(substitute)
                    dictionary[key] = numerical_value
                else:
                    dictionary[key] = None
        elif key in exception_numeric:
            if isinstance(dictionary[key], str): 
                if "-" not in dictionary[key]:
                    # print("regular_case",dictionary[key][entity])
                    substitute = re.sub(r'[^0-9.]', '', dictionary[key][:4])
                    if len(substitute) != 0:
                        numerical_value = float(substitute)
                        dictionary[key] = numerical_value
                    else:
                        dictionary[key] = None




    return dictionary

In [9]:
def convert_efficiency(dictionary):
    entity_decimal = ['efficiency_cont','efficiency_tret']
    for key in dictionary.keys():
        if (key.startswith('test')) & (type(dictionary[key]) == dict):
            for entity in dictionary[key].keys():
                if (entity in entity_decimal) and (dictionary[key][entity] != None):
                    if dictionary[key][entity] == dictionary[key][entity] > 1:
                        dictionary[key][entity] = dictionary[key][entity] / 100
    return dictionary


    

#### Analyzing these outputs

Annotation notes: 
- THE 4 basic variable that is to compare is PEROVSKITE COMPOSITION, ETL, HTL, STRUCTURE
- Stability entity: efficiency_control is wrong, All value is None, so ignore. 
- Common entity: ['stability_type', 'passivating_molecule', 'humidity', 'temperature', 'time', 'control_pce', 'treated_pce', 'control_voc', 'treated_voc', 'efficiency_tret'] 
    - 'efficiency_cont' are included or not

- the efficiency in extracted data need to be converted to decimals since it is in percentage SOMETIMES
    - 'control_efficiency' and 'treatment_efficiency can be ignored


Extraction notes:
- some extraction has passivating molecule that is NOT included in its stability testing. 

### Loading Teamtat Annotation as dataframe

In [10]:
with open('data/annotations_flattened.json', 'r') as f:
    json_data = json.load(f)

In [11]:
flattened_format = []
for key in json_data:
    papers = json_data[key]
    if papers is None:
        flattened_format.append({ "paper_id": key, "output": None })
        continue
    for passivator in papers:
        paper_data = papers[passivator]
        flattened_format.append({ "paper_id": key, "output": paper_data })

In [12]:
annotation_df = pd.DataFrame(flattened_format)
annotation_df.columns = ['paper_num', 'output']
annotation_df["paper_num"] = annotation_df["paper_num"].astype(int)
annotation_df = annotation_df.sort_values(by = 'paper_num')
annotation_df

Unnamed: 0,paper_num,output
125,0,{'perovskite_composition': 'Cs0.05FA0.85MA0.1P...
98,1,"{'perovskite_composition': None, 'electron_tra..."
69,2,{'perovskite_composition': '(FAPbI3)0.95(MAPbB...
92,3,{'perovskite_composition': 'Cs0.05(MA0.10FA0.8...
27,4,{'perovskite_composition': '(MAPbBr3)0.05(FAPb...
...,...,...
146,146,{'perovskite_composition': 'Cs0.05(MA0.05FA0.9...
181,147,"{'perovskite_composition': 'FAPbI 3', 'electro..."
58,148,{'perovskite_composition': 'Cs0.05FA0.85MA0.10...
57,148,{'perovskite_composition': 'Cs0.05FA0.85MA0.10...


In [13]:
annotation_df[annotation_df['output'].isnull()]

Unnamed: 0,paper_num,output
4,25,
66,26,
31,28,
89,29,
86,30,
...,...,...
105,135,
193,136,
94,138,
9,139,


In [14]:
## Get the annotation_df with only relevent good papers. 
annotation_df = annotation_df[annotation_df['paper_num'].isin(relevent_good)]
annotation_df

Unnamed: 0,paper_num,output
125,0,{'perovskite_composition': 'Cs0.05FA0.85MA0.1P...
69,2,{'perovskite_composition': '(FAPbI3)0.95(MAPbB...
92,3,{'perovskite_composition': 'Cs0.05(MA0.10FA0.8...
27,4,{'perovskite_composition': '(MAPbBr3)0.05(FAPb...
13,5,"{'perovskite_composition': 'FA(MA)PbI 3', 'ele..."
...,...,...
146,146,{'perovskite_composition': 'Cs0.05(MA0.05FA0.9...
181,147,"{'perovskite_composition': 'FAPbI 3', 'electro..."
58,148,{'perovskite_composition': 'Cs0.05FA0.85MA0.10...
57,148,{'perovskite_composition': 'Cs0.05FA0.85MA0.10...


In [15]:
annotation_df = annotation_df[annotation_df['output'].notnull()]
annotation_df

Unnamed: 0,paper_num,output
125,0,{'perovskite_composition': 'Cs0.05FA0.85MA0.1P...
69,2,{'perovskite_composition': '(FAPbI3)0.95(MAPbB...
92,3,{'perovskite_composition': 'Cs0.05(MA0.10FA0.8...
27,4,{'perovskite_composition': '(MAPbBr3)0.05(FAPb...
13,5,"{'perovskite_composition': 'FA(MA)PbI 3', 'ele..."
...,...,...
146,146,{'perovskite_composition': 'Cs0.05(MA0.05FA0.9...
181,147,"{'perovskite_composition': 'FAPbI 3', 'electro..."
58,148,{'perovskite_composition': 'Cs0.05FA0.85MA0.10...
57,148,{'perovskite_composition': 'Cs0.05FA0.85MA0.10...


In [16]:
annotation_df.iloc[0]['output']

{'perovskite_composition': 'Cs0.05FA0.85MA0.1PbI3',
 'electron_transport_layer': 'C60',
 'hole_transport_layer': '2PACz',
 'structure_pin_nip': 'PIN',
 'passivating_molecule': '4-chlorobenzenesulfonate',
 'control_pce': '24',
 'treated_pce': '26.9',
 'control_voc': None,
 'treated_voc': '1.18',
 'test_1': {'stability_type': 'ISOSL3',
  'humidity': '50',
  'temperature': '65',
  'time': '1200',
  'efficiency_cont': None,
  'efficiency_tret': '95'},
 'test_3': {'stability_type': 'ISOSD2',
  'humidity': None,
  'temperature': '85',
  'time': '1500',
  'efficiency_cont': None,
  'efficiency_tret': '95'},
 'test_2': {'stability_type': 'ISOSL3',
  'humidity': '50',
  'temperature': '85',
  'time': '540',
  'efficiency_cont': None,
  'efficiency_tret': '87'}}

In [17]:
# #Teamtat Annotation
# annotation_df = pd.read_csv("data/annotation_flat.csv")[['paper_num', 'output']]
# annotation_df = annotation_df.sort_values(by = ['paper_num'])
# annotation_df

In [18]:
annotation_df[annotation_df['paper_num'] == 7]['output'].iloc[5]

{'perovskite_composition': 'Cs0.05(MA)0.16(FA)0.79Pb(I0.83Br0.17 )3',
 'electron_transport_layer': 'C60',
 'hole_transport_layer': None,
 'structure_pin_nip': 'NIP',
 'passivating_molecule': 'phenylethylammonium lead iodide',
 'control_pce': None,
 'treated_pce': None,
 'control_voc': '1.05',
 'treated_voc': '1.11',
 'test_7': {'stability_type': 'ISOSD',
  'humidity': '60',
  'temperature': None,
  'time': '1000',
  'efficiency_cont': None,
  'efficiency_tret': '90'},
 'test_8': {'stability_type': 'ISOST',
  'humidity': None,
  'temperature': '60',
  'time': '100',
  'efficiency_cont': None,
  'efficiency_tret': '75'}}

## Clean the annotation:
- Drop row where dictionary doesn't have test_
- Convert numeric


In [19]:
def filter_output(dictionary):
    ## I want to drop row where passivation is none. 
    if dictionary['passivating_molecule'] == None:
        return True
    return False

In [20]:
annotation_filter = annotation_df['output'].apply(filter_output)
annotation_filter

125    False
69     False
92     False
27     False
13     False
       ...  
146     True
181    False
58     False
57     False
174    False
Name: output, Length: 118, dtype: bool

In [21]:
annotation_df['filter'] = annotation_filter
annotation_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  annotation_df['filter'] = annotation_filter


Unnamed: 0,paper_num,output,filter
125,0,{'perovskite_composition': 'Cs0.05FA0.85MA0.1P...,False
69,2,{'perovskite_composition': '(FAPbI3)0.95(MAPbB...,False
92,3,{'perovskite_composition': 'Cs0.05(MA0.10FA0.8...,False
27,4,{'perovskite_composition': '(MAPbBr3)0.05(FAPb...,False
13,5,"{'perovskite_composition': 'FA(MA)PbI 3', 'ele...",False
...,...,...,...
146,146,{'perovskite_composition': 'Cs0.05(MA0.05FA0.9...,True
181,147,"{'perovskite_composition': 'FAPbI 3', 'electro...",False
58,148,{'perovskite_composition': 'Cs0.05FA0.85MA0.10...,False
57,148,{'perovskite_composition': 'Cs0.05FA0.85MA0.10...,False


In [22]:
annotation_df = annotation_df[annotation_df['filter'] == False][['paper_num', 'output']]
annotation_df

Unnamed: 0,paper_num,output
125,0,{'perovskite_composition': 'Cs0.05FA0.85MA0.1P...
69,2,{'perovskite_composition': '(FAPbI3)0.95(MAPbB...
92,3,{'perovskite_composition': 'Cs0.05(MA0.10FA0.8...
27,4,{'perovskite_composition': '(MAPbBr3)0.05(FAPb...
13,5,"{'perovskite_composition': 'FA(MA)PbI 3', 'ele..."
...,...,...
147,146,{'perovskite_composition': 'Cs0.05(MA0.05FA0.9...
181,147,"{'perovskite_composition': 'FAPbI 3', 'electro..."
58,148,{'perovskite_composition': 'Cs0.05FA0.85MA0.10...
57,148,{'perovskite_composition': 'Cs0.05FA0.85MA0.10...


In [23]:
##Str_toJson is irrelevent since output was already in dictionary format!!!

##Change the format (minor) to be converted to json
# annotation_df['output'] = annotation_df['output'].apply(str_toJson)
# annotation_df

In [24]:
len(annotation_df['paper_num'].unique())

72

In [25]:
annotation_df['output'][7]

{'perovskite_composition': '(FAPbI3)0.95(MAPbBr3)0.05',
 'electron_transport_layer': 'TitaniumDioxide',
 'hole_transport_layer': 'Spiro-OMeTAD',
 'structure_pin_nip': None,
 'passivating_molecule': 'dodecylammonium iodide',
 'control_pce': '5.9',
 'treated_pce': '15.1',
 'control_voc': None,
 'treated_voc': None,
 'test_3': {'stability_type': 'ISOSD',
  'humidity': '85',
  'temperature': 'room temperature',
  'time': '100',
  'efficiency_cont': None,
  'efficiency_tret': '90'},
 'test_4': {'stability_type': 'ISOSD',
  'humidity': '65',
  'temperature': '65',
  'time': '100',
  'efficiency_cont': '20',
  'efficiency_tret': '80'}}

In [26]:
annotation_df['output'] = annotation_df['output'].apply(convert_numeric)
annotation_df['output'] = annotation_df['output'].apply(convert_numeric_outside)

In [27]:
annotation_df

Unnamed: 0,paper_num,output
125,0,{'perovskite_composition': 'Cs0.05FA0.85MA0.1P...
69,2,{'perovskite_composition': '(FAPbI3)0.95(MAPbB...
92,3,{'perovskite_composition': 'Cs0.05(MA0.10FA0.8...
27,4,{'perovskite_composition': '(MAPbBr3)0.05(FAPb...
13,5,"{'perovskite_composition': 'FA(MA)PbI 3', 'ele..."
...,...,...
147,146,{'perovskite_composition': 'Cs0.05(MA0.05FA0.9...
181,147,"{'perovskite_composition': 'FAPbI 3', 'electro..."
58,148,{'perovskite_composition': 'Cs0.05FA0.85MA0.10...
57,148,{'perovskite_composition': 'Cs0.05FA0.85MA0.10...


In [28]:
# # Exporting annotation
annotation_df.to_csv('annotation.csv', index=False)

### Loading in JSON extraction

In [29]:
def convert_efficiency_key(dict):
    for key, item in dict.items():
        if 'test' in key:
            if 'retained_proportion_cont' in dict[key]:
                dict[key]['efficiency_cont'] = dict[key].pop('retained_proportion_cont')
            if 'retained_proportion_tret' in dict[key]:
                dict[key]['efficiency_tret'] = dict[key].pop('retained_proportion_tret')
    return dict
            

In [30]:
## extraction performed by basemodel
# Read JSON from a file
with open("data/deepseek_base_updateschema.json", 'r') as f:
    extraction = json.load(f)

extraction_base = pd.DataFrame(list(extraction.items()), columns=['paper_num', 'output'])
extraction_base['paper_num'] = pd.to_numeric(extraction_base['paper_num'])
extraction_base = extraction_base.sort_values('paper_num')
# extraction_base['output'] = extraction_base['output'].apply(include_passivating)
extraction_base['output'] = extraction_base['output'].apply(convert_numeric)
extraction_base['output'] = extraction_base['output'].apply(convert_efficiency)
extraction_base

Unnamed: 0,paper_num,output
77,0,"{'perovskite_composition': 'FAPbI3', 'electron..."
127,1,{'perovskite_composition': 'FA 1-x MA x PbI 3'...
7,2,"{'perovskite_composition': '(BA)2PbI4', 'elect..."
35,3,{'perovskite_composition': 'Cs0.05(MA0.10FA0.8...
31,4,{'perovskite_composition': '(MAPbBr3)0.05(FAPb...
...,...,...
38,145,{'perovskite_composition': 'Cs 0.05 (MA 0.17 F...
89,146,{'perovskite_composition': 'Cs0.05(MA0.05FA0.9...
121,147,"{'perovskite_composition': 'FAPbI3', 'electron..."
84,148,{'perovskite_composition': 'Cs 0.05 FA 0.85 MA...


In [46]:
## extraction performed by finetuned deepseek
# Read JSON from a file
with open("data/deepseek_8bit_finetuned_tests_nested.json", 'r') as f:
    extraction = json.load(f)

extraction_train = pd.DataFrame(list(extraction.items()), columns=['paper_num', 'output'])
extraction_train['paper_num'] = pd.to_numeric(extraction_train['paper_num'])
extraction_train = extraction_train.sort_values('paper_num')

##Must convert str --> json
TBD
# extraction_train['output'] = extraction_train['output'].apply(str_toJson)
extraction_train['output'] = extraction_train['output'].apply(include_passivating)
extraction_train['output'] = extraction_train['output'].apply(convert_numeric)
extraction_train['output'] = extraction_train['output'].apply(convert_efficiency)
extraction_train['output'] = extraction_train['output'].apply(convert_efficiency_key)
extraction_train

NameError: name 'TBD' is not defined

In [47]:
extraction_train.iloc[4]['output']

"{'perovskite_composition': '2D perovskite', 'electron_transport_layer': 'C60', 'pin_nip_structure': 'NIP', 'hole_transport_layer': 'Spiro-OMeTAD', 'passivating_molecule': '4-fluorophenylethylammonium', 'control_pce': None, 'control_voc': None, 'treated_pec': '21.6', 'treated_voc': '1.16', 'test_1': {'test_name': 'ISOS-L', 'temperature': '85', 'time': '1000', 'humidity': '85', 'retained_percentage_cont': '90', 'retained_percentage_tret': '95'}}"

## Merging dataframe

In [31]:
evaluate_df_base = annotation_df.merge(extraction_base, left_on='paper_num', right_on='paper_num')[["paper_num", "output_x",'output_y']]
evaluate_df_base.columns = ['paper_num', 'annotation', 'extracted']
evaluate_df_base

Unnamed: 0,paper_num,annotation,extracted
0,0,{'perovskite_composition': 'Cs0.05FA0.85MA0.1P...,"{'perovskite_composition': 'FAPbI3', 'electron..."
1,2,{'perovskite_composition': '(FAPbI3)0.95(MAPbB...,"{'perovskite_composition': '(BA)2PbI4', 'elect..."
2,3,{'perovskite_composition': 'Cs0.05(MA0.10FA0.8...,{'perovskite_composition': 'Cs0.05(MA0.10FA0.8...
3,4,{'perovskite_composition': '(MAPbBr3)0.05(FAPb...,{'perovskite_composition': '(MAPbBr3)0.05(FAPb...
4,5,"{'perovskite_composition': 'FA(MA)PbI 3', 'ele...","{'perovskite_composition': 'FA(MA)PbI3', 'elec..."
...,...,...,...
89,146,{'perovskite_composition': 'Cs0.05(MA0.05FA0.9...,{'perovskite_composition': 'Cs0.05(MA0.05FA0.9...
90,147,"{'perovskite_composition': 'FAPbI 3', 'electro...","{'perovskite_composition': 'FAPbI3', 'electron..."
91,148,{'perovskite_composition': 'Cs0.05FA0.85MA0.10...,{'perovskite_composition': 'Cs 0.05 FA 0.85 MA...
92,148,{'perovskite_composition': 'Cs0.05FA0.85MA0.10...,{'perovskite_composition': 'Cs 0.05 FA 0.85 MA...


In [32]:
## TODO: We should technically, choose one row from each paper that best matched the passivating name for annotation and extracted and perform evaluation
evaluate_df_base[evaluate_df_base['paper_num'] == 7]

Unnamed: 0,paper_num,annotation,extracted
6,7,{'perovskite_composition': 'Cs0.05(MA)0.16(FA)...,"{'perovskite_composition': 'C4N9H12)2PbI7', 'e..."
7,7,{'perovskite_composition': 'Cs0.05(MA)0.16(FA)...,"{'perovskite_composition': 'C4N9H12)2PbI7', 'e..."
8,7,{'perovskite_composition': 'Cs0.05(MA)0.16(FA)...,"{'perovskite_composition': 'C4N9H12)2PbI7', 'e..."
9,7,{'perovskite_composition': 'Cs0.05(MA)0.16(FA)...,"{'perovskite_composition': 'C4N9H12)2PbI7', 'e..."
10,7,{'perovskite_composition': 'Cs0.05(MA)0.16(FA)...,"{'perovskite_composition': 'C4N9H12)2PbI7', 'e..."
11,7,{'perovskite_composition': 'Cs0.05(MA)0.16(FA)...,"{'perovskite_composition': 'C4N9H12)2PbI7', 'e..."
12,7,{'perovskite_composition': 'Cs0.05(MA)0.16(FA)...,"{'perovskite_composition': 'C4N9H12)2PbI7', 'e..."


In [33]:
def get_passivation(dict):
    return dict['passivating_molecule']

In [34]:
annotation_passivation = evaluate_df_base['annotation'].apply(get_passivation)
annotation_passivation

0                              4-chlorobenzenesulfonate
1                       butylammonium lead tetra iodide
2                     Barium methylammonium lead iodide
3                          4vinylbenzylammonium bromide
4                              iso-butylammonium iodide
                            ...                        
89    ortho-carborane decorated with phenylamino groups
90    carbazole-triphenylamine and phenylammonium io...
91                           phenylethylammonium iodide
92                   4-tert-butyl-benzylammonium iodide
93              Trifluoromethyl-phenylammonium chloride
Name: annotation, Length: 94, dtype: object

In [35]:
extraction_passivation = evaluate_df_base['extracted'].apply(get_passivation)
extraction_passivation

0                       Cyclohexylmethylammonium iodide
1                                                  None
2                                          BA2MA2Pb3I10
3                   vinylbenzylammonium bromide (VBABr)
4                               isobutylammonium iodide
                            ...                        
89    o-carborane decorated with phenylamino groups ...
90    4-(3,6-bis(4-(bis(4-methoxyphenyl)amino)phenyl...
91            4-tert-butylbenzylammonium iodide (tBBAI)
92            4-tert-butylbenzylammonium iodide (tBBAI)
93                     4-trifluoromethyl-phenylammonium
Name: extracted, Length: 94, dtype: object

In [52]:
evaluate_df_train = annotation_df.merge(extraction_train, left_on='first_num', right_on='paper_num')[["paper_num", "output_x",'output_y']]
evaluate_df_train.columns = ['paper_num', 'annotation', 'extracted']
evaluate_df_train

KeyError: 'first_num'

In [53]:
# evaluate_df.to_csv('merged.csv', index=False)

In [151]:
for row in evaluate_df_train.itertuples():
    label_value = row.annotation
    extracted_value = row.extracted

    print(label_value)
    print(extracted_value)

{'perovskite_composition': 'Cs0.05FA0.85MA0.1PbI3', 'electron_transport_layer': 'C60', 'hole_transport_layer': '2PACz', 'structure_pin_nip': 'PIN', 'test_1': {'stability_type': 'ISOSL3', 'passivating_molecule': '4-chlorobenzenesulfonate', 'humidity': 50.0, 'temperature': 65.0, 'time': 1200.0, 'control_pce': 24.0, 'treated_pce': 26.9, 'control_voc': 0, 'treated_voc': 1.18, 'efficiency_cont': 0, 'efficiency_tret': 95.0}, 'test_1_2': {'stability_type': 'ISOSL3', 'passivating_molecule': '4-chlorobenzenesulfonate', 'humidity': 50.0, 'temperature': 85.0, 'time': 540.0, 'control_pce': 24.0, 'treated_pce': 26.9, 'control_voc': 0, 'treated_voc': 0, 'efficiency_cont': 0, 'efficiency_tret': 87.0}, 'test_2': {'stability_type': 'ISOSD2', 'passivating_molecule': '4-chlorobenzenesulfonate', 'humidity': 0, 'temperature': 85.0, 'time': 1500.0, 'control_pce': 24.0, 'treated_pce': 26.9, 'control_voc': 0, 'treated_voc': 0, 'efficiency_cont': 0, 'efficiency_tret': 95.0}}
{'perovskite_composition': 'FAPbI3'

In [152]:
evaluate_df["annotation"][6]

NameError: name 'evaluate_df' is not defined

In [153]:
evaluate_df["extracted"][1]

NameError: name 'evaluate_df' is not defined

## Evaluation

- We need precision and recall for EACH variable
- For each variable, calculate the F1 score - There is F1 score for each variable
- Take a weighted average ***For now, just take the average.

In [36]:
def tests_comparison(stability_annotated, label_dict, stability_extracted, extract_dict):
    # print(stability_annotated, label_dict, stability_extracted, extract_dict)
    stability_entity_annotated = ['stability_type', 'temperature', 'time', 'humidity', 'efficiency_cont', 'efficiency_tret']
    # stability_entity_extracted = ['test_name', 'passivating_molecule', 'temperature', 'time', 'humidity', 'control_efficiency', 'treatment_efficiency', 'control_pce', 'treated_pce', 'control_voc', 'treated_voc']
    
    # print(f"stability_annotated{stability_annotated}")
    # print(f"label_dict{label_dict}")
    # print(f"stability_extracted{stability_extracted}")
    # print(f"extract_dict{extract_dict}")



    compared_metric = []
    numeric_data_annotated = []
    numeric_data_extracted = []
    for entity_i in range(len(stability_entity_annotated)):
        if entity_i < 1:
            # print(stability_annotated[entity_i])
            if stability_entity_annotated[entity_i] not in extract_dict.keys():
                extract_dict[stability_entity_annotated[entity_i]] = None

            if (label_dict[stability_entity_annotated[entity_i]] == None) | (extract_dict[stability_entity_annotated[entity_i]] == None):
                compared_metric.append(None)
            else:
                ##Text entity, perform Sequence Matcher 
                compared = SequenceMatcher(None, label_dict[stability_entity_annotated[entity_i]], extract_dict[stability_entity_annotated[entity_i]]).ratio()
                # print(compared)
                if entity_i == 0:
                    if compared > 0.9:
                        compared_metric.append(1)
                    else:
                        compared_metric.append(0)
                else:
                    compared_metric.append(compared)
        else:
            if stability_entity_annotated[entity_i] not in extract_dict.keys():
                extract_dict[stability_entity_annotated[entity_i]] = 0
            elif extract_dict[stability_entity_annotated[entity_i]] == None:
                extract_dict[stability_entity_annotated[entity_i]] = 0

            if stability_entity_annotated[entity_i] not in label_dict.keys():
                label_dict[stability_entity_annotated[entity_i]] = 0
            elif label_dict[stability_entity_annotated[entity_i]] == None:
                label_dict[stability_entity_annotated[entity_i]] = 0

                
            numeric_data_annotated.append(label_dict[stability_entity_annotated[entity_i]])
            numeric_data_extracted.append(extract_dict[stability_entity_annotated[entity_i]])

    if isinstance(numeric_data_extracted[0], list):
        ##There was one column with two temperature recorded as a list (probably thermal cycling)
        numeric_data_extracted[0] = numeric_data_extracted[0][1]

    # print(numeric_data_annotated, numeric_data_extracted)

    numeric_annotated_clean = []
    numeric_extracted_clean = []
    ##Clean the numeric data to skip any strings
    for i in range(len(numeric_data_annotated)):
        if (type(numeric_data_annotated[i]) == str) | (type(numeric_data_extracted[i]) == str):
            continue
        else:
            numeric_annotated_clean.append(numeric_data_annotated[i])
            numeric_extracted_clean.append(numeric_data_extracted[i])

    cos_sim = cosine_similarity([numeric_annotated_clean], [numeric_extracted_clean])
    compared_metric.append(cos_sim[0][0])
    
    return compared_metric    

In [37]:
def entity_comparison(entity, label, extracted_dict, text_similarity_threshold = 0.75, numerical_tolerance = 0.027):
    '''
    The tolarance of 2.7% was what was reasonable looking at the absolute difference
    treated_voc 1.18, 1.149, absolute difference 0.026271186440677895

    The text similarity were set to 75% due to the structure example
    FP, NIP, n-i-p, 0.75
    This should be positive
    
    '''
    text_entity = ['stability_type', 'passivating_molecule']
    numerical_entity = ['time', 'efficiency_cont', 'efficiency_tret', 'control_pce', 'treated_pce', 'control_voc', 'treated_voc']
    numerical_exception = ['temperature', 'humidity']

    if entity in text_entity:
        # key_to_check = "test_name" if entity == "stability_type" else entity

        # If the key is missing in the extracted annotation, return False Negative
        if (label[entity]!=None) & (extracted_dict[entity]==None):
            # print(f"FN, {label_annotation[id]}, {extraction_annotation[key_to_check]}")
            return "FN"
        elif (label[entity]==None) & (extracted_dict[entity]!=None):
            # print(f"TN, {label_annotation[id]}, {extraction_annotation[key_to_check]}")
            return "TN"

        label_data = label.get(entity, "")
        extract_data = extracted_dict.get(entity, "")

        # Convert lists to strings if necessary
        if isinstance(label_data, list):
            label_data = " ".join(map(str, label_data))  # Convert list to string
        if isinstance(extract_data, list):
            extract_data = " ".join(map(str, extract_data))  # Convert list to string

        # Ensure values are strings
        if not isinstance(label_data, str) or not isinstance(extract_data, str):
            # print(f"FP, {label_annotation[id]}, {extraction_annotation[key_to_check]}")
            return "FP"  # If data is still not a string, return False Positive

        # Compute similarity score
        similarity = SequenceMatcher(None, label_data.lower(), extract_data.lower()).ratio()

        if similarity > text_similarity_threshold:
            # print(f"TP,{entity} {label_data}, {extract_data}")
            return 'TP'
        else:
            # print(f"FP,{entity} {label_data}, {extract_data}, {similarity}")
            return "FP"
    elif entity in numerical_entity:
        # key_to_check = "control_efficiency" if entity == "efficiency_cont" else ("treatment_efficiency" if entity == "efficiency_tret" else entity)

        # print(f"annotated{label[entity]}")
        # print(f"extracted{extracted_dict[entity]}")
        if extracted_dict[entity] == None:
            extracted_dict[entity] = 0

        # If the key is missing in the extracted annotation, return False Negative
        if (label[entity]!=0) & ((extracted_dict[entity]==0) | (entity not in extracted_dict.keys())):
            # print(f"FN, {label_annotation[id]}, {extraction_annotation[entity]}")
            return "FN"
        elif (label[entity]==0) & (extracted_dict[entity]!=0):
            # print(f"TN, {label_annotation[id]}, {extraction_annotation[entity]}")
            return "TN"
        elif (label[entity]==0) & (extracted_dict[entity]==0):
            # print(f"TN, {label_annotation[id]}, {extraction_annotation[entity]}")
            return "TN"


        if isinstance(extracted_dict[entity], list):
            ##There was one column with two temperature recorded as a list (probably thermal cycling)
            extracted_dict[entity] = extracted_dict[entity][1]

        # Apply numerical tolerance check
        if (abs(label[entity] - extracted_dict[entity])) / (abs(label[entity]) )<= numerical_tolerance:

            # print(f"Numerical differences matched: {entity} {label[entity]}, {extracted_dict[entity]}, absolute difference {(abs(label[entity] - extracted_dict[entity])) / (abs(label[entity]) )}")
            return "TP"  # True Positive: Correct numerical extraction
        else:

            # print(f"Numerical differences no match: {entity}, {label[entity]}, {extracted_dict[entity]}, absolute difference {(abs(label[entity] - extracted_dict[entity])) / (abs(label[entity]) )}")
            return "FP"  # False Positive: Incorrect numerical extraction    
    else: 
        if isinstance(label[entity], (float, int)):
            if extracted_dict[entity] == None:
                extracted_dict[entity] = 0

            # If the key is missing in the extracted annotation, return False Negative
            if (label[entity]!=0) & ((extracted_dict[entity]==0) | (entity not in extracted_dict.keys())):
                # print(f"FN, {label_annotation[id]}, {extraction_annotation[entity]}")
                return "FN"
            elif (label[entity]==0) & (extracted_dict[entity]!=0):
                # print(f"TN, {label_annotation[id]}, {extraction_annotation[entity]}")
                return "TN"
            elif (label[entity]==0) & (extracted_dict[entity]==0):
                # print(f"TN, {label_annotation[id]}, {extraction_annotation[entity]}")
                return "TN"


            if isinstance(extracted_dict[entity], list):
                ##There was one column with two temperature recorded as a list (probably thermal cycling)
                extracted_dict[entity] = extracted_dict[entity][1]

            # Apply numerical tolerance check
            if (abs(label[entity] - extracted_dict[entity])) / (abs(label[entity]) )<= numerical_tolerance:

                # print(f"Numerical differences matched: {entity} {label[entity]}, {extracted_dict[entity]}, absolute difference {(abs(label[entity] - extracted_dict[entity])) / (abs(label[entity]) )}")
                return "TP"  # True Positive: Correct numerical extraction
            else:

                # print(f"Numerical differences no match: {entity}, {label[entity]}, {extracted_dict[entity]}, absolute difference {(abs(label[entity] - extracted_dict[entity])) / (abs(label[entity]) )}")
                return "FP"  # False Positive: Incorrect numerical extraction    
        else:
            # print(label[entity], type(label[entity]))
            if extracted_dict[entity] == None:
                extracted_dict[entity] = 0
            
            if ((extracted_dict[entity]==0) | (entity not in extracted_dict.keys())):
                # print(f"FN, {label_annotation[id]}, {extraction_annotation[entity]}")
                return "FN"

            if isinstance(extracted_dict[entity], list):
                ##There was one column with two temperature recorded as a list (probably thermal cycling)
                extracted_dict[entity] = extracted_dict[entity][1]
            
            if isinstance(extracted_dict[entity], str):
                ##Label is str, extraction is str, so perform text similarity
                similarity = SequenceMatcher(None, label[entity].lower(), extracted_dict[entity].lower()).ratio()
                if similarity > text_similarity_threshold:
                    # print(f"TP, {label_data}, {extract_data}, {similarity}")
                    return 'TP'
                else:
                    # print(f"FP, {label_data}, {extract_data}, {similarity}")
                    return "FP"
            else:
                if "+" in label[entity]:
                    # print(label[entity].split("+-"))
                    value = float(label[entity].split("+-")[0])
                    margin_error = float(label[entity].split("+-")[1])
                    range = (value-margin_error, value-margin_error)
                    if (range[0]<= extracted_dict[entity]) & (extracted_dict[entity]<=range[1]):
                        # print(f"TP, {label_data}, {extract_data}, {similarity}")
                        return 'TP'
                    else:
                        # print(f"FP, {label_data}, {extract_data}, {similarity}")
                        return "FP"
                else:
                    lower = float(label[entity].split("-")[0])
                    upper = float(label[entity].split("-")[1])
                    if (lower<= extracted_dict[entity]) & (extracted_dict[entity]<=upper):
                        # print(f"TP, {label_data}, {extract_data}, {similarity}")
                        return 'TP'
                    else:
                        # print(f"FP, {label_data}, {extract_data}, {similarity}")
                        return "FP"


In [38]:
def safe_division(numerator, denominator):
    """Returns division result, or 0 if the denominator is zero."""
    return numerator / denominator if denominator != 0 else 0

In [39]:
def text_comparison(id, label_annotation, extraction_annotation, text_similarity_threshold=0.8):
    """Compares text values using string similarity matching.
    - THE 4 basic variable that is to compare is PEROVSKITE COMPOSITION, ETL, HTL, STRUCTURE
    """

    # Handle special case for structure_pin_nip
    # key_to_check = "pin_nip_structure" if id == "structure_pin_nip" else id

    # If the key is missing in the extracted annotation, return False Negative
    if (label_annotation[id]!=None) & (extraction_annotation[id]==None):
        # print(f"FN, {label_annotation[id]}, {extraction_annotation[key_to_check]}")
        return "FN"
    elif (label_annotation[id]==None) & (extraction_annotation[id]!=None):
        # print(f"TN, {label_annotation[id]}, {extraction_annotation[key_to_check]}")
        return "TN"

    label_data = label_annotation.get(id, "")
    if id == 'electron_transport_layer' and label_data == "buckminsterfullerene":
        label_data = 'C60'
    extract_data = extraction_annotation.get(id, "")

    # Convert lists to strings if necessary
    if isinstance(label_data, list):
        label_data = " ".join(map(str, label_data))  # Convert list to string
    if isinstance(extract_data, list):
        extract_data = " ".join(map(str, extract_data))  # Convert list to string

    # Ensure values are strings
    if not isinstance(label_data, str) or not isinstance(extract_data, str):
        # print(f"FP, {label_annotation[id]}, {extraction_annotation[id]}")
        return "FP"  # If data is still not a string, return False Positive

    # Compute similarity score
    similarity = SequenceMatcher(None, label_data.lower(), extract_data.lower()).ratio()

    if similarity > text_similarity_threshold:
        # print(f"TP, {label_data}, {extract_data}, {similarity}")
        return 'TP'
    else:
        # print(f"FP, {label_data}, {extract_data}, {similarity}")
        return "FP"


In [40]:
def numeric_comoparison(id, label_value, extracted_value, numerical_tolerance = 0.027):
    print(id)
    print(f"label value: {label_value[id]}, {type(label_value[id])}")
    print(f"extract value: {extracted_value[id]}, {type(extracted_value[id])}")
    if (label_value[id]!=None) & (extracted_value[id]==None):
        print(f"FN, {label_value[id]}, {extracted_value[id]}")
        return "FN"
    elif (label_value[id]==None) & (extracted_value[id]!=None):
        print(f"TN, {label_value[id]}, {extracted_value[id]}")
        return "TN"
    elif (label_value[id]==None) & (extracted_value[id]==None):
        ##Anotation failed to extract and extraction didn't extract. This is TP
        print(f"TP, {label_value[id]}, {extracted_value[id]}")
        return "TP"
    # Apply numerical tolerance check
    elif (abs(label_value[id] - extracted_value[id])) / (abs(label_value[id]) )<= numerical_tolerance:

        print(f"Numerical differences matched: {id} {label_value[id]}, {extracted_value[id]}, absolute difference {(abs(label_value[id] - extracted_value[id])) / (abs(label_value[id]) )}")
        return "TP"  # True Positive: Correct numerical extraction
    else:
        print(f"Numerical differences no match: {id}, {label_value[id]}, {extracted_value[id]}, absolute difference {(abs(label_value[id] - extracted_value[id])) / (abs(label_value[id]) )}")
        return "FP"  # False Positive: Incorrect numerical extraction    

In [41]:
def compare_json(df):
    """
    Compare labeled and extracted JSON data for correctness.

    TP: Correct value extracted by LLM.
    FN: LLM didn't extract this variable.
    FP: LLM extracted a value, but it was incorrect.
    TN: LLM halucinated and returned value that was not extracted
    """
    
    outside_variables = ['perovskite_composition', 'electron_transport_layer', 'hole_transport_layer', 'structure_pin_nip', "passivating_molecule", 'control_pce', 'treated_pce', 'control_voc', 'treated_voc']
    outside_text = ['perovskite_composition', 'electron_transport_layer', 'hole_transport_layer', 'structure_pin_nip', "passivating_molecule"]
    
    stability_entity = ['stability_type', 'temperature', 'time', 'humidity', 'efficiency_cont', 'efficiency_tret']

    # Initialize comparison dictionaries
    text_dict = {var: {"TP": 0, "FP": 0, "FN": 0, "TN": 0} for var in outside_variables}
    stability_dict = {var: {"TP": 0, "FP": 0, "FN": 0, "TN": 0} for var in stability_entity}

    for row in df.itertuples():       
        label_value = row.annotation
        extracted_value = row.extracted

        # print(label_value)
        # print(extracted_value)

        for id, label in label_value.items():
            if ('test' in id) and (isinstance(label_value[id], dict)):
                ##Plan for stability test evaluation
                '''
                For each stability condition in annotation, 
                    Pair them with stability condition in extracted
                        With stability of annotation and extraction, use function tests_comparison that returns how similar 2 stabilities are
                    
                Once all the pair is calculated, find the stability name of extraction that was closest to annotation stability. 

                Using this dictionary, we will increment FN, FP, TN, TP for each element of the entity.
                '''
                matched = 0
                stability_match = {}
                for extract_id, extract_label in extracted_value.items():
                    if ('test' in extract_id) and (isinstance(extracted_value[extract_id], dict)):
                        matched += 1
                        match_list = tests_comparison(id, label, extract_id, extract_label)
                        match_list = [0 if item is None else item for item in match_list]
                        # print(extracted_value[extract_id])
                        # print(match_list)
                        stability_match[extract_id] = match_list
        
                if matched == 0:
                    #No stability were extracted, we will add stability_unmatched
                        ##We need to account for if there was NO stability extracted. 
                    for key in stability_dict:
                        if 'FN' in stability_dict[key]:
                            stability_dict[key]['FN'] += 1
                else:
                    stability_match_mean = {stability: np.mean(lis) for stability, lis in stability_match.items()}
                    max_key = max(stability_match_mean, key=stability_match_mean.get)  
                    # print(extracted_value[max_key])
                    ##Now, I need to compare each entity in that found max_key and fill in that FN, dictionary.
                    for entity in label_value[id].keys():
                        if entity == 'efficiency_control':
                            continue
                        if entity == 'perovskite_molecule':
                            continue
                        entity_result = entity_comparison(entity, label, extracted_value[max_key])
                        stability_dict[entity][entity_result] += 1  
            else:  
                if id in outside_text:
                    result = text_comparison(id, label_value, extracted_value)
                    text_dict[id][result] += 1
                else:
                    result = numeric_comoparison(id, label_value, extracted_value)
                    text_dict[id][result] += 1


    # Merge all results
    combined_dict = {**text_dict, **stability_dict}
    # print("Performance for each variable in dictionary:", combined_dict)

    # Compute precision, recall, and F1-score
    variable_list, precision_list, recall_list, f1_list = [], [], [], []
    for variable, performance in combined_dict.items():
        TP, FP, FN = performance["TP"], performance["FP"], performance["FN"]
        
        precision = safe_division(TP, TP + FP)
        recall = safe_division(TP, TP + FN)
        f1 = safe_division(2 * precision * recall, precision + recall)

        variable_list.append(variable)
        precision_list.append(precision)
        recall_list.append(recall)
        f1_list.append(f1)

    return combined_dict, variable_list, precision_list, recall_list, f1_list

In [42]:
dict_result_base, variables_base, precisions_base, recalls_base, f1s_base = compare_json(evaluate_df_base)

control_pce
label value: 24.0, <class 'float'>
extract value: None, <class 'NoneType'>
FN, 24.0, None
treated_pce
label value: 26.9, <class 'float'>
extract value: None, <class 'NoneType'>
FN, 26.9, None
control_voc
label value: None, <class 'NoneType'>
extract value: None, <class 'NoneType'>
TP, None, None
treated_voc
label value: 1.18, <class 'float'>
extract value: None, <class 'NoneType'>
FN, 1.18, None
control_pce
label value: 22.3, <class 'float'>
extract value: 22.39, <class 'float'>
Numerical differences matched: control_pce 22.3, 22.39, absolute difference 0.004035874439461877
treated_pce
label value: 24.0, <class 'float'>
extract value: 24.59, <class 'float'>
Numerical differences matched: treated_pce 24.0, 24.59, absolute difference 0.02458333333333333
control_voc
label value: None, <class 'NoneType'>
extract value: 1.098, <class 'float'>
TN, None, 1.098
treated_voc
label value: 1.18, <class 'float'>
extract value: 1.185, <class 'float'>
Numerical differences matched: treate

In [95]:
dict_result_train, variables_train, precisions_train, recalls_train, f1s_train = compare_json(evaluate_df_train)

NameError: name 'evaluate_df_train' is not defined

In [43]:
dict_result_base

{'perovskite_composition': {'TP': 51, 'FP': 40, 'FN': 0, 'TN': 3},
 'electron_transport_layer': {'TP': 10, 'FP': 54, 'FN': 10, 'TN': 20},
 'hole_transport_layer': {'TP': 39, 'FP': 28, 'FN': 11, 'TN': 16},
 'structure_pin_nip': {'TP': 31, 'FP': 43, 'FN': 3, 'TN': 17},
 'passivating_molecule': {'TP': 21, 'FP': 70, 'FN': 3, 'TN': 0},
 'control_pce': {'TP': 34, 'FP': 24, 'FN': 16, 'TN': 20},
 'treated_pce': {'TP': 46, 'FP': 35, 'FN': 8, 'TN': 5},
 'control_voc': {'TP': 46, 'FP': 7, 'FN': 11, 'TN': 30},
 'treated_voc': {'TP': 51, 'FP': 13, 'FN': 7, 'TN': 23},
 'stability_type': {'TP': 11, 'FP': 83, 'FN': 2, 'TN': 6},
 'temperature': {'TP': 34, 'FP': 16, 'FN': 0, 'TN': 52},
 'time': {'TP': 50, 'FP': 31, 'FN': 15, 'TN': 6},
 'humidity': {'TP': 17, 'FP': 10, 'FN': 13, 'TN': 62},
 'efficiency_cont': {'TP': 0, 'FP': 30, 'FN': 19, 'TN': 53},
 'efficiency_tret': {'TP': 0, 'FP': 72, 'FN': 27, 'TN': 3}}

In [161]:
dict_result_train

{'perovskite_composition': {'TP': 42, 'FP': 50, 'FN': 9, 'TN': 25},
 'electron_transport_layer': {'TP': 20, 'FP': 63, 'FN': 1, 'TN': 42},
 'hole_transport_layer': {'TP': 41, 'FP': 45, 'FN': 2, 'TN': 38},
 'structure_pin_nip': {'TP': 27, 'FP': 56, 'FN': 10, 'TN': 33},
 'stability_type': {'TP': 0, 'FP': 48, 'FN': 166, 'TN': 0},
 'temperature': {'TP': 52, 'FP': 48, 'FN': 0, 'TN': 114},
 'time': {'TP': 44, 'FP': 57, 'FN': 66, 'TN': 47},
 'humidity': {'TP': 30, 'FP': 16, 'FN': 29, 'TN': 139},
 'passivating_molecule': {'TP': 37, 'FP': 94, 'FN': 5, 'TN': 78},
 'efficiency_cont': {'TP': 7, 'FP': 58, 'FN': 29, 'TN': 120},
 'efficiency_tret': {'TP': 30, 'FP': 143, 'FN': 2, 'TN': 39},
 'control_pce': {'TP': 38, 'FP': 52, 'FN': 6, 'TN': 118},
 'treated_pce': {'TP': 71, 'FP': 80, 'FN': 2, 'TN': 61},
 'control_voc': {'TP': 44, 'FP': 10, 'FN': 7, 'TN': 153},
 'treated_voc': {'TP': 65, 'FP': 23, 'FN': 4, 'TN': 122}}

## Calculate Macro f1 score

In [44]:
def macro_f1(f1_list, weight = None):
    if weight == None:
        #If no weight given, do unweighted average of f1 score
        return sum(f1_list) / len(f1_list)
    total_f1 = 0
    for i in range(len(f1_list)):
        total_f1 += (f1_list[i] * weight[i])
    return total_f1 / sum(weight)
    


In [45]:
# Define column names
columns = ['Macro F1 score weight distribution', 'Base Deepseek 8bit', 'Fine-Tuned Deepseek 4 bit', 'Fine-Tuned Deepseek 8 bit', 'Llama 3 billion parameter']

# Create a DataFrame with NaN values
df_f1scores = pd.DataFrame(np.nan, index=[0, 1, 2, 3, 4, 5, 6], columns=columns)
df_f1scores

Unnamed: 0,Macro F1 score weight distribution,Base Deepseek 8bit,Fine-Tuned Deepseek 4 bit,Fine-Tuned Deepseek 8 bit,Llama 3 billion parameter
0,,,,,
1,,,,,
2,,,,,
3,,,,,
4,,,,,
5,,,,,
6,,,,,


In [46]:
## Unweighted
macro_train_0 = macro_f1(f1s_train)
macro_train_0


NameError: name 'f1s_train' is not defined

In [48]:
macro_base_0 = macro_f1(f1s_base)
macro_base_0

0.5228305047746206

In [49]:
## unweighted row value
unweighted = ['Macro F1 score with equal weight', macro_base_0, None, None, None]
df_f1scores.loc[0] = unweighted


In [50]:
variables_train

NameError: name 'variables_train' is not defined

In [51]:
weights_1 = [1, 1, 1, 1, 2, 2, 2, 2, 1, 2, 2, 1, 1, 1, 1]

In [52]:
macro_train_1 = macro_f1(f1s_train, weight = weights_1)
macro_train_1

NameError: name 'f1s_train' is not defined

In [53]:
macro_base_1 = macro_f1(f1s_base, weight = weights_1)
macro_base_1

0.5414419522170346

In [54]:
## Heavier weight on stability value
first_f1 = ['Heavier weight on stability', macro_base_1, None, None, None]
df_f1scores.loc[1] = first_f1

In [55]:
variables_train

NameError: name 'variables_train' is not defined

In [56]:
weights_2 = [2, 2, 2, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1]

In [57]:
macro_train_2 = macro_f1(f1s_train, weight = weights_2)
macro_train_2

NameError: name 'f1s_train' is not defined

In [58]:
macro_base_2 = macro_f1(f1s_base, weight = weights_2)
macro_base_2

0.5437834491690354

In [59]:
## Heavier weight on perovskite structure
first_f2 = ['Heavier weight on perovskite structure', macro_base_2, None, None, None]
df_f1scores.loc[2] = first_f2

In [60]:
variables_train

NameError: name 'variables_train' is not defined

In [61]:
weights_3 = [1, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2]

In [62]:
macro_train_3 = macro_f1(f1s_train, weight = weights_3)
macro_train_3

NameError: name 'f1s_train' is not defined

In [63]:
macro_base_3 = macro_f1(f1s_base, weight = weights_3)
macro_base_3

0.5119369308405363

In [64]:
## Heavier weight on numeric data
first_f3 = ['Heavier weight on numeric data', macro_base_3, None, None, None]
df_f1scores.loc[3] = first_f3

In [65]:
variables_train

NameError: name 'variables_train' is not defined

In [66]:
weights_4 = [1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1]

In [67]:
macro_train_4 = macro_f1(f1s_train, weight = weights_4)
macro_train_4

NameError: name 'f1s_train' is not defined

In [68]:
macro_base_4 = macro_f1(f1s_base, weight = weights_4)
macro_base_4

0.5377166652488992

In [70]:
## Weight to perform prediction 1
first_f4 = ['Weight to perform prediction 1', macro_base_4, None, None, None]
df_f1scores.loc[4] = first_f4

In [71]:
variables_train

NameError: name 'variables_train' is not defined

In [72]:
weights_5 = [1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1]

In [73]:
macro_train_5 = macro_f1(f1s_train, weight = weights_5)
macro_train_5

NameError: name 'f1s_train' is not defined

In [74]:
macro_base_5 = macro_f1(f1s_base, weight = weights_5)
macro_base_5

0.47263302797415196

In [75]:
## Weight to perform prediction 2
first_f5 = ['Weight to perform prediction 2', macro_base_5, None, None, None]
df_f1scores.loc[5] = first_f5

In [76]:
variables_train

NameError: name 'variables_train' is not defined

In [77]:
weights_6 = [1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0]

In [78]:
macro_train_6 = macro_f1(f1s_train, weight = weights_6)
macro_train_6

NameError: name 'f1s_train' is not defined

In [79]:
macro_base_6 = macro_f1(f1s_base, weight = weights_6)
macro_base_6

0.6309655651037794

In [81]:
## Weight to perform prediction 3
first_f6 = ['Weight to perform prediction 3', macro_base_6, None, None, None]
df_f1scores.loc[6] = first_f6

In [82]:
df_f1scores

Unnamed: 0,Macro F1 score weight distribution,Base Deepseek 8bit,Fine-Tuned Deepseek 4 bit,Fine-Tuned Deepseek 8 bit,Llama 3 billion parameter
0,Macro F1 score with equal weight,0.522831,,,
1,Heavier weight on stability,0.541442,,,
2,Heavier weight on perovskite structure,0.543783,,,
3,Heavier weight on numeric data,0.511937,,,
4,Weight to perform prediction 1,0.537717,,,
5,Weight to perform prediction 2,0.472633,,,
6,Weight to perform prediction 3,0.630966,,,


### Different weight to consider
- Unweight
- Heavier weight on stability
- Heavier weight on perovskite structure
- Heavier weight on numeric data
- Weight to perform prediction 1
- Weight to perform prediction 2
- Weight to perform prediction 3