# Expand the annotation from evaluation_final and perform some inspection

In [8]:
import pandas as pd
import ast

# Read the raw annotation table; the path is relative to the notebook's working directory
ANNOTATION_CSV = "annotation.csv"
annotation_df = pd.read_csv(ANNOTATION_CSV)


In [9]:
# Flatten the annotations: every "test_*" entry inside a row's `output` dict
# becomes its own record, carrying the row-level fields along with it.
expanded_data = []
for paper_num, raw_output in zip(annotation_df['first_num'], annotation_df['output']):
    # `output` holds a dict literal stored as text; parse it safely
    parsed = ast.literal_eval(raw_output)

    # Fields shared by all tests of this row (None when the key is absent)
    shared = {"paper_num": paper_num}  # first_num identifies the source row
    for field in (
        "perovskite_composition",
        "electron_transport_layer",
        "hole_transport_layer",
        "structure_pin_nip",
    ):
        shared[field] = parsed.get(field)

    # One record per test entry
    for name, details in parsed.items():
        if not name.startswith("test_"):
            continue
        record = dict(shared, test=name)  # copy shared fields, then tag with the test name
        record.update(details)  # test-specific values are merged last
        expanded_data.append(record)

# Build the long-format table from the collected records
df_expanded = pd.DataFrame(expanded_data)

# Disabled: fill missing passivating_molecule values based on the first test in each group
# df_expanded['passivating_molecule'] = df_expanded.groupby('perovskite_composition')['passivating_molecule'].transform(lambda x: x.ffill())

In [10]:
# Display the expanded table for a visual check; row-level fields repeat on every test record
df_expanded

Unnamed: 0,paper_num,perovskite_composition,electron_transport_layer,hole_transport_layer,structure_pin_nip,test,stability_type,passivating_molecule,humidity,temperature,time,control_pce,treated_pce,control_voc,treated_voc,efficiency_control,efficiency_tret,efficiency_cont
0,0,Cs0.05FA0.85MA0.1PbI3,C60,2PACz and Me-4PACz,PIN,test_1,ISOSL,4-chlorobenzenesulfonate (4Cl-BZS),,65.0,1200.0,24.0,26.9,,1.18,,95.0,
1,1,,TinOxide,PTAA,PIN,test_1,ISOST,phenethylammonium,,85.0,500.0,,19.1,,1.16,,,
2,2,(BA)2PbI 4,tin dioxide,Spiro-OMeTAD,NIP,test_1,ISOSL,,85.0,25.0,1620.0,22.3,24.3,,1.18,,98.0,58.6
3,2,(BA)2PbI 4,tin dioxide,Spiro-OMeTAD,NIP,test_1_2,ISOSD,,85.0,85.0,1056.0,,21.3,,,,94.0,
4,2,(BA)2PbI 4,tin dioxide,Spiro-OMeTAD,NIP,test_2,ISOSLT,,,25.0,1620.0,,24.0,,,,98.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
227,147,FAPbI 3,,Spiro-OMeTAD,NIP,test_1,ISOSD,carbazole-triphenylamine and phenylammonium io...,85.0,85.0,1000.0,22.3,,,1.11,,92.3,
228,147,FAPbI 3,,Spiro-OMeTAD,NIP,test_2,ISOSLT,carbazole-triphenylamine and phenylammonium io...,,,1100.0,,24.7,,,,94.6,66.6
229,148,Cs0.05FA0.85MA0.10Pb(I0.97Br0.03)3,,Spiro-OMeTAD,NIP,test_1_2,ISOSLT,phenylethylammonium iodide,50-70,,500.0,,,,,,84.0,70.0
230,148,Cs0.05FA0.85MA0.10Pb(I0.97Br0.03)3,,Spiro-OMeTAD,NIP,test_1,,4-tert-butyl-benzylammonium iodide,,,,21.2,22.7,1.09,1.12,,95.0,


# Inspect passivating molecule

In [12]:
# Count occurrences of each distinct passivating-molecule string
# (value_counts drops NaN and sorts by frequency by default)
data_inspect = df_expanded['passivating_molecule'].value_counts()

# Series.iteritems() was deprecated in pandas 1.5 and removed in 2.0.
# Only the labels are printed, so iterate over the index directly.
for molecule in data_inspect.index:
    print(molecule)

ortho-(phenylene)di(ethylammonium) iodide
phenethylammonium iodide
fluorophenylethylammonium iodide
PCBM
butylammonium
FAPbI3
oleylammonium iodide
poly(methyl methacrylate)
ethylenediammonium diiodide
phenylethylammonium iodide
carbazole-triphenylamine and phenylammonium iodide units
3-diammonium iodide,propane-1
MA +
4-tert-butyl-benzylammonium iodide
2-thiopheneethylammonium iodide
(phenethylamino)methaniminium iodide
MAPbBr3
quanternary ammonium halides
n-hexyl trimethyl ammonium bromide
europium ion pair
FA
ortho-carborane
4-fluoroaniline
oleylamine
4-trifluorophenylethylammonium iodide
chlorine-rich mixed-halide perovskite interlayer
lead iodide
formate
4-fluorophenylethylammonium iodide
Piperazinium Diiodide
piperazinium iodide
Titanium dioxide
Butylammonium bromide
ferrocenyl-bis-thiophene-2-carboxylate
lead oxalate
zinc-based halogenometallate
sodium thioglycolate
4-chlorobenzenesulfonate (4Cl-BZS)
NOTE:This is how to prepare the PSC,relevent...?
CF3 -PEAI
butylammonium iodide


### Passivating issues
The following passivating-molecule entries can't be converted into SMILES:
- iso-butylamine iodide --> paper 5: butylamine iodide is convertible
- EDBE --> 
- intermediate negative ΔE vac
- chlorophenylethylammonium iodide
- fluorophenylethylammonium iodide
- CF3-phenethylammonium
- 3,4,5-trifluoroanilinium
- cyclohexylmethylammonium iodide dissolved in
- tri-octyl phosphine oxide
- azetidinium lead iodide
- DMePDAI 2
- NIP
- NOTE:This is how to prepare the PSC,relevent...?
- poly(methyl methacrylate)
- CF3 -PEAI
- 4-fluorophenylethylammonium iodide
- (phenethylamino)methaniminium iodide
- Methylammonium Lead Bromide
- HTAB
- FAPbI3
- MAPbI 3
- NIP
- ISOSLT
- ferrocenyl-bis-thiophene-2-carboxylate
- fluorophenylethylammonium iodide
- 4-trifluoromethyl-phenylammonium


In [17]:
# Find which paper(s) list "EDBE" as the passivating molecule
is_edbe = df_expanded['passivating_molecule'] == "EDBE"
passivatin = df_expanded.loc[is_edbe, ["paper_num", "passivating_molecule"]]
passivatin

Unnamed: 0,paper_num,passivating_molecule
10,9,EDBE
