# Expand the annotation from evaluation_final and perform some inspection

In [13]:
import pandas as pd
import ast

# Load the annotation table; later cells read its 'first_num' and 'output' columns.
annotation_df = pd.read_csv("annotation.csv")


In [14]:
# Flatten every annotation into one record per "test_*" entry of its parsed 'output'.
shared_field_names = (
    "perovskite_composition",
    "electron_transport_layer",
    "hole_transport_layer",
    "structure_pin_nip",
)

expanded_data = []
for _, annotation in annotation_df.iterrows():
    # The 'output' cell stores a dictionary serialized as a Python literal string.
    parsed = ast.literal_eval(annotation['output'])

    # Fields repeated on every test record that comes from this annotation row.
    shared = {"paper_num": annotation['first_num']}  # keeps track of first_num
    for field_name in shared_field_names:
        shared[field_name] = parsed.get(field_name)

    # Only keys prefixed with "test_" carry per-test details.
    for entry_name, entry in parsed.items():
        if not entry_name.startswith("test_"):
            continue
        record = dict(shared, test=entry_name)  # shared fields first, then the test name
        record.update(entry)  # merge the test details on top
        expanded_data.append(record)

# Build the DataFrame once from the collected records.
df_expanded = pd.DataFrame(expanded_data)

# # Fill missing passivating_molecule values based on the first test in each group
# df_expanded['passivating_molecule'] = df_expanded.groupby('perovskite_composition')['passivating_molecule'].transform(lambda x: x.ffill())

In [15]:
# Show the expanded table (one row per test entry) for a visual sanity check.
df_expanded

Unnamed: 0,paper_num,perovskite_composition,electron_transport_layer,hole_transport_layer,structure_pin_nip,test,stability_type,passivating_molecule,humidity,temperature,time,control_pce,treated_pce,control_voc,treated_voc,efficiency_control,efficiency_tret,efficiency_cont
0,0,Cs0.05FA0.85MA0.1PbI3,C60,2PACz and Me-4PACz,PIN,test_1,ISOSL,4-chlorobenzenesulfonate (4Cl-BZS),,65.0,1200.0,24.0,26.9,,1.18,,95.0,
1,1,,TinOxide,PTAA,PIN,test_1,ISOST,phenethylammonium,,85.0,500.0,,19.1,,1.16,,,
2,2,(BA)2PbI 4,tin dioxide,Spiro-OMeTAD,NIP,test_1,ISOSL,,85.0,25.0,1620.0,22.3,24.3,,1.18,,98.0,58.6
3,2,(BA)2PbI 4,tin dioxide,Spiro-OMeTAD,NIP,test_1_2,ISOSD,,85.0,85.0,1056.0,,21.3,,,,94.0,
4,2,(BA)2PbI 4,tin dioxide,Spiro-OMeTAD,NIP,test_2,ISOSLT,,,25.0,1620.0,,24.0,,,,98.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
227,147,FAPbI 3,,Spiro-OMeTAD,NIP,test_1,ISOSD,carbazole-triphenylamine and phenylammonium io...,85.0,85.0,1000.0,22.3,,,1.11,,92.3,
228,147,FAPbI 3,,Spiro-OMeTAD,NIP,test_2,ISOSLT,carbazole-triphenylamine and phenylammonium io...,,,1100.0,,24.7,,,,94.6,66.6
229,148,Cs0.05FA0.85MA0.10Pb(I0.97Br0.03)3,,Spiro-OMeTAD,NIP,test_1_2,ISOSLT,phenylethylammonium iodide,50-70,,500.0,,,,,,84.0,70.0
230,148,Cs0.05FA0.85MA0.10Pb(I0.97Br0.03)3,,Spiro-OMeTAD,NIP,test_1,,4-tert-butyl-benzylammonium iodide,,,,21.2,22.7,1.09,1.12,,95.0,


# Efficiency cont Check - Done

In [9]:
# Distinct efficiency_cont values, ordered by how often they occur (value_counts order).
data_inspect = df_expanded['efficiency_cont'].value_counts()

# Series.iteritems() was removed in pandas 2.0; Series.items() yields the same
# (value, count) pairs and is available in older releases too.
for index, value in data_inspect.items():
    print(index)

60.0
80.0
50.0
70.0
40.0
0.0
65.0
27.0
20.0
84.0
63.0
30.0
76.0
85.0
38.0
68.0
58.0
71.0
87.0
4.0
83.0
92.7
96.5
58.6
75.8
6.0
55.0
73.2
56.0
75.6
78.0
10.0
89.0
77.6
71.3
74.0
70.8
0.6
98.0
82.0
90.0
51.0
47.0
9.65
50.6
59.1
57.0
95.0
29.0
44.0
93.0
32.0
49.0
67.2
25.0
0.7
94.0
66.6


In [12]:
# Find which paper reported efficiency_cont == 0.7.
efficiency_cont = df_expanded.loc[
    df_expanded['efficiency_cont'].eq(0.7),
    ["paper_num", "efficiency_cont"],
]
efficiency_cont

Unnamed: 0,paper_num,efficiency_cont
102,82,0.7


# Efficiency treat Check - Done

In [4]:
# Distinct efficiency_tret values, ordered by how often they occur (value_counts order).
data_inspect = df_expanded['efficiency_tret'].value_counts()

# Series.iteritems() was removed in pandas 2.0; Series.items() yields the same
# (value, count) pairs and is available in older releases too.
for index, value in data_inspect.items():
    print(index)

90.0
95.0
80.0
100.0
92.0
85.0
96.0
91.0
98.0
97.0
99.0
86.0
93.0
94.0
74.0
75.0
82.0
88.0
53.0
98.2
87.0
73.0
84.0
65.0
45.0
98.7
92.2
97.5
95.2
92.3
0.72
91.8
90.5
88.7
89.0
94.5
67.0
71.0
98.9
60.0
0.91
95.7
0.85
91.5
104.0
47.9
0.6
96.8
93.8
96.2
0.92
35.0
81.0
83.0
82.1
91.1
96.7
1.0
94.6


In [8]:
# Find which paper reported efficiency_tret == 0.92.
efficiency_tret = df_expanded.loc[
    df_expanded['efficiency_tret'].eq(0.92),
    ["paper_num", "efficiency_tret"],
]
efficiency_tret

Unnamed: 0,paper_num,efficiency_tret
106,84,0.92


# Composition - DONE

In [20]:
# Distinct perovskite_composition strings, ordered by how often they occur.
data_inspect = df_expanded['perovskite_composition'].value_counts()

# Series.iteritems() was removed in pandas 2.0; Series.items() yields the same
# (value, count) pairs and is available in older releases too.
for index, value in data_inspect.items():
    print(index)

FAPbI 3
(FAPbI3)0.95(MAPbBr3)0.05
MAPbI 3
MAPbI3
FA0.85MA0.1Cs0.05PbI2.9Br0.1
Cs0.1FA0.9PbI3
Cs 0.05(FA0.98MA0.02)0.95Pb(I0.98Br0.02)3
FAPbI3
FA0.83Cs0.17PbI2.7Br0.3
FA0.98MA0.02Pbl3
FA0.98Cs0.02PbI3
Cs0.15FA0.85PbI2.55Br0.45
MAPbBr 3
(FAPbI3)0.97(MC)0.03
F- Cs 0.05 FA 0.81 MA 0.14 PbI 2.55 Br 0.45
Cs0.05(FAPbI3)0.85(MAPbBr3)0.15
(BA)2PbI 4
Rb0.05Cs0.05MA0.05FA0.85Pb(I0.95Br0.05)3
Cs0.05FA0.81MA0.14PbI2.55Br0.45
Cs0.05MA0.1FA0.85PbI3
FA0.95Cs0.05PbI3
Cs0.05FA0.85MA0.10Pb(I0.97Br0.03)3
Cs 0.05 (FA0.92MA0.08)0.95Pb(I0.92Br0.08)3
FA0.83MA0.17Pb-(I0.83Br0.17)3
Cs0.12FA0.8MA0.08PbI1.8Br1
Cs 0.05(FA0.95MA0.05)0.95Pb(I0.95Br0.05)3
Cs 0.05 MA 0.05 FA 0.9 Pb(I 0.95 Br 0.05 ) 3
Cs0.05FA0.95PbI3
Cs0.05(FA0.95MA0.05)0.95Pb(I0.95Br0.05)3
Cs0.05MA0.10FA0.85PbI3
(FA0.98MA0.02)0.95Cs0.05Pb(I0.95Br0.02)3
FA0.8Cs0.2Pb(I0.7Br0.3)3
Cs0.05FA0.9MA0.05Pb(I0.95Br0.05)3
Cl-contained FAPbI3
(FAPbI3)0.77(MAPbBr3)0.14(CsPbI3)0.09
MA0.7FA0.3PbI3
Cs0.05(MA0.17FA0.83)0.95Pb(I0.83Br0.17)3
Cs0.05(MA0.05FA0.95)0.95Pb(I

## Perovskite Composition problem
- F- Cs 0.05 FA 0.81 MA 0.14 PbI 2.55 Br 0.45
- Cl-contained FAPbI3


In [24]:
# List the rows (and their paper numbers) carrying the "F- ..." composition string.
composition = df_expanded.loc[
    df_expanded['perovskite_composition'].eq("F- Cs 0.05 FA 0.81 MA 0.14 PbI 2.55 Br 0.45"),
    ["paper_num", "perovskite_composition"],
]
composition

Unnamed: 0,paper_num,perovskite_composition
112,88,F- Cs 0.05 FA 0.81 MA 0.14 PbI 2.55 Br 0.45
113,88,F- Cs 0.05 FA 0.81 MA 0.14 PbI 2.55 Br 0.45
114,88,F- Cs 0.05 FA 0.81 MA 0.14 PbI 2.55 Br 0.45


# Inspect passivating molecule

In [12]:
# Distinct passivating_molecule names, ordered by how often they occur.
data_inspect = df_expanded['passivating_molecule'].value_counts()

# Series.iteritems() was removed in pandas 2.0; Series.items() yields the same
# (value, count) pairs and is available in older releases too.
for index, value in data_inspect.items():
    print(index)

ortho-(phenylene)di(ethylammonium) iodide
phenethylammonium iodide
fluorophenylethylammonium iodide
PCBM
butylammonium
FAPbI3
oleylammonium iodide
poly(methyl methacrylate)
ethylenediammonium diiodide
phenylethylammonium iodide
carbazole-triphenylamine and phenylammonium iodide units
3-diammonium iodide,propane-1
MA +
4-tert-butyl-benzylammonium iodide
2-thiopheneethylammonium iodide
(phenethylamino)methaniminium iodide
MAPbBr3
quanternary ammonium halides
n-hexyl trimethyl ammonium bromide
europium ion pair
FA
ortho-carborane
4-fluoroaniline
oleylamine
4-trifluorophenylethylammonium iodide
chlorine-rich mixed-halide perovskite interlayer
lead iodide
formate
4-fluorophenylethylammonium iodide
Piperazinium Diiodide
piperazinium iodide
Titanium dioxide
Butylammonium bromide
ferrocenyl-bis-thiophene-2-carboxylate
lead oxalate
zinc-based halogenometallate
sodium thioglycolate
4-chlorobenzenesulfonate (4Cl-BZS)
NOTE:This is how to prepare the PSC,relevent...?
CF3 -PEAI
butylammonium iodide


### Passivating issues
Issue raised by Kang and ChatGPT
- iso-butylamine iodide --> paper 5: butylamine iodide is convertible
- EDBE --> paper 9: wasn't the tested passivating. Fixed.
- MAPbBr3 --> paper 79: Done. This was a perovskite molecule, not a composition and not passivating.
- FAPbI3 --> paper 89: Changed into perovskite composition, but there is no passivation in this paper.
- FAPbI3 --> paper 91 Paper about additive and no passivation was mentioned
- chlorophenylethylammonium iodide --> paper 14: this was 4-chlorophenylethylammonium iodide and an additive.
- fluorophenylethylammonium iodide --> paper 15: Checking with Kelly whether this is consistently passivating or an additive.
- fluorophenylethylammonium iodide --> paper 143: fluorophenylethylammonium lead iodide was passivating. Fixed. 
- 3,4,5-trifluoroanilinium --> paper 19: Can't understand if this is additive or passivating. Asking Kelly. 
- tri-octyl phosphine oxide --> paper 36 There was no metric associated with this passivating. 
- azetidinium lead iodide --> paper 42 nothing wrong, perfect paper 
- DMePDAI 2 --> paper 53 Dimethylpyrroline Diammonium Iodide
- (phenethylamino)methaniminium iodide --> paper 77 nothing wrong, perfect paper
- NOTE:This is how to prepare the PSC,relevent...? --> paper 60 Fixed. the treatment was 4-vinylbenzylammonium bromide
- lead iodide --> paper 102 perovskite molecule. This paper had no passivating. skip. 
- formate --> paper 110 formate is an additive to composition. Passivating was not mentioned, skip
- europium ion pair --> Already reviewed; there was no passivation, so skipped.
- ortho-carborane --> paper 146 This is indeed passivating. Additionally, CB-NH2 is also another passivating. 
- "OATsO" & "OABF4" ---> paper 10



<br>
- CF3-phenethylammonium
- PCBM
- 4-fluorophenylethylammonium iodide
- 4-trifluoromethyl-phenylammonium
- poly(methyl methacrylate)
- quanternary ammonium halides
- ferrocenyl-bis-thiophene-2-carboxylate
- lead oxalate
- sodium thioglycolate
- 4-chlorobenzenesulfonate (4Cl-BZS)
- CF3 -PEAI
- PS Plastic Foam 1%


In [90]:
# Find which paper lists "OABF4" as the passivating molecule.
passivatin = df_expanded.loc[
    df_expanded['passivating_molecule'].eq("OABF4"),
    ["paper_num", "passivating_molecule"],
]
passivatin

Unnamed: 0,paper_num,passivating_molecule
12,10,OABF4


In [81]:
# Find which paper lists "2-thiopheneethylammonium iodide" as the passivating molecule.
passivatin = df_expanded.loc[
    df_expanded['passivating_molecule'].eq("2-thiopheneethylammonium iodide"),
    ["paper_num", "passivating_molecule"],
]
passivatin

Unnamed: 0,paper_num,passivating_molecule
95,75,2-thiopheneethylammonium iodide


In [86]:
import pubchempy as pcp
import numpy as np

In [87]:
def fetch_smiles_from_name(molecule_name):
    """Look up a molecule on PubChem by name and return its isomeric SMILES.

    Parameters
    ----------
    molecule_name : str
        Name handed to the PubChem name search.

    Returns
    -------
    str or float
        ``isomeric_smiles`` of the first compound returned by the search.
        ``np.nan`` is returned in every "no result" situation: the name is
        blank or not a string, the search yields no compounds, or the lookup
        raises (the error is printed, not re-raised).
    """
    # A blank or non-string name (e.g. NaN from an empty table cell) cannot
    # match anything, so skip the lookup entirely.
    if not isinstance(molecule_name, str) or not molecule_name.strip():
        return np.nan

    try:
        # Search for the molecule in PubChem by name
        compounds = pcp.get_compounds(molecule_name, 'name')
        if compounds:
            return compounds[0].isomeric_smiles  # Return the first match's SMILES
        return np.nan
    except Exception as e:
        # Best-effort lookup: report the failure and fall back to the same
        # missing-value sentinel used when nothing matches (previously None).
        print(f"Error fetching SMILES for {molecule_name}: {e}")
        return np.nan

In [88]:
# Try the name-based lookup on one of the passivating-molecule names from the table.
fetch_smiles_from_name("2-thiopheneethylammonium iodide")

nan