# Expand the annotation from evaluation_final and perform some inspection

In [98]:
import pandas as pd
import ast

# Load the dataset
annotation_df = pd.read_csv("q1_submission_notebooks/annotation.csv")


In [99]:
annotation_df.iloc[4]['output']

"{'perovskite_composition': '(MAPbBr3)0.05(FAPbI3)0.95', 'electron_transport_layer': 'tin dioxide', 'hole_transport_layer': 'Spiro-OMeTAD', 'structure_pin_nip': 'NIP'}"

In [100]:
# Convert string representation of dictionary into actual dictionary
expanded_data = []
for index, row in annotation_df.iterrows():
    row_dict = ast.literal_eval(row['output'])  # Convert string to dictionary
    
    # Extract common fields
    common_fields = {
        "paper_num": row['first_num'],  # Track the first_num
        "perovskite_composition": row_dict.get("perovskite_composition"),
        "electron_transport_layer": row_dict.get("electron_transport_layer"),
        "hole_transport_layer": row_dict.get("hole_transport_layer"),
        "structure_pin_nip": row_dict.get("structure_pin_nip"),
    }

    # Extract test data
    found_test = 0
    for key, test_data in row_dict.items():
        if key.startswith("test_"):
            test_row = common_fields.copy()
            test_row["test"] = key  # Store test name
            test_row.update(test_data)  # Merge test details
            expanded_data.append(test_row)
            found_test += 1
    if found_test == 0:
        test_row = common_fields.copy()
        test_row["test"] = None 
        expanded_data.append(test_row)

# Convert list of dictionaries into DataFrame
df_expanded = pd.DataFrame(expanded_data)

# # Fill missing passivating_molecule values based on the first test in each group
# df_expanded['passivating_molecule'] = df_expanded.groupby('perovskite_composition')['passivating_molecule'].transform(lambda x: x.ffill())

In [101]:
len(df_expanded['paper_num'].unique())

148

In [102]:
df_expanded = df_expanded.drop(['test'], axis=1)
df_expanded

Unnamed: 0,paper_num,perovskite_composition,electron_transport_layer,hole_transport_layer,structure_pin_nip,stability_type,passivating_molecule,humidity,temperature,time,control_pce,treated_pce,control_voc,treated_voc,efficiency_cont,efficiency_tret,perovskite_molecule
0,0,Cs0.05FA0.85MA0.1PbI3,C60,2PACz,PIN,ISOSL3,4-chlorobenzenesulfonate,50.0,65.0,1200.0,24.0,26.9,,1.18,,95.0,
1,0,Cs0.05FA0.85MA0.1PbI3,C60,2PACz,PIN,ISOSL3,4-chlorobenzenesulfonate,50.0,85.0,540.0,24.0,26.9,,,,87.0,
2,0,Cs0.05FA0.85MA0.1PbI3,C60,2PACz,PIN,ISOSD2,4-chlorobenzenesulfonate,,85.0,1500.0,24.0,26.9,,,,95.0,
3,1,,TinOxide,PTAA,PIN,ISOST,phenethylammonium,,85.0,500.0,,19.1,,1.16,,,
4,2,(FAPbI3)0.95(MAPbBr 3)0.05,tin dioxide,Spiro-OMeTAD,NIP,ISOSLT,butylammonium lead tetra iodide,,,1620.0,,24.0,,,,98.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
268,147,FAPbI 3,Tin dioxide,Spiro-OMeTAD,NIP,ISOSD,carbazole-triphenylamine and phenylammonium io...,85.0,85.0,1000.0,22.3,24.7,1.11,1.16,,92.3,
269,147,FAPbI 3,Tin dioxide,Spiro-OMeTAD,NIP,ISOSLT,carbazole-triphenylamine and phenylammonium io...,,,1100.0,22.3,24.7,1.11,1.16,66.6,94.6,
270,148,Cs0.05FA0.85MA0.10Pb(I0.97Br0.03)3,Titanium dioxide,Spiro-OMeTAD,NIP,ISOSLT,phenylethylammonium iodide,50-70,,500.0,21.2,22.7,1.09,1.12,70.0,84.0,
271,148,Cs0.05FA0.85MA0.10Pb(I0.97Br0.03)3,Titanium dioxide,Spiro-OMeTAD,NIP,ISOSLT,4-tert-butyl-benzylammonium iodide,50-70,,500.0,21.2,23.5,1.09,1.14,70.0,95.0,


# Inspecting passivating molecule for submission

In [103]:
## Smile detection code
import pubchempy as pcp
import numpy as np
import requests

def fetch_smiles(molecule_name):
    try:
        # Try fetching SMILES from PubChem
        compounds = pcp.get_compounds(molecule_name, 'name')
        if compounds and compounds[0].isomeric_smiles:
            print("smile found with pubchempy")
            return compounds[0].isomeric_smiles  # Return first match's SMILES
    except Exception as e:
        print(f"Error fetching from PubChem for {molecule_name}: {e}")

    # If PubChem fails, try OPSIN
    try:
        base_url = "https://opsin.ch.cam.ac.uk/opsin/"
        smiles_url = base_url + molecule_name + ".smi"
        r = requests.get(smiles_url)
        if r.status_code == 200:
            print("smile found on OPSIN API")
            return r.text.strip()  # Remove any trailing newline characters
    except Exception as e:
        print(f"Error fetching from OPSIN for {molecule_name}: {e}")
    print("Both method failed")
    return None  # Return None if both methods fail

# Example usage
    smiles = fetch_smiles("4-chlorobenzenesulfonate")
    print(smiles)

In [104]:
passivating = df_expanded[(df_expanded['passivating_molecule'].notnull()) & (df_expanded['perovskite_composition'].notnull())]
passivating = passivating[['paper_num', 'passivating_molecule']]
passivating = passivating.drop_duplicates(subset=['paper_num', 'passivating_molecule'])
passivating

Unnamed: 0,paper_num,passivating_molecule
0,0,4-chlorobenzenesulfonate
4,2,butylammonium lead tetra iodide
9,5,iso-butylammonium iodide
10,6,phenylethylammonium iodide
12,7,4-tert-butyl-benzylammonium
...,...,...
267,146,ortho-carborane decorated with phenylamino groups
268,147,carbazole-triphenylamine and phenylammonium io...
270,148,phenylethylammonium iodide
271,148,4-tert-butyl-benzylammonium iodide


In [7]:
passivating_smiles = passivating['passivating_molecule'].apply(fetch_smiles)
passivating_smiles

smile found with pubchempy
smile found on OPSIN API
smile found with pubchempy
smile found with pubchempy
smile found on OPSIN API
smile found with pubchempy
smile found on OPSIN API
smile found with pubchempy
Both method failed
Both method failed
Both method failed
smile found on OPSIN API
smile found on OPSIN API
smile found with pubchempy
smile found with pubchempy
smile found with pubchempy
smile found on OPSIN API
smile found on OPSIN API
smile found on OPSIN API
Both method failed
smile found on OPSIN API
smile found on OPSIN API
smile found on OPSIN API
smile found with pubchempy
smile found with pubchempy
smile found with pubchempy
smile found with pubchempy
Both method failed
Both method failed
smile found on OPSIN API
smile found with pubchempy
Both method failed
smile found on OPSIN API
Both method failed
smile found on OPSIN API
smile found with pubchempy
smile found on OPSIN API
smile found with pubchempy
smile found on OPSIN API
smile found on OPSIN API
smile found on OPS

0        C1=CC(=CC=C1S(=O)(=O)[O-])Cl
4         [Pb](I)(I)(I)I.C(CCC)[NH3+]
8                   CC(C)C[NH3+].[I-]
9          C1=CC=C(C=C1)CC[NH3+].[I-]
11     C(C)(C)(C)C1=CC=C(C[NH3+])C=C1
                    ...              
254                              None
255                              None
257        C1=CC=C(C=C1)CC[NH3+].[I-]
258         CC(C)(C)C1=CC=C(C=C1)CN.I
259                              None
Name: passivating_molecule, Length: 73, dtype: object

In [8]:
passivating['smiles'] = passivating_smiles
passivating

Unnamed: 0,paper_num,passivating_molecule,smiles
0,0,4-chlorobenzenesulfonate,C1=CC(=CC=C1S(=O)(=O)[O-])Cl
4,2,butylammonium lead tetra iodide,[Pb](I)(I)(I)I.C(CCC)[NH3+]
8,5,iso-butylammonium iodide,CC(C)C[NH3+].[I-]
9,6,phenylethylammonium iodide,C1=CC=C(C=C1)CC[NH3+].[I-]
11,7,4-tert-butyl-benzylammonium,C(C)(C)(C)C1=CC=C(C[NH3+])C=C1
...,...,...,...
254,146,ortho-carborane decorated with phenylamino groups,
255,147,carbazole-triphenylamine and phenylammonium io...,
257,148,phenylethylammonium iodide,C1=CC=C(C=C1)CC[NH3+].[I-]
258,148,4-tert-butyl-benzylammonium iodide,CC(C)(C)C1=CC=C(C=C1)CN.I


In [9]:
len(passivating['paper_num'].unique())

53

In [10]:
passivating.iloc[3]['passivating_molecule']

'phenylethylammonium iodide'

In [11]:
## Get the rows were passivating_molecule had a value
passivating_value = passivating[passivating['passivating_molecule'].notnull()]
passivating_value

Unnamed: 0,paper_num,passivating_molecule,smiles
0,0,4-chlorobenzenesulfonate,C1=CC(=CC=C1S(=O)(=O)[O-])Cl
4,2,butylammonium lead tetra iodide,[Pb](I)(I)(I)I.C(CCC)[NH3+]
8,5,iso-butylammonium iodide,CC(C)C[NH3+].[I-]
9,6,phenylethylammonium iodide,C1=CC=C(C=C1)CC[NH3+].[I-]
11,7,4-tert-butyl-benzylammonium,C(C)(C)(C)C1=CC=C(C[NH3+])C=C1
...,...,...,...
254,146,ortho-carborane decorated with phenylamino groups,
255,147,carbazole-triphenylamine and phenylammonium io...,
257,148,phenylethylammonium iodide,C1=CC=C(C=C1)CC[NH3+].[I-]
258,148,4-tert-butyl-benzylammonium iodide,CC(C)(C)C1=CC=C(C=C1)CN.I


In [12]:
smile_failed = passivating_value[passivating_value['smiles'].isnull()]
smile_failed

Unnamed: 0,paper_num,passivating_molecule,smiles
17,8,pyrene based methylammonium iodide,
18,8,pyrene based ammonium iodide,
19,8,pyrene based ethylammonium iodide,
33,16,hydrophobic 3-(trifluoromethyl) phenethylamine...,
55,39,2-thiophenemethylammonium iodide,
56,39,2-thiophenemethylammonium chloride,
66,48,polystyrene,
73,53,"1,4-butane diammonium iodide",
110,74,copolymer of ethylene dimethacrylate (EDMA) an...,
112,75,2-thiopheneethylammonium iodide,


In [13]:
print(smile_failed.shape)
len(smile_failed['paper_num'].unique())

(18, 3)


12

In [14]:
smile_failed.iloc[3]['passivating_molecule']

'hydrophobic 3-(trifluoromethyl) phenethylamine hydroiodide'

In [15]:
from rdkit import Chem
from rdkit.Chem import rdmolops

# Assuming df is your DataFrame and 'passivating_molecule' is the column to be converted
def convert_to_iupac(chemical_name):
    # Convert the chemical name to a molecule object using RDKit
    try:
        molecule = Chem.MolFromSmiles(chemical_name)
        if molecule:
            # Return the IUPAC name for the molecule
            return Chem.MolToIUPACName(molecule)
        else:
            return None  # In case the molecule cannot be parsed
    except Exception as e:
        print(f"Error fetching from OPSIN for {chemical_name}: {e}")
        return None

# # Apply this function to the 'passivating_molecule' column and create a new column 'iupac_name'
# df['iupac_name'] = df['passivating_molecule'].apply(convert_to_iupac)

# import ace_tools as tools; tools.display_dataframe_to_user(name="Updated DataFrame with IUPAC Names", dataframe=df)/


In [None]:
# # Apply this function to the 'passivating_molecule' column and create a new column 'iupac_name'
smile_failed['iupac_name'] = smile_failed['passivating_molecule'].apply(convert_to_iupac)

[05:15:46] SMILES Parse Error: syntax error while parsing: pyrene
[05:15:46] SMILES Parse Error: Failed parsing SMILES 'pyrene' for input: 'pyrene'
[05:15:46] SMILES Parse Error: syntax error while parsing: pyrene
[05:15:46] SMILES Parse Error: Failed parsing SMILES 'pyrene' for input: 'pyrene'
[05:15:46] SMILES Parse Error: syntax error while parsing: pyrene
[05:15:46] SMILES Parse Error: Failed parsing SMILES 'pyrene' for input: 'pyrene'
[05:15:46] SMILES Parse Error: syntax error while parsing: hydrophobic
[05:15:46] SMILES Parse Error: Failed parsing SMILES 'hydrophobic' for input: 'hydrophobic'
[05:15:46] SMILES Parse Error: syntax error while parsing: 2-thiophenemethylammonium
[05:15:46] SMILES Parse Error: Failed parsing SMILES '2-thiophenemethylammonium' for input: '2-thiophenemethylammonium'
[05:15:46] SMILES Parse Error: syntax error while parsing: 2-thiophenemethylammonium
[05:15:46] SMILES Parse Error: Failed parsing SMILES '2-thiophenemethylammonium' for input: '2-thiophen

ModuleNotFoundError: No module named 'ace_tools'

In [None]:
smile_failed

Unnamed: 0,paper_num,passivating_molecule,smiles,iupac_name
17,8,pyrene based methylammonium iodide,,
18,8,pyrene based ammonium iodide,,
19,8,pyrene based ethylammonium iodide,,
33,16,hydrophobic 3-(trifluoromethyl) phenethylamine...,,
55,39,2-thiophenemethylammonium iodide,,
56,39,2-thiophenemethylammonium chloride,,
66,48,polystyrene,,
73,53,"1,4-butane diammonium iodide",,
110,74,copolymer of ethylene dimethacrylate (EDMA) an...,,
112,75,2-thiopheneethylammonium iodide,,


In [105]:
fetch_smiles("dicesium diododichloroplumbate")

Both method failed


In [87]:
df_expanded[df_expanded['paper_num'] == 42]

Unnamed: 0,paper_num,perovskite_composition,electron_transport_layer,hole_transport_layer,structure_pin_nip,stability_type,passivating_molecule,humidity,temperature,time,control_pce,treated_pce,control_voc,treated_voc,efficiency_cont,efficiency_tret,perovskite_molecule
69,42,Cs0.05MA0.1FA0.85PbI2.7Br0.3,,,,ISOSL,azetidinium lead iodide,,52.0,1000.0,19.5,22.0,1.07,1.14,47.0,85.0,


## Inspect having both passivating and perovskite

In [74]:
df_expanded[(df_expanded['perovskite_composition'].notnull()) & (df_expanded['passivating_molecule'].notnull())]['paper_num'].unique()

array([  0,   2,   5,   6,   7,   8,   9,  10,  12,  13,  14,  15,  16,
        19,  20,  22,  23,  31,  36,  39,  42,  46,  48,  50,  53,  59,
        60,  62,  64,  71,  72,  73,  74,  75,  77,  79,  82,  85,  95,
        97, 113, 114, 118, 120, 126, 131, 133, 143, 145, 146, 147, 148,
       149], dtype=int64)

## Inspect Missingness

In [88]:
df_expanded[df_expanded['perovskite_composition'].isnull()]['paper_num'].unique()

array([  1,  11,  18,  27,  29,  37,  40,  41,  44,  45,  49,  52,  83,
        86,  91,  92,  94,  99, 100, 101, 105, 106, 109, 115, 119, 121,
       127, 129, 130, 135, 137, 141, 142, 144], dtype=int64)

In [89]:
df_expanded[df_expanded['passivating_molecule'].isnull()]['paper_num'].unique()

array([  3,   4,   7,  10,  11,  17,  21,  24,  25,  26,  28,  29,  30,
        32,  33,  35,  37,  43,  44,  45,  47,  49,  51,  52,  54,  55,
        56,  57,  58,  61,  63,  65,  66,  67,  68,  69,  70,  73,  76,
        78,  80,  81,  83,  84,  86,  87,  88,  89,  90,  91,  92,  93,
        94,  95,  96,  98,  99, 100, 101, 102, 103, 104, 105, 106, 107,
       108, 109, 110, 111, 112, 115, 116, 117, 119, 121, 122, 123, 124,
       125, 128, 129, 130, 131, 132, 134, 135, 136, 137, 138, 139, 140,
       141, 142], dtype=int64)

In [81]:
df_expanded[df_expanded['paper_num'] == 67]

Unnamed: 0,paper_num,perovskite_composition,electron_transport_layer,hole_transport_layer,structure_pin_nip,test,stability_type,passivating_molecule,humidity,temperature,time,control_pce,treated_pce,control_voc,treated_voc,efficiency_cont,efficiency_tret,perovskite_molecule
107,67,FA0.83Cs0.17PbI2.7Br0.3,TinOxide,polybisphenyl(trimethylphenyl)amine,PIN,test_1,ISOSLT,,,,250.0,,,,,,96.0,
108,67,FA0.83Cs0.17PbI2.7Br0.3,TinOxide,polybisphenyl(trimethylphenyl)amine,PIN,test_2_2,ISOSL2,,,85.0,800.0,,21.3,,,,95.0,
109,67,FA0.83Cs0.17PbI2.7Br0.3,TinOxide,polybisphenyl(trimethylphenyl)amine,PIN,test_2,ISOSL1,,,,20.0,,21.1,,,,90.0,
110,67,FA0.83Cs0.17PbI2.7Br0.3,TinOxide,polybisphenyl(trimethylphenyl)amine,PIN,test_3,ISOSD1,,,,500.0,,7.0,,,,100.0,


# Efficiency cont Check - Done

In [174]:
data_inspect = df_expanded['efficiency_cont'].value_counts()

for index, value in data_inspect.iteritems():
    print(index)

60.0
80.0
0.0
70.0
40.0
50.0
65.0
20.0
27.0
58.0
84.0
29.0
38.0
30.0
67.2
63.0
90.0
68.0
71.0
4.0
83.0
87.0
96.5
58.6
72.0
75.8
85.0
6.0
55.0
73.2
77.6
56.0
75.6
78.0
10.0
89.0
92.7
94.0
61.0
74.0
16.8
4.4
98.0
82.0
76.0
51.0
47.0
9.65
50.6
59.1
70.8
57.0
44.0
93.0
32.0
49.0
71.3
25.0
95.0
66.6


In [137]:
efficiency_cont = df_expanded[["paper_num","efficiency_cont"]]
efficiency_cont = efficiency_cont[efficiency_cont['efficiency_cont'] == 0.7]
efficiency_cont

Unnamed: 0,paper_num,efficiency_cont


# Efficiency treat Check - Done

In [175]:
data_inspect = df_expanded['efficiency_tret'].value_counts()

for index, value in data_inspect.iteritems():
    print(index)

90.0
95.0
80.0
100.0
92.0
85.0
98.0
96.0
91.0
94.0
75.0
93.0
86.0
97.0
87.0
74.0
99.0
88.0
82.0
73.0
65.0
98.2
53.0
104.0
20.0
97.5
92.2
98.7
45.0
95.2
98.9
91.8
90.5
75.6
88.7
89.0
94.5
71.0
92.3
94.6
67.0
1.0
92.6
81.0
65.1
84.8
47.9
99.4
96.8
93.8
95.7
96.2
35.0
86.2
60.0
91.5
83.0
76.8
50.0
82.1
91.1
96.7
84.0


In [139]:
efficiency_tret = df_expanded[["paper_num","efficiency_tret"]]
efficiency_tret = efficiency_tret[efficiency_tret['efficiency_tret'] == 0.92]
efficiency_tret

Unnamed: 0,paper_num,efficiency_tret


# Composition - DONE

In [176]:
data_inspect = df_expanded['perovskite_composition'].value_counts()

for index, value in data_inspect.iteritems():
    print(index)

(FAPbI3)0.95(MAPbBr3)0.05
FAPbI 3
MAPbI 3
MAPbI3
FA0.85MA0.1Cs0.05PbI2.9Br0.1
Cs0.05(MA)0.16(FA)0.79Pb(I0.83Br0.17 )3
FA0.83Cs0.17PbI2.7Br0.3
FA0.98Cs0.02PbI3
FA0.98MA0.02Pbl3
Cs0.1FA0.9PbI3
Cs 0.05(FA0.98MA0.02)0.95Pb(I0.98Br0.02)3
FAPbI3
MAPbBr 3
Cs0.15FA0.85PbI2.55Br0.45
(FAPbI3)0.95(MAPbBr 3)0.05
(FAPbI3)0.97(MC)0.03
[(FAPbI 3 ) 0.8(MAPbBr3)0.13]0.92(CsPbI3)0.08
F- Cs 0.05 FA 0.81 MA 0.14 PbI 2.55 Br 0.45
(FAPbI 3 ) 0.87(MAPbBr3)0.13]0.92(CsPbI3)0.08
Cs0.03(FA0.97MA0.03)0.97Pb(I0.97Br0.03)3
Cs0.05(FAPbI3)0.85(MAPbBr3)0.15
(MA)0.14(FA)0.81(Cs)0.05Pb(I)3
Cs0.05FA0.85MA0.1PbI3
Rb0.05Cs0.05MA0.05FA0.85Pb(I0.95Br0.05)3
(FAPbI3)0.94(MAPbBr3)0.06
Cs0.05FA0.81MA0.14PbI2.55Br0.45
Cs0.05MA0.05FA0.9Pb(I0.95Br0.05)3\
Cs0.12FA0.8MA0.08PbI1.8Br1
2-phenylethylammonium iodide
Cs 0.05 (FA0.92MA0.08)0.95Pb(I0.92Br0.08)3
Cs0.05FA0.95PbI3
FA0.95MA0.05Pb(I0.92Br0.08)3
Cs0.05(FA5/6MA1/6)0.95Pb(I0.9Br0.1)3
FA0.9Cs0.1PbI3
Cs 0.05(FA0.95MA0.05)0.95Pb(I0.95Br0.05)3
FA0.83MA0.17Pb-(I0.83Br0.17)3
Cs0.05(FA0.9

## Perovskite Composition problem
- F- Cs 0.05 FA 0.81 MA 0.14 PbI 2.55 Br 0.45
- Cl-contained FAPbI3


In [177]:
composition = df_expanded[["paper_num","perovskite_composition"]]
composition = composition[composition['perovskite_composition'] == "F- Cs 0.05 FA 0.81 MA 0.14 PbI 2.55 Br 0.45"]
composition

Unnamed: 0,paper_num,perovskite_composition
134,88,F- Cs 0.05 FA 0.81 MA 0.14 PbI 2.55 Br 0.45
135,88,F- Cs 0.05 FA 0.81 MA 0.14 PbI 2.55 Br 0.45
136,88,F- Cs 0.05 FA 0.81 MA 0.14 PbI 2.55 Br 0.45


# Inspect passivating molecule

In [178]:
data_inspect = df_expanded['passivating_molecule'].value_counts()

for index, value in data_inspect.iteritems():
    print(index)

phenethylammonium iodide
allylammonium iodide
4-chlorobenzenesulfonate
octylammonium iodide
butylammonium lead tetra iodide
cyclohexylmethylammonium iodide
phenylethylammonium iodide
Dimethylpyrroline Diammonium Iodide
1,4-butane diammonium iodide
2-thiophenemethylammonium iodide
ortho-(phenylene)di(ethylammonium) iodide
3-(aminomethyl)pyridine
n-Butylammonium bromide
3-fluoro-phenethylammonium iodide
n-hexyl trimethyl ammonium bromide
oleylammonium iodide
oleylamine iodide
butylammonium iodide
3,4,5-trifluoroanilinium
phenylethylammonium lead iodide
(phenethylamino)methaniminium iodide
4-fluorophenylethylammonium iodide
2-thiopheneethylammonium chloride
4-tert-butyl-benzylammonium
carbazole-triphenylamine and phenylammonium iodide units
4-fluoro-phenylethylammonium iodide
ethylenediammonium diiodide
chlorine-rich mixed-halide perovskite interlayer
Lead Iodide
phenyl-C61-butyric acid methyl ester
L-α-phosphatidylcholine
4-tert-butyl-benzylammonium iodide
choline chloride
ortho-carboran

### Passivating issues
Issue raised by Kang and ChatGPT
- iso-butylamine iodide --> paper 5: butylamine iodide is convertable
- EDBE --> paper 9: wasn't the tested passivating. Fixed.
- MAPbBr3 --> paper 79: Done this was perovskite molecule, not compostion not passivating
- FAPbI3 --> paper 89 Changed into perovskite composition, but this there is no passivation in this paper. 
- FAPbI3 --> paper 91 Paper about additive and no passivation was mentioned
- chlorophenylethylammonium iodide --> paper 14: this was 4-chlorophenylethylammonium iodide and an additive.
- fluorophenylethylammonium iodide --> paper 15: Talking with kelly if this is consistently passivating or additive
- fluorophenylethylammonium iodide --> paper 143: fluorophenylethylammonium lead iodide was passivating. Fixed. 
- 3,4,5-trifluoroanilinium --> paper 19: This is passivating. Done. 
- tri-octyl phosphine oxide --> paper 36 There was no metric associated with this passivating. 
- azetidinium lead iodide --> paper 42 nothing wrong, perfect paper 
- DMePDAI 2 --> paper 53 Dimethylpyrroline Diammonium Iodide
- (phenethylamino)methaniminium iodide --> paper 77 nothing wrong, perfect paper
- NOTE:This is how to prepare the PSC,relevent...? --> paper 60 Fixed. the treatment was 4-vinylbenzylammonium bromide
- lead iodide --> paper 102 perovskite molecule. This paper had no passivating. skip. 
- formate --> paper 110 formate is an additive to composition. Passivating was not mentioned, skip
- europium ion pair --> Already reviewed, they were no passivation, so skiped.
- ortho-carborane --> paper 146 This is indeed passivating. Additionally, CB-NH2 is also another passivating. 
- "OATsO" & "OABF4" ---> paper 10
- CF3-phenethylammonium --> paper 16 Already reviewed. hydrophobic 3-(trifluoromethyl)phenethylamine hydroiodide
- CF3 -PEAI --> paper 73 3,5-bis(trifluoromethyl)phenethylammonium iodide
- 4-chlorobenzenesulfonate (4Cl-BZS) --> paper 0 Perfect paper, done. 
- lead oxalate --> paper 126 The passivation is correct. PbC2O4, 
- sodium thioglycolate --> paper 133 passivating correct
- quanternary ammonium halides --> paper 82 this is not a specific passivating name. Already corrected. 
- ferrocenyl-bis-thiophene-2-carboxylate --> paper 120 correct passivation (FcTc2) tested. 


- PCBM --> 68 [6 6']-phenyl-C61-butyric acid methyl ester. Has passivating and perovskite, but this is tested on ETL bilayer. Maybe record only PCE
- PCBM --> 69 [6 6']-phenyl-C61-butyric acid methyl ester. Has passivating and perovskite, but testing on different way ETL is treated. Maybe record only PCE
- PS Plastic Foam 1% --> paper 48  want to know the difference between PS plastic foam and PS 0.02%. Asking Kelly
- 4-trifluoromethyl-phenylammonium --> paper 149 Issue with identifying passivating cation. Also with composition. Asking kelly
- 4-fluorophenylethylammonium iodide --> paper 74 Differentiation between 2D and CLP confusion. Asking Kelly

<br>
- poly(methyl methacrylate)


In [179]:
passivatin = df_expanded[["paper_num","passivating_molecule"]]
passivatin = passivatin[passivatin['passivating_molecule'] == "poly(methyl methacrylate)"]
passivatin

Unnamed: 0,paper_num,passivating_molecule


In [81]:
passivatin = df_expanded[["paper_num","passivating_molecule"]]
passivatin = passivatin[passivatin['passivating_molecule'] == "2-thiopheneethylammonium iodide"]
passivatin

Unnamed: 0,paper_num,passivating_molecule
95,75,2-thiopheneethylammonium iodide


In [86]:
import pubchempy as pcp
import numpy as np

In [87]:
def fetch_smiles_from_name(molecule_name):
    try:
        # Search for the molecule in PubChem by name
        compounds = pcp.get_compounds(molecule_name, 'name')
        if compounds:
            return compounds[0].isomeric_smiles  # Return the first match's SMILES
        else:
            return np.nan
    except Exception as e:
        print(f"Error fetching SMILES for {molecule_name}: {e}")
        return None

In [88]:
fetch_smiles_from_name("2-thiopheneethylammonium iodide")

nan