#### Design Pool generation for deposition related features (Step-2)

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import pickle

In [2]:
pwd = os.getcwd() # current working directory

In [3]:
df = pd.read_csv(pwd+"/Sn_based/Sn_cleaned.csv") # Loading cleaned dataset for Sn based perovskites
df.head()

Unnamed: 0,Ref_DOI_number,Cell_architecture,Cell_flexible,Cell_semitransparent,Substrate_stack_sequence,ETL_stack_sequence,ETL_thickness,ETL_deposition_procedure,Perovskite_dimension_2D,Perovskite_dimension_2D3D_mixture,...,HTL_stack_sequence,HTL_thickness_list,HTL_deposition_procedure,Backcontact_stack_sequence,Backcontact_thickness_list,Backcontact_deposition_procedure,JV_default_Voc,JV_default_Jsc,JV_default_FF,JV_default_PCE
0,10.1002/adfm.201807696,pin,False,False,SLG | ITO,PCBM-60 | BCP,142.0,Spin-coating | Evaporation,True,False,...,PEDOT:PSS,114.0,Spin-coating,Ag,200.0,Evaporation,0.58,21.2,0.633,7.78
1,10.1002/adfm.201807696,pin,False,False,SLG | ITO,PCBM-60 | BCP,142.0,Spin-coating | Evaporation,True,False,...,PEDOT:PSS,114.0,Spin-coating,Ag,200.0,Evaporation,0.6,20.5,0.65,7.95
2,10.1002/adfm.201807696,pin,False,False,SLG | ITO,PCBM-60 | BCP,142.0,Spin-coating | Evaporation,True,False,...,PEDOT:PSS,114.0,Spin-coating,Ag,200.0,Evaporation,0.61,21.0,0.67,8.71
3,10.1002/adfm.201807696,pin,False,False,SLG | ITO,PCBM-60 | BCP,142.0,Spin-coating | Evaporation,True,False,...,PEDOT:PSS,114.0,Spin-coating,Ag,200.0,Evaporation,0.54,20.3,0.63,5.94
4,10.1021/acsenergylett.9b00954,pin,False,False,SLG | ITO,C60 | LiF,31.0,Evaporation | Evaporation,True,False,...,PEDOT:PSS,38.0,Spin-coating,Al,100.0,Evaporation,0.56,18.3,0.493,5.07


In [6]:
# Selecting columns
cols = ['Cell_architecture', 'Cell_flexible',
       'Cell_semitransparent', 'Substrate_stack_sequence',
       'ETL_stack_sequence', 'ETL_thickness', 'ETL_deposition_procedure',
       'Perovskite_dimension_2D', 'Perovskite_dimension_2D3D_mixture',
       'Perovskite_dimension_3D',
       'Perovskite_dimension_3D_with_2D_capping_layer',
       'Perovskite_composition_perovskite_ABC3_structure',
       'Perovskite_composition_long_form',
       'Perovskite_thickness',
       'Perovskite_composition_inorganic', 'Perovskite_band_gap',
       'Perovskite_band_gap_graded', 'Perovskite_deposition_procedure',
       'Perovskite_deposition_solvents',
       'Perovskite_deposition_quenching_induced_crystallisation',
       'Perovskite_deposition_thermal_annealing_temperature',
       'Perovskite_deposition_thermal_annealing_time',
       'Perovskite_deposition_solvent_annealing', 'HTL_stack_sequence',
       'HTL_thickness_list', 'HTL_deposition_procedure',
       'Backcontact_stack_sequence', 'Backcontact_thickness_list',
       'Backcontact_deposition_procedure', 'JV_default_PCE']
df = df[cols] # selecting the columns which are in the 'cols' list
df.shape

(256, 30)

In [7]:
feats = df.iloc[:,:-1] # Selecting feature columns
feats.shape # shape of the features

(256, 29)

In [9]:
# Seperating features into groups of numerical, categorical and boolean types
numerical = list(feats.select_dtypes(include="float64"))
categorical = list(feats.select_dtypes(include="object"))
boolean = list(feats.select_dtypes(include="bool"))

print("Numerical: ", len(numerical))
print("Categorical: ", len(categorical))
print("Boolean: ", len(boolean))

Numerical:  5
Categorical:  13
Boolean:  11


In [10]:
# Features related to device architecture
feats_arch = [        
            'Cell_architecture', 'Cell_flexible',
            'Cell_semitransparent', 'Substrate_stack_sequence',
            'ETL_stack_sequence',
            'ETL_thickness', 
            'Perovskite_dimension_2D', 'Perovskite_dimension_2D3D_mixture',
            'Perovskite_dimension_3D',
            'Perovskite_dimension_3D_with_2D_capping_layer',
            'Perovskite_composition_perovskite_ABC3_structure',
            'Perovskite_composition_long_form',
            'Perovskite_composition_inorganic',
            'Perovskite_thickness',
            'Perovskite_band_gap', 'Perovskite_band_gap_graded',
            'HTL_stack_sequence',
            'HTL_thickness_list',
            'Backcontact_stack_sequence',
            'Backcontact_thickness_list']

# Features related to deposition
feats_deposition = [
            'ETL_deposition_procedure',
            'Perovskite_deposition_procedure', 
            'Perovskite_deposition_solvents',
            'Perovskite_deposition_quenching_induced_crystallisation',
            'Perovskite_deposition_thermal_annealing_temperature',
            'Perovskite_deposition_thermal_annealing_time',
            'Perovskite_deposition_solvent_annealing',
            'HTL_deposition_procedure',
            'Backcontact_deposition_procedure'
]
# printing number of features in each group
print(f"Architecture features: {len(feats_arch)}\nDeposition features: {len(feats_deposition)}\nTotal features: {feats.shape[1]}")

Architecture features: 20
Deposition features: 9
Total features: 29


In [11]:
# Number of unique labels on each feature related to deposition
depo_unique = {}
for f in feats_deposition:
    lst = list(df[f].unique())
    depo_unique[f] = lst
for k in depo_unique.keys():
    print(k+" : "+str(len(depo_unique[k])))

ETL_deposition_procedure : 10
Perovskite_deposition_procedure : 3
Perovskite_deposition_solvents : 11
Perovskite_deposition_quenching_induced_crystallisation : 2
Perovskite_deposition_thermal_annealing_temperature : 21
Perovskite_deposition_thermal_annealing_time : 20
Perovskite_deposition_solvent_annealing : 2
HTL_deposition_procedure : 5
Backcontact_deposition_procedure : 4


### Design Pool for Deposition related features

In [12]:
"""
feats_deposition = [
            'ETL_deposition_procedure',
            'Perovskite_deposition_procedure', 
            'Perovskite_deposition_solvents',
            'Perovskite_deposition_quenching_induced_crystallisation',
            'Perovskite_deposition_thermal_annealing_temperature',
            'Perovskite_deposition_thermal_annealing_time',
            'Perovskite_deposition_solvent_annealing',
            'HTL_deposition_procedure',
            'Backcontact_deposition_procedure'
]
"""
# Making a list of unique labels in each column
etl_depo_lst = depo_unique["ETL_deposition_procedure"]
per_depo_lst = depo_unique["Perovskite_deposition_procedure"]
per_sol_lst = depo_unique["Perovskite_deposition_solvents"]
per_qic_lst = depo_unique["Perovskite_deposition_quenching_induced_crystallisation"]
per_temp_lst = depo_unique["Perovskite_deposition_thermal_annealing_temperature"]
per_time_lst = depo_unique["Perovskite_deposition_thermal_annealing_time"]
per_ann_lst = depo_unique["Perovskite_deposition_solvent_annealing"]
htl_depo_lst = depo_unique["HTL_deposition_procedure"]
back_depo_lst = depo_unique["Backcontact_deposition_procedure"]

etl_depo = []
per_depo = []
per_sol = []
per_qic = []
per_temp = []
per_time = []
per_ann = []
htl_depo = []
back_depo = []

# Permutating through each layers of features
for l0 in etl_depo_lst:
    for l1 in per_depo_lst:
        for l2 in per_sol_lst:
            for l3 in per_qic_lst:
                for l4 in per_temp_lst:
                    for l5 in per_time_lst:
                        for l6 in per_ann_lst:
                            for l7 in htl_depo_lst:
                                for l8 in back_depo_lst:
                                    etl_depo.append(l0)
                                    per_depo.append(l1)
                                    per_sol.append(l2)
                                    per_qic.append(l3)
                                    per_temp.append(l4)
                                    per_time.append(l5)
                                    per_ann.append(l6)
                                    htl_depo.append(l7)
                                    back_depo.append(l8)

# Create a design pool of all possible combination of deposition related features   
dp_depo = pd.DataFrame(
        {
            'ETL_deposition_procedure': etl_depo,
            'Perovskite_deposition_procedure': per_depo,
            'Perovskite_deposition_solvents': per_sol, 
            'Perovskite_deposition_quenching_induced_crystallisation': per_qic,
            'Perovskite_deposition_thermal_annealing_temperature': per_temp,
            'Perovskite_deposition_thermal_annealing_time': per_time,
            'Perovskite_deposition_solvent_annealing': per_ann,
            'HTL_deposition_procedure': htl_depo,
            'Backcontact_deposition_procedure': back_depo
        }
    )

dp_depo.shape # Initial shape of the design pool

(11088000, 9)

In [13]:
dp_depo.head()

Unnamed: 0,ETL_deposition_procedure,Perovskite_deposition_procedure,Perovskite_deposition_solvents,Perovskite_deposition_quenching_induced_crystallisation,Perovskite_deposition_thermal_annealing_temperature,Perovskite_deposition_thermal_annealing_time,Perovskite_deposition_solvent_annealing,HTL_deposition_procedure,Backcontact_deposition_procedure
0,Spin-coating | Evaporation,Spin-coating,DMF; DMSO,True,100,20,False,Spin-coating,Evaporation
1,Spin-coating | Evaporation,Spin-coating,DMF; DMSO,True,100,20,False,Spin-coating,Evaporation | Evaporation
2,Spin-coating | Evaporation,Spin-coating,DMF; DMSO,True,100,20,False,Spin-coating,Sandwiching
3,Spin-coating | Evaporation,Spin-coating,DMF; DMSO,True,100,20,False,Spin-coating,Lamination
4,Spin-coating | Evaporation,Spin-coating,DMF; DMSO,True,100,20,False,Spin-coating | Spin-coating,Evaporation


In [14]:
# Importing the datapoints which showed improved efficiency from architecture optimization in Step-1
df_imp_pce_arch = pd.read_csv(pwd+"/predictions/improved_pce_archi.csv") # Reading the improved pce data
df_imp_pce_arch.shape

(11834, 30)

In [15]:
df_imp_pce_arch.head()

Unnamed: 0,Cell_architecture,Cell_flexible,Cell_semitransparent,Substrate_stack_sequence,ETL_stack_sequence,ETL_thickness,ETL_deposition_procedure,Perovskite_dimension_2D,Perovskite_dimension_2D3D_mixture,Perovskite_dimension_3D,...,Perovskite_deposition_thermal_annealing_temperature,Perovskite_deposition_thermal_annealing_time,Perovskite_deposition_solvent_annealing,HTL_stack_sequence,HTL_thickness_list,HTL_deposition_procedure,Backcontact_stack_sequence,Backcontact_thickness_list,Backcontact_deposition_procedure,Predicted_PCE
0,pin,False,False,SLG | ITO,PCBM-60 | BCP,26.0,Evaporation | Evaporation,False,False,True,...,100,10,False,PEDOT:PSS,344.0,Spin-coating,Cu,100.0,Evaporation,12.442712
1,pin,False,False,SLG | ITO,PCBM-60 | BCP,26.0,Evaporation | Evaporation,False,False,True,...,100,10,False,PCBM-60,344.0,Spin-coating,Cu,100.0,Evaporation,12.442712
2,pin,False,False,SLG | ITO,PCBM-60 | BCP,26.0,Evaporation | Evaporation,False,False,True,...,100,10,False,NiO-c,344.0,Spin-coating,Cu,100.0,Evaporation,12.600066
3,pin,False,False,SLG | ITO,PCBM-60 | BCP,26.0,Evaporation | Evaporation,False,False,True,...,100,10,False,NiO-np,344.0,Spin-coating,Cu,100.0,Evaporation,12.251941
4,pin,False,False,SLG | ITO,PCBM-60 | BCP,26.0,Evaporation | Evaporation,False,False,True,...,100,10,False,PTAA,344.0,Spin-coating,Cu,100.0,Evaporation,12.338658


In [16]:
df_imp_pce_arch.sort_values(by="Predicted_PCE",ascending=False,inplace=True) # Sorting data based on Predicted PCE

In [17]:
df_imp_pce_arch.head()

Unnamed: 0,Cell_architecture,Cell_flexible,Cell_semitransparent,Substrate_stack_sequence,ETL_stack_sequence,ETL_thickness,ETL_deposition_procedure,Perovskite_dimension_2D,Perovskite_dimension_2D3D_mixture,Perovskite_dimension_3D,...,Perovskite_deposition_thermal_annealing_temperature,Perovskite_deposition_thermal_annealing_time,Perovskite_deposition_solvent_annealing,HTL_stack_sequence,HTL_thickness_list,HTL_deposition_procedure,Backcontact_stack_sequence,Backcontact_thickness_list,Backcontact_deposition_procedure,Predicted_PCE
2450,pin,False,False,SLG | ITO,SnO2-np | CPTA,26.0,Evaporation | Evaporation,False,False,True,...,100,10,False,NiO-c,344.0,Spin-coating,Cu,100.0,Evaporation,13.569096
822,pin,False,False,SLG | ITO,PCBM-60 | C60 | BCP,26.0,Evaporation | Evaporation,False,False,True,...,100,10,False,NiO-np,344.0,Spin-coating,Cu,100.0,Evaporation,13.523774
4596,pin,False,False,SLG | FTO,PCBM-60 | C60 | BCP,26.0,Evaporation | Evaporation,False,False,True,...,100,10,False,Spiro-MeOTAD,344.0,Spin-coating,Cu,100.0,Evaporation,13.506297
4335,pin,False,False,SLG | FTO,PCBM-60 | C60 | BCP,26.0,Evaporation | Evaporation,False,False,True,...,100,10,False,Spiro-MeOTAD,344.0,Spin-coating,Cu,100.0,Evaporation,13.506297
4679,pin,False,False,SLG | FTO,PCBM-60 | C60 | BCP,26.0,Evaporation | Evaporation,False,False,True,...,100,10,False,Spiro-MeOTAD,344.0,Spin-coating,Cu,100.0,Evaporation,13.506297


In [18]:
# All the features related to architecture is replaced with the values with data of highest PCE predicted from Step-1
for f in feats_arch:
    dp_depo[f] = df_imp_pce_arch[f].iloc[0]
dp_depo = dp_depo[list(feats.columns)]
dp_depo.shape # final shape of the Deposition design pool

(11088000, 29)

In [19]:
dp_depo.head()

Unnamed: 0,Cell_architecture,Cell_flexible,Cell_semitransparent,Substrate_stack_sequence,ETL_stack_sequence,ETL_thickness,ETL_deposition_procedure,Perovskite_dimension_2D,Perovskite_dimension_2D3D_mixture,Perovskite_dimension_3D,...,Perovskite_deposition_quenching_induced_crystallisation,Perovskite_deposition_thermal_annealing_temperature,Perovskite_deposition_thermal_annealing_time,Perovskite_deposition_solvent_annealing,HTL_stack_sequence,HTL_thickness_list,HTL_deposition_procedure,Backcontact_stack_sequence,Backcontact_thickness_list,Backcontact_deposition_procedure
0,pin,False,False,SLG | ITO,SnO2-np | CPTA,26.0,Spin-coating | Evaporation,False,False,True,...,True,100,20,False,NiO-c,344.0,Spin-coating,Cu,100.0,Evaporation
1,pin,False,False,SLG | ITO,SnO2-np | CPTA,26.0,Spin-coating | Evaporation,False,False,True,...,True,100,20,False,NiO-c,344.0,Spin-coating,Cu,100.0,Evaporation | Evaporation
2,pin,False,False,SLG | ITO,SnO2-np | CPTA,26.0,Spin-coating | Evaporation,False,False,True,...,True,100,20,False,NiO-c,344.0,Spin-coating,Cu,100.0,Sandwiching
3,pin,False,False,SLG | ITO,SnO2-np | CPTA,26.0,Spin-coating | Evaporation,False,False,True,...,True,100,20,False,NiO-c,344.0,Spin-coating,Cu,100.0,Lamination
4,pin,False,False,SLG | ITO,SnO2-np | CPTA,26.0,Spin-coating | Evaporation,False,False,True,...,True,100,20,False,NiO-c,344.0,Spin-coating | Spin-coating,Cu,100.0,Evaporation


In [20]:
pickle.dump(dp_depo, open(pwd+"/design_pool/design_pool_depo.pkl","wb")) # Saving the design pool