In [1]:
import pandas as pd
import numpy as np

In [2]:
from mp_api.client import MPRester # Importing mp_api library
api_key = "ZbGxsvtmixirV3kWQRUGTzNaLQqdZQHD" #this is my api_key
mpr = MPRester(api_key=api_key) # creating MPRester object

In [3]:
# Define A-site (alkaline and rare earth metals) and B-site (transition metals)
alkaline_metals = ["Li", "Na", "K", "Rb", "Cs", "Be", "Mg", "Ca", "Sr", "Ba"]
rare_earth_metals = ["La", "Ce", "Pr", "Nd", "Sm", "Eu", "Gd", "Tb", "Dy", "Ho", "Er", "Tm", "Yb", "Lu"]
transition_metals = ["Sc", "Ti", "V", "Cr", "Mn", "Fe", "Co", "Ni", "Cu", "Zn", "Y", "Zr", "Nb", "Mo", "Ru", "Rh", 
                     "Pd", "Ag", "Cd", "Hf", "Ta", "W", "Re", "Os", "Ir", "Pt", "Au"]

# Combine alkaline and rare earth metals for A-site elements
elements_A = alkaline_metals + rare_earth_metals
anions_X = ["O","Cl", "Br", "I", "F", "S", "Se", "Te"]  # Common X-site elements
all_elements = elements_A+anions_X+transition_metals

In [4]:
# Querying the data (oxides)
data = mpr.materials.summary.search(formula="AABCX6", fields= ['formula_pretty','nsites','chemsys','volume','density','density_atomic',
                                                             'energy_per_atom','formation_energy_per_atom','is_stable',
                                                            'band_gap','cbm','vbm','efermi','is_gap_direct',
                                                            'is_magnetic','ordering','total_magnetization',
                                                             'total_magnetization_normalized_vol','total_magnetization_normalized_formula_units',
                                                            'structure','is_metal','symmetry','energy_above_hull','elements'])

Retrieving SummaryDoc documents:   0%|          | 0/4817 [00:00<?, ?it/s]

In [5]:
def is_double_perovskite(row):
    elements = [str(el) for el in row['elements']]  # Convert elements to string format

    # A site should contain alkali, alkaline earth, or rare earth metals
    A_elements = [elem for elem in elements if elem in elements_A]
    
    # B and C sites should contain transition metals
    B_C_elements = [elem for elem in elements if elem in transition_metals]
    
    # X site should be in the anions list (oxides or halides)
    X_elements = [elem for elem in elements if elem in anions_X]
    
    # Check if the structure matches A2BCX6: 2 A elements, 2 B/C elements, 1 X element
    return len(A_elements) >= 1 and len(B_C_elements) >= 2 and len(X_elements) == 1

In [6]:
df = pd.DataFrame([doc.dict() for doc in data])

In [7]:
df['is_double_perovskite'] = df.apply(is_double_perovskite, axis=1)

In [8]:
df_double_perovskites = df[df['is_double_perovskite']]

In [9]:
df_double_perovskites[['formula_pretty','chemsys','elements']]

Unnamed: 0,formula_pretty,chemsys,elements
43,Ba2CdMoO6,Ba-Cd-Mo-O,"[Ba, Cd, Mo, O]"
44,Ba2CdOsO6,Ba-Cd-O-Os,"[Ba, Cd, O, Os]"
45,Ba2CdReO6,Ba-Cd-O-Re,"[Ba, Cd, O, Re]"
71,Ba2CoMoO6,Ba-Co-Mo-O,"[Ba, Co, Mo, O]"
72,Ba2CoMoO6,Ba-Co-Mo-O,"[Ba, Co, Mo, O]"
...,...,...,...
4812,MgMnV2O6,Mg-Mn-O-V,"[Mg, Mn, O, V]"
4813,MgMnV2O6,Mg-Mn-O-V,"[Mg, Mn, O, V]"
4814,MgMo(WS3)2,Mg-Mo-S-W,"[Mg, Mo, S, W]"
4815,MgMo(WS3)2,Mg-Mo-S-W,"[Mg, Mo, S, W]"


In [10]:
df_double_perovskites.info()

<class 'pandas.core.frame.DataFrame'>
Index: 920 entries, 43 to 4816
Data columns (total 71 columns):
 #   Column                                        Non-Null Count  Dtype  
---  ------                                        --------------  -----  
 0   builder_meta                                  0 non-null      object 
 1   nsites                                        920 non-null    int64  
 2   elements                                      920 non-null    object 
 3   nelements                                     0 non-null      object 
 4   composition                                   0 non-null      object 
 5   composition_reduced                           0 non-null      object 
 6   formula_pretty                                920 non-null    object 
 7   formula_anonymous                             0 non-null      object 
 8   chemsys                                       920 non-null    object 
 9   volume                                        920 non-null    float6

In [11]:
columns_of_interest = ['nsites','chemsys','volume','density','density_atomic',
                       'energy_per_atom','formation_energy_per_atom','is_stable',
                        'band_gap','cbm','vbm','efermi','is_gap_direct','is_metal',
                        'is_magnetic','ordering','total_magnetization',
                        'total_magnetization_normalized_vol','total_magnetization_normalized_formula_units',
                      'symmetry','structure','energy_above_hull']

In [12]:
refined_perovskite_df = df_double_perovskites[columns_of_interest]

In [13]:
refined_perovskite_df

Unnamed: 0,nsites,chemsys,volume,density,density_atomic,energy_per_atom,formation_energy_per_atom,is_stable,band_gap,cbm,...,is_gap_direct,is_metal,is_magnetic,ordering,total_magnetization,total_magnetization_normalized_vol,total_magnetization_normalized_formula_units,symmetry,structure,energy_above_hull
43,10,Ba-Cd-Mo-O,152.172247,6.318198,15.217225,-7.002457,-2.466738,True,2.5376,4.6264,...,False,False,False,NM,0.000024,1.550874e-07,0.000024,"{'crystal_system': 'Cubic', 'symbol': 'Fm-3m',...","{'@module': 'pymatgen.core.structure', '@class...",0.000000
44,10,Ba-Cd-O-Os,150.605592,7.423540,15.060559,-6.794457,-2.220566,True,0.0000,,...,False,True,True,FM,1.999998,1.327971e-02,1.999998,"{'crystal_system': 'Cubic', 'symbol': 'Fm-3m',...","{'@module': 'pymatgen.core.structure', '@class...",0.000000
45,10,Ba-Cd-O-Re,151.219499,7.349226,15.121950,-7.155684,-2.459718,False,0.0000,,...,False,True,True,FM,1.000072,6.613381e-03,1.000072,"{'crystal_system': 'Cubic', 'symbol': 'Fm-3m',...","{'@module': 'pymatgen.core.structure', '@class...",0.000618
71,20,Ba-Co-Mo-O,276.766068,6.306065,13.838303,-7.613764,-2.460704,True,0.0000,,...,False,True,True,AFM,0.010982,3.968008e-05,0.005491,"{'crystal_system': 'Cubic', 'symbol': 'Fm-3m',...","{'@module': 'pymatgen.core.structure', '@class...",0.000000
72,20,Ba-Co-Mo-O,277.167172,6.296940,13.858359,-7.604548,-2.451488,False,1.2740,4.2504,...,True,False,True,AFM,0.000002,7.576655e-09,0.000001,"{'crystal_system': 'Tetragonal', 'symbol': 'I4...","{'@module': 'pymatgen.core.structure', '@class...",0.009216
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4812,10,Mg-Mn-O-V,129.444950,3.554968,12.944495,-8.336524,-2.469136,False,1.3969,3.9848,...,False,False,True,FM,7.000000,5.407704e-02,7.000000,"{'crystal_system': 'Monoclinic', 'symbol': 'Cm...","{'@module': 'pymatgen.core.structure', '@class...",0.076003
4813,10,Mg-Mn-O-V,129.949666,3.541161,12.994967,-8.335944,-2.468556,False,0.9750,4.1636,...,False,False,True,FM,3.000000,2.308586e-02,3.000000,"{'crystal_system': 'Monoclinic', 'symbol': 'Cm...","{'@module': 'pymatgen.core.structure', '@class...",0.076583
4814,10,Mg-Mo-S-W,323.980634,3.486905,32.398063,-7.327917,-1.005663,False,0.0000,,...,False,True,False,NM,0.000017,5.308959e-08,0.000017,"{'crystal_system': 'Trigonal', 'symbol': 'P3m1...","{'@module': 'pymatgen.core.structure', '@class...",0.206342
4815,10,Mg-Mo-S-W,324.061869,3.486031,32.406187,-7.321808,-0.999554,False,0.0000,,...,False,True,False,NM,0.000654,2.017825e-06,0.000654,"{'crystal_system': 'Trigonal', 'symbol': 'P3m1...","{'@module': 'pymatgen.core.structure', '@class...",0.212451


In [14]:
refined_perovskite_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 920 entries, 43 to 4816
Data columns (total 22 columns):
 #   Column                                        Non-Null Count  Dtype  
---  ------                                        --------------  -----  
 0   nsites                                        920 non-null    int64  
 1   chemsys                                       920 non-null    object 
 2   volume                                        920 non-null    float64
 3   density                                       920 non-null    float64
 4   density_atomic                                920 non-null    float64
 5   energy_per_atom                               920 non-null    float64
 6   formation_energy_per_atom                     920 non-null    float64
 7   is_stable                                     920 non-null    bool   
 8   band_gap                                      920 non-null    float64
 9   cbm                                           594 non-null    float6

In [15]:
# Now, as we see above, the structure and symmetry columns don't look good
# We need to extract the information useful for us from those columns and make separate columns

# Extracting crystal_system
refined_perovskite_df['crystal_system'] = refined_perovskite_df['symmetry'].apply(lambda x: x['crystal_system'])

# Extracting abc and angles
def extract_abc_angles(structure):
    abc = structure['lattice']['a'], structure['lattice']['b'], structure['lattice']['c']
    angles = structure['lattice']['alpha'], structure['lattice']['beta'], structure['lattice']['gamma']
    return pd.Series({'a': abc[0], 'b': abc[1], 'c': abc[2], 'alpha': angles[0], 'beta': angles[1], 'gamma': angles[2]})

refined_perovskite_df[['a', 'b', 'c', 'alpha', 'beta', 'gamma']] = refined_perovskite_df['structure'].apply(extract_abc_angles)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  refined_perovskite_df['crystal_system'] = refined_perovskite_df['symmetry'].apply(lambda x: x['crystal_system'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  refined_perovskite_df[['a', 'b', 'c', 'alpha', 'beta', 'gamma']] = refined_perovskite_df['structure'].apply(extract_abc_angles)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.ht

In [16]:
refined_perovskite_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 920 entries, 43 to 4816
Data columns (total 29 columns):
 #   Column                                        Non-Null Count  Dtype  
---  ------                                        --------------  -----  
 0   nsites                                        920 non-null    int64  
 1   chemsys                                       920 non-null    object 
 2   volume                                        920 non-null    float64
 3   density                                       920 non-null    float64
 4   density_atomic                                920 non-null    float64
 5   energy_per_atom                               920 non-null    float64
 6   formation_energy_per_atom                     920 non-null    float64
 7   is_stable                                     920 non-null    bool   
 8   band_gap                                      920 non-null    float64
 9   cbm                                           594 non-null    float6

In [17]:
# We can observe that we have successfully extracted the crystal_system, abc and angles
# Now we have to extract data from the chemsys column. We need to separate the elements and store in separate columns

# Extracting chemsys data
# Split the chemsys column into separate elements
split_chemsys = refined_perovskite_df['chemsys'].str.split('-', expand=True)

# Assign new columns for each element
refined_perovskite_df['element_1'] = split_chemsys[0]
refined_perovskite_df['element_2'] = split_chemsys[1]
refined_perovskite_df['element_3'] = split_chemsys[2]
refined_perovskite_df['element_4'] = split_chemsys[3]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  refined_perovskite_df['element_1'] = split_chemsys[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  refined_perovskite_df['element_2'] = split_chemsys[1]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  refined_perovskite_df['element_3'] = split_chemsys[2]
A value is trying to be set on a copy of a 

In [18]:
refined_perovskite_df

Unnamed: 0,nsites,chemsys,volume,density,density_atomic,energy_per_atom,formation_energy_per_atom,is_stable,band_gap,cbm,...,a,b,c,alpha,beta,gamma,element_1,element_2,element_3,element_4
43,10,Ba-Cd-Mo-O,152.172247,6.318198,15.217225,-7.002457,-2.466738,True,2.5376,4.6264,...,5.992621,5.992621,5.992621,60.000000,60.000000,60.000000,Ba,Cd,Mo,O
44,10,Ba-Cd-O-Os,150.605592,7.423540,15.060559,-6.794457,-2.220566,True,0.0000,,...,5.971985,5.971985,5.971985,60.000000,60.000000,60.000000,Ba,Cd,O,Os
45,10,Ba-Cd-O-Re,151.219499,7.349226,15.121950,-7.155684,-2.459718,False,0.0000,,...,5.980088,5.980088,5.980088,60.000000,60.000000,60.000000,Ba,Cd,O,Re
71,20,Ba-Co-Mo-O,276.766068,6.306065,13.838303,-7.613764,-2.460704,True,0.0000,,...,5.802050,5.805115,10.057978,73.233319,90.021108,119.962715,Ba,Co,Mo,O
72,20,Ba-Co-Mo-O,277.167172,6.296940,13.858359,-7.604548,-2.451488,False,1.2740,4.2504,...,5.734643,9.997355,5.848630,91.283135,119.353138,73.335321,Ba,Co,Mo,O
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4812,10,Mg-Mn-O-V,129.444950,3.554968,12.944495,-8.336524,-2.469136,False,1.3969,3.9848,...,3.970616,5.316043,6.611797,88.911303,89.993353,111.919883,Mg,Mn,O,V
4813,10,Mg-Mn-O-V,129.949666,3.541161,12.994967,-8.335944,-2.468556,False,0.9750,4.1636,...,3.897674,5.248721,6.841382,90.848845,89.995900,111.782805,Mg,Mn,O,V
4814,10,Mg-Mo-S-W,323.980634,3.486905,32.398063,-7.327917,-1.005663,False,0.0000,,...,3.190577,3.190579,36.749345,89.999371,90.000000,119.999987,Mg,Mo,S,W
4815,10,Mg-Mo-S-W,324.061869,3.486031,32.406187,-7.321808,-0.999554,False,0.0000,,...,3.188702,3.188693,36.801964,90.002759,90.000000,120.000089,Mg,Mo,S,W
