In [1]:
from pymatgen.core.periodic_table import Species
Species('Fe', oxidation_state=2).get_shannon_radius(cn="IVSQ",spin="High Spin")

0.64

In [2]:
from mp_api.client import MPRester
with MPRester("6CO1jn2jJUru33xfX27JSfuiWfQIfdNu") as mpr:
    data = mpr.summary.search(
        formula="ABO3",
        fields=["material_id", "formula_pretty", "band_gap", "formation_energy_per_atom", 
                "energy_above_hull", "structure", "theoretical", "symmetry", "density"]
    )


  from .autonotebook import tqdm as notebook_tqdm
  data = mpr.summary.search(
Retrieving SummaryDoc documents: 100%|██████████| 4700/4700 [00:07<00:00, 589.69it/s]


In [3]:
# Filter only perovskite-like
from pymatgen.symmetry.analyzer import SpacegroupAnalyzer
filteredPm = []
for entry in data:
    sga = SpacegroupAnalyzer(entry.structure)
    if "perovskite" in sga.get_crystal_system().lower() or entry.formula_pretty.endswith("O3"):
        filteredPm.append(entry)


In [4]:
import pandas as pd
rows = [item.dict() for item in filteredPm]
dfPM = pd.DataFrame(rows)


In [5]:

from jarvis.db.figshare import data  # module dari JARVIS-Tools
from jarvis.core.atoms import Atoms
from pymatgen.core import Structure  # jika perlu konversi

# 1) Load dataset
d = data(dataset="dft_3d")  # dataset “3D materials” JARVIS-DFT :contentReference[oaicite:5]{index=5}

# 2) Filter formula
filtered = []
for entry in d:
    formula = entry.get("formula")
    if formula is None:
        continue
    # pilih formula yang cocok “ABO3” (atau yang mengandung ABO3)
    if formula.upper().endswith("O3") and len(formula.split()) == 1:  # contoh sederhana
        filtered.append(entry)

# 3) Ambil kolom yang Anda mau
rows = []
for entry in filtered:
    jid = entry.get("jid")
    formula = entry.get("formula")
    bandgap = entry.get("optb88vdw_bandgap")  # contoh field
    formation = entry.get("formation_energy_peratom")
    ehull = entry.get("ehull")
    structure_dict = entry.get("atoms")  # struktur dalam dict
    symmetry = entry.get("spacegroup_symbol") if entry.get("spacegroup_symbol") else entry.get("spg_symbols")
    density = entry.get("density")

    rows.append({
        "jid": jid,
        "formula": formula,
        "band_gap": bandgap,
        "formation_energy_per_atom": formation,
        "energy_above_hull": ehull,
        "structure_dict": structure_dict,
        "symmetry": symmetry,
        "density": density
    })

df = pd.DataFrame(rows)
df.head()



Obtaining 3D dataset 76k ...
Reference:https://www.nature.com/articles/s41524-020-00440-1
Other versions:https://doi.org/10.6084/m9.figshare.6815699
Loading the zipfile...
Loading completed.


Unnamed: 0,jid,formula,band_gap,formation_energy_per_atom,energy_above_hull,structure_dict,symmetry,density
0,JVASP-22556,SrFeO3,0.0,-2.07159,0.1769,"{'lattice_mat': [[3.790914410660539, -0.0, 0.0...",,5.836
1,JVASP-49836,CeTiO3,0.0,-3.35291,0.0147,"{'lattice_mat': [[3.9279570605034704, -0.0, 0....",,6.466
2,JVASP-97985,SnHgO3,0.0,-1.13228,0.0432,"{'lattice_mat': [[4.9156345281107905, -0.01178...",,9.323
3,JVASP-49773,LiTaO3,2.446,-2.82215,0.1045,"{'lattice_mat': [[5.41110393365202, 1.70722575...",,6.786
4,JVASP-50935,AlInO3,2.464,-2.48756,0.0205,"{'lattice_mat': [[3.2758766287144128, 2.332629...",,5.641


In [6]:
dfPM.columns


Index(['formula_pretty', 'density', 'symmetry', 'material_id', 'structure',
       'formation_energy_per_atom', 'energy_above_hull', 'band_gap',
       'theoretical', 'fields_not_requested'],
      dtype='object')

In [7]:
df.columns

Index(['jid', 'formula', 'band_gap', 'formation_energy_per_atom',
       'energy_above_hull', 'structure_dict', 'symmetry', 'density'],
      dtype='object')

In [8]:
import pandas as pd

# Contoh daftar kolom
cols_MP = ['formula_pretty', 'density', 'symmetry', 'material_id', 'structure',
            'formation_energy_per_atom', 'energy_above_hull', 'band_gap',
            'theoretical', 'fields_not_requested']

cols_JARVIS = ['jid', 'formula', 'band_gap', 'formation_energy_per_atom',
                'energy_above_hull', 'structure_dict', 'symmetry', 'density']

# Misal dfMP dan dfJarvis sudah ada
# dfMP = pd.DataFrame(...), dfJarvis = pd.DataFrame(...)

# Samakan nama kolom yang maknanya identik
dfJarvis_renamed = df.rename(columns={
    'jid': 'material_id',
    'formula': 'formula_pretty',
    'structure_dict': 'structure'
})

# Gabungkan
df_combined = pd.concat([dfPM, dfJarvis_renamed], ignore_index=True, sort=False)

# Urutkan kolom biar rapi (opsional)
df_combined = df_combined.reindex(sorted(df_combined.columns), axis=1)

print(df_combined.head())


   band_gap   density  energy_above_hull  \
0    4.1024  8.728230           0.000000   
1    0.8071  9.206879           0.792473   
2    2.0031  8.848788           0.000000   
3    0.0000  9.380471           0.000000   
4    0.9888  8.889999           0.000000   

                                fields_not_requested  \
0  [builder_meta, nsites, elements, nelements, co...   
1  [builder_meta, nsites, elements, nelements, co...   
2  [builder_meta, nsites, elements, nelements, co...   
3  [builder_meta, nsites, elements, nelements, co...   
4  [builder_meta, nsites, elements, nelements, co...   

   formation_energy_per_atom formula_pretty material_id  \
0                  -3.690019         AcAlO3    mp-cpiel   
1                  -2.475390          AcBO3    mp-cpica   
2                  -3.138972         AcCrO3    mp-bxhfp   
3                  -2.422892         AcCuO3    mp-bxfac   
4                  -2.771539         AcFeO3    mp-bxaks   

                                           

In [9]:
df_combined

Unnamed: 0,band_gap,density,energy_above_hull,fields_not_requested,formation_energy_per_atom,formula_pretty,material_id,structure,symmetry,theoretical
0,4.1024,8.728230,0.000000,"[builder_meta, nsites, elements, nelements, co...",-3.690019,AcAlO3,mp-cpiel,"{'@module': 'pymatgen.core.structure', '@class...","{'crystal_system': 'Cubic', 'symbol': 'Pm-3m',...",True
1,0.8071,9.206879,0.792473,"[builder_meta, nsites, elements, nelements, co...",-2.475390,AcBO3,mp-cpica,"{'@module': 'pymatgen.core.structure', '@class...","{'crystal_system': 'Cubic', 'symbol': 'Pm-3m',...",True
2,2.0031,8.848788,0.000000,"[builder_meta, nsites, elements, nelements, co...",-3.138972,AcCrO3,mp-bxhfp,"{'@module': 'pymatgen.core.structure', '@class...","{'crystal_system': 'Cubic', 'symbol': 'Pm-3m',...",True
3,0.0000,9.380471,0.000000,"[builder_meta, nsites, elements, nelements, co...",-2.422892,AcCuO3,mp-bxfac,"{'@module': 'pymatgen.core.structure', '@class...","{'crystal_system': 'Cubic', 'symbol': 'Pm-3m',...",True
4,0.9888,8.889999,0.000000,"[builder_meta, nsites, elements, nelements, co...",-2.771539,AcFeO3,mp-bxaks,"{'@module': 'pymatgen.core.structure', '@class...","{'crystal_system': 'Cubic', 'symbol': 'Pm-3m',...",True
...,...,...,...,...,...,...,...,...,...,...
5239,3.1210,5.681000,0.000000,,-3.318480,TmTiClO3,JVASP-146828,"{'lattice_mat': [[5.081921480325604, 0.0084075...",,
5240,0.0000,8.073000,0.009700,,-0.294740,ZrGa6Fe3Co3,JVASP-146848,"{'lattice_mat': [[4.600030049947577, -0.000941...",,
5241,0.1570,3.941000,0.064200,,-2.159990,FeSiO3,JVASP-146895,"{'lattice_mat': [[5.128103371117467, 0.0593036...",,
5242,0.0000,7.227000,0.000000,,-2.839040,SmMnO3,JVASP-146898,"{'lattice_mat': [[5.377202234581615, -0.0, 3e-...",,


In [10]:
import pandas as pd
import re

# Asumsikan df_combined sudah ada

# Daftar unsur halogen
halides = {"F", "Cl", "Br", "I", "At"}

# Fungsi untuk memeriksa apakah rumus adalah tipe ABO3 dan mengandung halida
def is_ABO3_halide(formula):
    if not isinstance(formula, str):
        return False

    # Hapus spasi
    formula = formula.strip()

    # Pola sederhana untuk A1B1X3 (contoh: NaTiO3, CsPbBr3, BaSnO3)
    # Bisa juga mendeteksi A dan B satu unsur, X bisa O/F/Cl/Br/I/At
    match = re.match(r"([A-Z][a-z]?)([A-Z][a-z]?)([A-Z][a-z]?)(\d*)", formula)
    if not match:
        return False

    # Cek apakah mengandung salah satu halida atau oksigen
    return any(elem in formula for elem in halides.union({"O"})) and formula.endswith("3")

# Terapkan filter
df_ABO3_halide = df_combined[df_combined["formula_pretty"].apply(is_ABO3_halide)].copy()

# print(df_ABO3_halide.shape)
# print(df_ABO3_halide.head())
df_ABO3_halide

Unnamed: 0,band_gap,density,energy_above_hull,fields_not_requested,formation_energy_per_atom,formula_pretty,material_id,structure,symmetry,theoretical
0,4.1024,8.728230,0.000000,"[builder_meta, nsites, elements, nelements, co...",-3.690019,AcAlO3,mp-cpiel,"{'@module': 'pymatgen.core.structure', '@class...","{'crystal_system': 'Cubic', 'symbol': 'Pm-3m',...",True
1,0.8071,9.206879,0.792473,"[builder_meta, nsites, elements, nelements, co...",-2.475390,AcBO3,mp-cpica,"{'@module': 'pymatgen.core.structure', '@class...","{'crystal_system': 'Cubic', 'symbol': 'Pm-3m',...",True
2,2.0031,8.848788,0.000000,"[builder_meta, nsites, elements, nelements, co...",-3.138972,AcCrO3,mp-bxhfp,"{'@module': 'pymatgen.core.structure', '@class...","{'crystal_system': 'Cubic', 'symbol': 'Pm-3m',...",True
3,0.0000,9.380471,0.000000,"[builder_meta, nsites, elements, nelements, co...",-2.422892,AcCuO3,mp-bxfac,"{'@module': 'pymatgen.core.structure', '@class...","{'crystal_system': 'Cubic', 'symbol': 'Pm-3m',...",True
4,0.9888,8.889999,0.000000,"[builder_meta, nsites, elements, nelements, co...",-2.771539,AcFeO3,mp-bxaks,"{'@module': 'pymatgen.core.structure', '@class...","{'crystal_system': 'Cubic', 'symbol': 'Pm-3m',...",True
...,...,...,...,...,...,...,...,...,...,...
5237,3.6490,3.788000,0.024400,,-2.365270,ZnSiO3,JVASP-146785,"{'lattice_mat': [[7.123975525005888, -0.210137...",,
5239,3.1210,5.681000,0.000000,,-3.318480,TmTiClO3,JVASP-146828,"{'lattice_mat': [[5.081921480325604, 0.0084075...",,
5241,0.1570,3.941000,0.064200,,-2.159990,FeSiO3,JVASP-146895,"{'lattice_mat': [[5.128103371117467, 0.0593036...",,
5242,0.0000,7.227000,0.000000,,-2.839040,SmMnO3,JVASP-146898,"{'lattice_mat': [[5.377202234581615, -0.0, 3e-...",,


In [11]:
df_zero_bandgap = df_ABO3_halide[df_ABO3_halide["band_gap"] != 0]


In [12]:
df_zero_bandgap

Unnamed: 0,band_gap,density,energy_above_hull,fields_not_requested,formation_energy_per_atom,formula_pretty,material_id,structure,symmetry,theoretical
0,4.1024,8.728230,0.000000,"[builder_meta, nsites, elements, nelements, co...",-3.690019,AcAlO3,mp-cpiel,"{'@module': 'pymatgen.core.structure', '@class...","{'crystal_system': 'Cubic', 'symbol': 'Pm-3m',...",True
1,0.8071,9.206879,0.792473,"[builder_meta, nsites, elements, nelements, co...",-2.475390,AcBO3,mp-cpica,"{'@module': 'pymatgen.core.structure', '@class...","{'crystal_system': 'Cubic', 'symbol': 'Pm-3m',...",True
2,2.0031,8.848788,0.000000,"[builder_meta, nsites, elements, nelements, co...",-3.138972,AcCrO3,mp-bxhfp,"{'@module': 'pymatgen.core.structure', '@class...","{'crystal_system': 'Cubic', 'symbol': 'Pm-3m',...",True
4,0.9888,8.889999,0.000000,"[builder_meta, nsites, elements, nelements, co...",-2.771539,AcFeO3,mp-bxaks,"{'@module': 'pymatgen.core.structure', '@class...","{'crystal_system': 'Cubic', 'symbol': 'Pm-3m',...",True
5,2.8959,9.314495,0.000000,"[builder_meta, nsites, elements, nelements, co...",-3.063253,AcGaO3,mp-cpicb,"{'@module': 'pymatgen.core.structure', '@class...","{'crystal_system': 'Cubic', 'symbol': 'Pm-3m',...",True
...,...,...,...,...,...,...,...,...,...,...
5235,3.1480,5.606000,0.000000,,-3.312410,ErTiClO3,JVASP-148222,"{'lattice_mat': [[5.084037278865215, 0.0117868...",,
5236,0.3910,3.574000,0.133400,,-1.923740,LiCoBO3,JVASP-146542,"{'lattice_mat': [[3.1098736323796228, 0.050732...",,
5237,3.6490,3.788000,0.024400,,-2.365270,ZnSiO3,JVASP-146785,"{'lattice_mat': [[7.123975525005888, -0.210137...",,
5239,3.1210,5.681000,0.000000,,-3.318480,TmTiClO3,JVASP-146828,"{'lattice_mat': [[5.081921480325604, 0.0084075...",,


In [20]:
from matminer.featurizers.composition import ElementProperty
from pymatgen.core import Composition
import pandas as pd

# Inisialisasi featurizer Magpie
magpie = ElementProperty.from_preset("magpie")

# Tes 1 senyawa
df = pd.DataFrame({"formula": ["CaTiO3","FeSiO3"], "composition": [Composition("CaTiO3"), Composition("FeSiO3")]})
df_magpie = magpie.featurize_dataframe(df, col_id="composition")

print(df_magpie.columns.tolist())  # tampilkan semua kolom sebenarnya


ElementProperty: 100%|██████████| 2/2 [00:00<00:00, 32.59it/s]


['formula', 'composition', 'MagpieData minimum Number', 'MagpieData maximum Number', 'MagpieData range Number', 'MagpieData mean Number', 'MagpieData avg_dev Number', 'MagpieData mode Number', 'MagpieData minimum MendeleevNumber', 'MagpieData maximum MendeleevNumber', 'MagpieData range MendeleevNumber', 'MagpieData mean MendeleevNumber', 'MagpieData avg_dev MendeleevNumber', 'MagpieData mode MendeleevNumber', 'MagpieData minimum AtomicWeight', 'MagpieData maximum AtomicWeight', 'MagpieData range AtomicWeight', 'MagpieData mean AtomicWeight', 'MagpieData avg_dev AtomicWeight', 'MagpieData mode AtomicWeight', 'MagpieData minimum MeltingT', 'MagpieData maximum MeltingT', 'MagpieData range MeltingT', 'MagpieData mean MeltingT', 'MagpieData avg_dev MeltingT', 'MagpieData mode MeltingT', 'MagpieData minimum Column', 'MagpieData maximum Column', 'MagpieData range Column', 'MagpieData mean Column', 'MagpieData avg_dev Column', 'MagpieData mode Column', 'MagpieData minimum Row', 'MagpieData max

In [15]:
import matminer
print(matminer.__version__)


0.9.3


In [21]:
df_magpie

Unnamed: 0,formula,composition,MagpieData minimum Number,MagpieData maximum Number,MagpieData range Number,MagpieData mean Number,MagpieData avg_dev Number,MagpieData mode Number,MagpieData minimum MendeleevNumber,MagpieData maximum MendeleevNumber,...,MagpieData range GSmagmom,MagpieData mean GSmagmom,MagpieData avg_dev GSmagmom,MagpieData mode GSmagmom,MagpieData minimum SpaceGroupNumber,MagpieData maximum SpaceGroupNumber,MagpieData range SpaceGroupNumber,MagpieData mean SpaceGroupNumber,MagpieData avg_dev SpaceGroupNumber,MagpieData mode SpaceGroupNumber
0,CaTiO3,"(Ca, Ti, O)",8.0,22.0,14.0,13.2,6.24,8.0,7.0,87.0,...,2.3e-05,5e-06,7e-06,0.0,12.0,225.0,213.0,91.0,94.8,12.0
1,FeSiO3,"(Fe, Si, O)",8.0,26.0,18.0,12.8,5.76,8.0,55.0,87.0,...,2.110663,0.422133,0.675412,0.0,12.0,229.0,217.0,98.4,103.68,12.0
