In [14]:
# Clone the repository
!git clone https://github.com/google-deepmind/materials_discovery.git

# Install necessary libraries
!pip install pymatgen

# Import required libraries
import itertools
import json
import os
import pandas as pd

# Correct import for Composition from pymatgen
from pymatgen.core.composition import Composition
from pymatgen.entries.computed_entries import ComputedEntry
from pymatgen.analysis import phase_diagram

# Function to download dataset files
PUBLIC_LINK = "https://storage.googleapis.com/"
BUCKET_NAME = "gdm_materials_discovery"
FOLDER_NAME = "gnome_data"
FILES = (
    "stable_materials_summary.csv",
)

EXTERNAL_FOLDER_NAME = "external_data"
EXTERNAL_FILES = (
    "external_materials_summary.csv",
)

def download_from_link(link: str, output_dir: str):
    """Download a file from a public link using wget."""
    os.system(f"wget {link} -P {output_dir}")

parent_directory = os.path.join(PUBLIC_LINK, BUCKET_NAME)
for filename in FILES:
    public_link = os.path.join(parent_directory, FOLDER_NAME, filename)
    download_from_link(public_link, '.')

for filename in EXTERNAL_FILES:
    public_link = os.path.join(parent_directory, EXTERNAL_FOLDER_NAME, filename)
    download_from_link(public_link, '.')

# Load the datasets
gnome_crystals = pd.read_csv('stable_materials_summary.csv', index_col=0)

# Annotate the chemical system
def annotate_chemical_system(crystals: pd.DataFrame) -> pd.DataFrame:
    """Annotate a summary DataFrame with the chemical system"""
    chemical_systems = []
    for i, e in enumerate(crystals['Elements']):
        # replace single quotes with double quotes to avoid having to use python eval
        chemsys = json.loads(e.replace("'", '"'))

        # provide chemical system in sorted order to make for easier lookup
        chemical_systems.append(tuple(sorted(chemsys)))
    crystals['Chemical System'] = chemical_systems
    return crystals

gnome_crystals = annotate_chemical_system(gnome_crystals)
reference_crystals = pd.read_csv('external_materials_summary.csv')
reference_crystals = annotate_chemical_system(reference_crystals)
all_crystals = pd.concat([gnome_crystals, reference_crystals], ignore_index=True)

required_columns = ['Composition', 'NSites', 'Corrected Energy', 'Formation Energy Per Atom', 'Chemical System']
minimal_entries = all_crystals[required_columns]
grouped_entries = minimal_entries.groupby('Chemical System')

# Function to gather entries for convex hull and check for valid composition
def gather_convex_hull(chemsys):
    phase_diagram_entries = []

    for length in range(len(chemsys) + 1):
        for subsystem in itertools.combinations(chemsys, length):
            subsystem_key = tuple(sorted(subsystem))
            subsystem_entries = grouped_entries.groups.get(subsystem_key, [])

            if len(subsystem_entries):
                phase_diagram_entries.append(minimal_entries.iloc[subsystem_entries])

    phase_diagram_entries = pd.concat(phase_diagram_entries)

    # Convert to mg.ComputedEntries for use with phase_diagram tooling
    mg_entries = []

    for _, row in phase_diagram_entries.iterrows():
        composition = row['Composition']
        formation_energy = row['Corrected Energy']
        entry = ComputedEntry(composition, formation_energy)
        mg_entries.append(entry)

    # Add entries with 0 formation entries for every element
    for element in chemsys:
        elemental_entry = ComputedEntry(element, 0.0)
        mg_entries.append(elemental_entry)

    return mg_entries

# Function to validate the elements in a composition
def validate_composition(composition, valid_elements):
    invalid_elements = [el for el in composition.elements if el not in valid_elements]
    if invalid_elements:
        raise ValueError(f"Composition contains invalid elements: {invalid_elements}")

# Gather list of all unique elements in the phase diagram for validation
all_elements = set()
for chemsys in all_crystals['Chemical System']:
    all_elements.update(chemsys)
valid_elements = all_elements

# Provide Entry Details
composition = 'Ho2Er2ScIr3'  # @param {type:"string"}
energy = -7.2683  # @param {type:"number"}

if composition == '':
    print("No composition provided. Choosing a random crystal.")
    sample = gnome_crystals.sample()
    sample_entry = ComputedEntry(
        composition=sample['Composition'].item(),
        energy=sample['Corrected Energy'].item(),
    )
    chemsys = sample['Chemical System'].item()
else:
    composition = Composition(composition)  # Corrected import
    # Ensure that the composition contains valid elements
    try:
        validate_composition(composition, valid_elements)  # Validate composition before proceeding
        sample_entry = ComputedEntry(
            composition=composition,
            energy=energy,
        )
        chemsys = [str(el) for el in composition.elements]
    except ValueError as e:
        print(f"Error: {e}")
        chemsys = []  # Clear the chemical system in case of error

# If the chemical system is empty (invalid composition), skip the remaining process
if chemsys:
    # Gather convex hull entries
    mg_entries = gather_convex_hull(chemsys)

    # Compute the convex hull for the phase diagram
    diagram = phase_diagram.PhaseDiagram(mg_entries)

    # View the currently sampled entry
    print(f"Sample Entry: {sample_entry}")
    decomposition, decomposition_energy = diagram.get_decomp_and_e_above_hull(sample_entry, allow_negative=True)

    # For a sample from GNoME, this number is likely to be <1e-3 as this was the threshold set for the data release.
    print(f"Decomposition Energy: {decomposition_energy}.")
    print(f"Decomposition: {decomposition}")
else:
    print("Invalid composition, skipping the phase diagram computation.")


fatal: destination path 'materials_discovery' already exists and is not an empty directory.
Error: Composition contains invalid elements: [Element Ho, Element Er, Element Sc, Element Ir]
Invalid composition, skipping the phase diagram computation.
