# Exploring Chemical Systems

The [GNoME](https://www.nature.com/articles/s41586-023-06735-9) dataset adds many new computationally-stable crystals to various families of interest. This colab, similar to the decomposition energy one, extracts families of interest from the provided datasets and uses Pymatgen's plotting capabilities to compare them to stable entries from a snapshot of Materials Project.

# Import Libraries

In [None]:
!pip install pymatgen

In [None]:
from typing import List, Tuple

import itertools
import json
import os
import pandas as pd
import re

import pymatgen as mg
from pymatgen.entries.computed_entries import ComputedEntry
from pymatgen.analysis import phase_diagram

## Download the Dataset

In [None]:
PUBLIC_LINK = "https://storage.googleapis.com/"
BUCKET_NAME = "gdm_materials_discovery"

# GNoME summary files, stored under FOLDER_NAME in the bucket.
FOLDER_NAME = "gnome_data"
FILES = (
    "stable_materials_summary.csv",
)

# Reference summary files, stored under EXTERNAL_FOLDER_NAME in the bucket.
EXTERNAL_FOLDER_NAME = "external_data"
EXTERNAL_FILES = (
    "mp_snapshot_summary.csv",
    "external_materials_summary.csv",
)

def download_from_link(link: str, output_dir: str) -> None:
  """Download a file using wget.

  Args:
    link: URL of the file to fetch.
    output_dir: Directory passed to wget's `-P` option.

  A non-zero wget exit status is reported with a printed warning rather than
  an exception, so one failed download does not abort the whole cell.
  """
  import shlex  # local import: only this helper needs it

  # Quote both arguments so spaces or shell metacharacters cannot split or
  # alter the command line.
  command = f"wget {shlex.quote(link)} -P {shlex.quote(output_dir)}"
  status = os.system(command)
  if status != 0:
    print(f"Warning: wget exited with status {status} while downloading {link}")

# URLs always use '/' as separator, so join them as strings instead of with
# os.path.join (which would insert the OS path separator).
parent_directory = f"{PUBLIC_LINK.rstrip('/')}/{BUCKET_NAME}"
for filename in FILES:
  public_link = "/".join((parent_directory, FOLDER_NAME, filename))
  download_from_link(public_link, '.')

for filename in EXTERNAL_FILES:
  public_link = "/".join((parent_directory, EXTERNAL_FOLDER_NAME, filename))
  download_from_link(public_link, '.')

## Preprocess the GNoME Dataset



In [None]:
# Load the GNoME summary table; the first CSV column is used as the row index.
gnome_crystals = pd.read_csv('stable_materials_summary.csv', index_col=0)
# Bare expression so the notebook renders the table.
gnome_crystals

In [None]:
# Load the external-materials summary table (default integer row index).
reference_crystals = pd.read_csv('external_materials_summary.csv')
# Bare expression so the notebook renders the table.
reference_crystals

In [None]:
# Snapshot of Materials Project with stable entries
# (loaded with the default integer row index).
mp_crystals = pd.read_csv('mp_snapshot_summary.csv')
# Bare expression so the notebook renders the table.
mp_crystals

In [None]:
def annotate_chemical_system(crystals: pd.DataFrame) -> pd.DataFrame:
  """Add a 'Chemical System' column derived from the 'Elements' column.

  Each 'Elements' value is a string holding a list literal of element symbols
  (e.g. "['Na', 'Cl']"). It is parsed and stored as an alphabetically sorted
  tuple, which is hashable and therefore usable as a groupby key.

  Args:
    crystals: Frame with an 'Elements' column of list-literal strings.

  Returns:
    The same frame object, modified in place with the new column.
  """
  # Swapping single for double quotes turns the list literal into valid JSON,
  # which avoids evaluating the string as Python code.
  crystals['Chemical System'] = [
      tuple(sorted(json.loads(raw_elements.replace("'", '"'))))
      for raw_elements in crystals['Elements']
  ]
  return crystals

In [None]:
# Preprocess each table: add the 'Chemical System' column (a sorted tuple of
# element symbols) that the groupby and subsystem lookups below rely on.
gnome_crystals = annotate_chemical_system(gnome_crystals)
reference_crystals = annotate_chemical_system(reference_crystals)
mp_crystals = annotate_chemical_system(mp_crystals)

In [None]:
# Stack the GNoME and external tables; ignore_index=True renumbers the rows
# 0..N-1 so the combined frame has a unique integer index.
all_crystals = pd.concat([gnome_crystals, reference_crystals], ignore_index=True)
# Keep only the columns listed here for the grouped view.
required_columns = ['Composition', 'NSites', 'Corrected Energy', 'Formation Energy Per Atom', 'Chemical System']
minimal_entries = all_crystals[required_columns]

In [None]:
# Group entries by chemical system in order to allow for aggregation
# (group keys are the sorted element tuples from 'Chemical System').
grouped_entries = minimal_entries.groupby('Chemical System')
mp_entries = mp_crystals.groupby('Chemical System')

## Choose a Chemical System

If no chemical system is provided, a random system with fewer than four elements (for example, a binary or ternary) is chosen from the GNoME dataset.

In [None]:
# @title Provide Details of Chemical System
# @markdown To explore the chemical system, please provide a separated list of elements.
# @markdown If no data is provided a random structure will be chosen.

chemsys = '' # @param {type:"string"}

# Treat blank or whitespace-only input the same as an explicit 'random'.
if chemsys.strip() in ('', 'random'):
  # Choose a random crystal in GNoME and visualize the convex hull.
  # Only systems with fewer than four elements are eligible.
  plottable = gnome_crystals[gnome_crystals['Chemical System'].map(len) < 4]
  sample = plottable.sample()
  # sample is a one-row frame; .item() unwraps its single tuple.
  chemsys = sample['Chemical System'].item()
  print("No chemical system provided.")
  print(f"A random chemical system was chosen: {chemsys}")
else:
  # Split on any run of non-word characters so 'Na-Zn-P', 'Na, Zn, P' and
  # 'Na Zn P' are all accepted. The pattern is a raw string ('\W' is not a
  # valid string escape), and empty tokens produced by leading or trailing
  # separators are dropped so they are not mistaken for elements.
  chemsys = tuple(element for element in re.split(r'\W+', chemsys) if element)

## Gather Entries from the Chemical System

In [None]:
def collect_phase_diagram_entries(
    chemsys: Tuple[str, ...],
    grouped_entries: pd.core.groupby.generic.DataFrameGroupBy,
    minimal_entries: pd.DataFrame
) -> List["ComputedEntry"]:
  """Build pymatgen entries for a chemical system and all of its subsystems.

  Args:
    chemsys: Element symbols defining the system, e.g. ('Na', 'Zn', 'P').
      Order does not matter and repeated symbols are ignored.
    grouped_entries: A frame grouped by its 'Chemical System' column, whose
      group keys are alphabetically sorted tuples of element symbols. The
      grouped frame must share row index labels with `minimal_entries`.
    minimal_entries: Frame providing at least the 'Composition' and
      'Corrected Energy' columns for the rows referenced by `grouped_entries`.

  Returns:
    One ComputedEntry per row that belongs to the system or to any non-empty
    subset of its elements, built from 'Composition' and 'Corrected Energy'.

  Raises:
    ValueError: If no rows exist for the system or any of its subsystems.
  """
  # Drop repeated symbols (keeping first-seen order) so that no subsystem is
  # visited, and no entry emitted, more than once.
  elements = tuple(dict.fromkeys(chemsys))
  groups = grouped_entries.groups

  subsystem_frames = []
  # Start at length 1: the empty combination can never match a group key.
  for length in range(1, len(elements) + 1):
    for subsystem in itertools.combinations(elements, length):
      subsystem_key = tuple(sorted(subsystem))
      row_labels = groups.get(subsystem_key, [])
      if len(row_labels):
        # `.groups` maps each key to index *labels*, so select with `.loc`
        # rather than positional `.iloc`.
        subsystem_frames.append(minimal_entries.loc[row_labels])

  if not subsystem_frames:
    raise ValueError(
        f"No entries found for chemical system {chemsys} or any of its "
        "subsystems.")
  phase_diagram_entries = pd.concat(subsystem_frames)

  # The energy handed to ComputedEntry is the 'Corrected Energy' column.
  return [
      ComputedEntry(composition, corrected_energy)
      for composition, corrected_energy in zip(
          phase_diagram_entries['Composition'],
          phase_diagram_entries['Corrected Energy'])
  ]

In [None]:
# Convert to mg.ComputedEntries for use with phase_diagram tooling.
# The "gnome" entries come from all_crystals (the GNoME and external tables
# combined); the "mp" entries come from the Materials Project snapshot alone.
gnome_phase_diagram_entries = collect_phase_diagram_entries(chemsys, grouped_entries, all_crystals)
mp_phase_diagram_entries = collect_phase_diagram_entries(chemsys, mp_entries, mp_crystals)

## Compute Phase Diagrams

In [None]:
# Build one phase diagram per entry set so the two can be plotted separately.
gnome_phase_diagram = phase_diagram.PhaseDiagram(gnome_phase_diagram_entries)
mp_phase_diagram = phase_diagram.PhaseDiagram(mp_phase_diagram_entries)

## GNoME Phase Diagram for the Chemical System

In [None]:
# Plot the phase diagram built from the GNoME + external entries.
gnome_pd_plot = phase_diagram.PDPlotter(gnome_phase_diagram)
# Last expression in the cell, so the notebook displays the returned plot.
gnome_pd_plot.get_plot()

## Materials Project Phase Diagram for the Chemical System

Below, we plot the phase diagram from a snapshot of Materials Project.

In [None]:
# Plot the phase diagram built from the Materials Project snapshot entries.
mp_pd_plot = phase_diagram.PDPlotter(mp_phase_diagram)
# Last expression in the cell, so the notebook displays the returned plot.
mp_pd_plot.get_plot()

## Joint Convex Hull


In [None]:
# Build a single phase diagram from the concatenation of both entry lists
# (GNoME + external entries followed by the Materials Project snapshot entries).
joint_phase_diagram = phase_diagram.PhaseDiagram(
    gnome_phase_diagram_entries + mp_phase_diagram_entries)
joint_pd_plot = phase_diagram.PDPlotter(joint_phase_diagram)
# Last expression in the cell, so the notebook displays the returned plot.
joint_pd_plot.get_plot()

# Plot GNoME Convex Hull in a Single Cell

Finally, we provide a single cell that can plot the GNoME convex hull. The form on the right can be used to rapidly plot new chemical systems.

In [None]:
# @title Provide Details of Chemical System
# @markdown To explore the chemical system, please provide a separated list of elements.
# @markdown If no data is provided a random structure will be chosen.

chemsys = 'Na-Zn-P' # @param {type:"string"}

# Split on any run of non-word characters so 'Na-Zn-P', 'Na, Zn, P' and
# 'Na Zn P' are all accepted. The pattern is a raw string ('\W' is not a valid
# string escape), and empty tokens from leading or trailing separators are
# dropped so they are not mistaken for elements.
chemsys = tuple(element for element in re.split(r'\W+', chemsys) if element)
# Validate with an explicit exception: assert statements are skipped when
# Python runs with -O, and input such as '---' would otherwise slip through.
if not chemsys:
  raise ValueError("No chemical system provided")
gnome_phase_diagram_entries = collect_phase_diagram_entries(chemsys, grouped_entries, all_crystals)
gnome_phase_diagram = phase_diagram.PhaseDiagram(gnome_phase_diagram_entries)
gnome_pd_plot = phase_diagram.PDPlotter(gnome_phase_diagram)
# Last expression in the cell, so the notebook displays the returned plot.
gnome_pd_plot.get_plot()