In [2]:
from pymatgen.core import Composition

def calculate_normalized_composition(formula: str):
    """
    Calculate the normalized composition for a given formula.
    """
    composition = Composition(formula)
    normalized = composition.get_el_amt_dict()
    total = sum(normalized.values())
    normalized = {element: amount / total for element, amount in normalized.items()}
    return normalized

def combine_compositions_with_fractions(compositions, fractions):
    """
    Combine multiple compositions with their respective fractions.
    """
    combined_composition = Composition({})
    for comp, frac in zip(compositions, fractions):
        combined_composition += Composition(comp) * frac
    return calculate_normalized_composition(str(combined_composition.formula))

def compare_compositions(precursors, products, product_fractions):
    """
    Compare the normalized compositions of precursors before synthesis
    with the combined products after synthesis.
    """
    # Calculate combined composition of precursors
    combined_precursors = Composition({})
    for precursor in precursors:
        combined_precursors += Composition(precursor)
    normalized_before = calculate_normalized_composition(str(combined_precursors.formula))
    
    # Combine products using their fractions
    normalized_after = combine_compositions_with_fractions(products, product_fractions)
    
    return normalized_before, normalized_after

def check_composition_balance(normalized_before, normalized_after, tolerance=1e-3):
    """
    Check whether the normalized compositions before and after synthesis
    are balanced within a specified tolerance.
    """
    elements = set(normalized_before.keys()).union(set(normalized_after.keys()))
    for element in elements:
        before_value = normalized_before.get(element, 0)
        after_value = normalized_after.get(element, 0)
        if abs(before_value - after_value) > tolerance:
            return False, element
    return True, None

# Define precursors, products, and product fractions
precursors = ["V2O5", "Na2CO3","C"]  # Example precursors
products = ["V2O3" ,"CO"]        # Example precursors        # Example products
product_fractions = [0.59, 0.41]          # Molar ratios or weight fractions of products
 
# Calculate and compare normalized compositions
normalized_before, normalized_after = compare_compositions(precursors, products, product_fractions)

# Check composition balance
is_balanced, unbalanced_element = check_composition_balance(normalized_before, normalized_after)

# Print results
print("Normalized Composition Before Synthesis (Precursors):")
for element, fraction in normalized_before.items():
    print(f"  {element}: {fraction:.4f}")

print("\nNormalized Composition After Synthesis (Products):")
for element, fraction in normalized_after.items():
    print(f"  {element}: {fraction:.4f}")

if is_balanced:
    print("\nThe compositions are balanced.")
else:
    print(f"\nThe compositions are not balanced. Imbalance detected in element: {unbalanced_element}")

Normalized Composition Before Synthesis (Precursors):
  Na: 0.1429
  V: 0.1429
  C: 0.1429
  O: 0.5714

Normalized Composition After Synthesis (Products):
  V: 0.3130
  C: 0.1088
  O: 0.5782

The compositions are not balanced. Imbalance detected in element: C


In [3]:
from pymatgen.core import Composition

def calculate_normalized_composition(formula: str):
    """
    Calculate the normalized composition for a given formula.
    """
    composition = Composition(formula)
    normalized = composition.get_el_amt_dict()
    total = sum(normalized.values())
    normalized = {element: amount / total for element, amount in normalized.items()}
    return normalized

def combine_compositions_with_fractions(compositions, fractions):
    """
    Combine multiple compositions with their respective fractions.
    """
    combined_composition = Composition({})
    for comp, frac in zip(compositions, fractions):
        combined_composition += Composition(comp) * frac
    return calculate_normalized_composition(str(combined_composition.formula))

def compare_compositions(precursors, products, product_fractions):
    """
    Compare the normalized compositions of precursors before synthesis
    with the combined products after synthesis.
    """
    # Calculate combined composition of precursors
    combined_precursors = Composition({})
    for precursor in precursors:
        combined_precursors += Composition(precursor)
    normalized_before = calculate_normalized_composition(str(combined_precursors.formula))
    
    # Combine products using their fractions
    normalized_after = combine_compositions_with_fractions(products, product_fractions)
    
    return normalized_before, normalized_after

def calculate_balance_score(normalized_before, normalized_after):
    """
    Calculate a balance score between 0 and 1 based on the difference
    between normalized compositions.
    """
    elements = set(normalized_before.keys()).union(set(normalized_after.keys()))
    total_difference = 0
    for element in elements:
        before_value = normalized_before.get(element, 0)
        after_value = normalized_after.get(element, 0)
        total_difference += abs(before_value - after_value)
    
    # Scale score between 0 and 1
    score = 1 - (total_difference / 2)
    return max(0, min(1, score))  # Ensure score is within bounds

# Define precursors, products, and product fractions
precursors = ["V2O5", "Na2CO3","C"]  # Example precursors
products = ["V2O3" ,"CO"]        # Example precursors        # Example products
product_fractions = [0.59, 0.41]     
# Calculate and compare normalized compositions
normalized_before, normalized_after = compare_compositions(precursors, products, product_fractions)

# Calculate the balance score
balance_score = calculate_balance_score(normalized_before, normalized_after)

# Print results
print("Normalized Composition Before Synthesis (Precursors):")
for element, fraction in normalized_before.items():
    print(f"  {element}: {fraction:.4f}")

print("\nNormalized Composition After Synthesis (Products):")
for element, fraction in normalized_after.items():
    print(f"  {element}: {fraction:.4f}")

print(f"\nBalanced Score: {balance_score:.4f}")

Normalized Composition Before Synthesis (Precursors):
  Na: 0.1429
  V: 0.1429
  C: 0.1429
  O: 0.5714

Normalized Composition After Synthesis (Products):
  V: 0.3130
  C: 0.1088
  O: 0.5782

Balanced Score: 0.8230


In [4]:
def check_phase_in_precursors(precursors, products):
    """
    Check for each product whether it matches the starting materials (precursors).
    
    Args:
        precursors (list of str): List of precursor formulas.
        products (list of str): List of product formulas.
        
    Returns:
        dict: A dictionary where keys are product formulas and values are 0 or 1.
              1 means the product is a precursor, 0 means it is not.
    """
    # Convert precursors to a set of their reduced formulas
    precursor_set = {str(Composition(precursor).reduced_formula) for precursor in precursors}
    
    # Create a dictionary to store results for each product
    results = {}
    
    # Check each product
    for product in products:
        product_formula = str(Composition(product).reduced_formula)
        results[product] = 1 if product_formula in precursor_set else 0
    
    return results

# Example Inputs
# precursors = ["CaCO3", "RuO2"]          # Example precursors
# products = ["CaCO3", "CaRuO3", "RuO2"]  # Example products
precursors = ["CaCO3", "V2O5"]  # Example precursors
products = ["CaC2" ,"CaVO5"]   
# Check if each product phase is in the precursors
results = check_phase_in_precursors(precursors, products)

# Print Results
print("Product Evaluation:")
for product, value in results.items():
    print(f"  {product}: {'Yes' if value == 1 else 'No'} ({value})")

Product Evaluation:
  CaC2: No (0)
  CaVO5: No (0)


In [5]:
from pymatgen.core import Composition

def calculate_normalized_composition(formula: str):
    """
    Calculate the normalized composition for a given formula.
    """
    composition = Composition(formula)
    normalized = composition.get_el_amt_dict()
    total = sum(normalized.values())
    normalized = {element: amount / total for element, amount in normalized.items()}
    return normalized

def combine_compositions_with_fractions(compositions, fractions):
    """
    Combine multiple compositions with their respective fractions.
    """
    combined_composition = Composition({})
    for comp, frac in zip(compositions, fractions):
        combined_composition += Composition(comp) * frac
    return calculate_normalized_composition(str(combined_composition.formula))

def compare_compositions(precursors, products, product_fractions):
    """
    Compare the normalized compositions of precursors before synthesis
    with the combined products after synthesis.
    """
    # Calculate combined composition of precursors
    combined_precursors = Composition({})
    for precursor in precursors:
        combined_precursors += Composition(precursor)
    normalized_before = calculate_normalized_composition(str(combined_precursors.formula))
    
    # Combine products using their fractions
    normalized_after = combine_compositions_with_fractions(products, product_fractions)
    
    return normalized_before, normalized_after

def calculate_balance_score(normalized_before, normalized_after):
    """
    Calculate a balance score between 0 and 1 based on the difference
    between normalized compositions.
    """
    elements = set(normalized_before.keys()).union(set(normalized_after.keys()))
    total_difference = 0
    for element in elements:
        before_value = normalized_before.get(element, 0)
        after_value = normalized_after.get(element, 0)
        total_difference += abs(before_value - after_value)
    
    # Scale score between 0 and 1
    score = 1 - (total_difference / 2)
    return max(0, min(1, score))  # Ensure score is within bounds

def calculate_per_product_scores(precursors, products, product_fractions):
    """
    Calculate a balance score per product based on how well its normalized
    composition matches the precursors.
    """
    # Get normalized composition of precursors
    combined_precursors = Composition({})
    for precursor in precursors:
        combined_precursors += Composition(precursor)
    normalized_precursors = calculate_normalized_composition(str(combined_precursors.formula))
    
    # Calculate score for each product
    product_scores = {}
    for product, fraction in zip(products, product_fractions):
        normalized_product = calculate_normalized_composition(product)
        score = calculate_balance_score(normalized_precursors, normalized_product)
        product_scores[product] = score * fraction  # Weight by fraction contribution
    
    return product_scores

# Define precursors, products, and product fractions
precursors = ["V2O5", "Na2CO3", "C"]  # Example precursors
products = ["V2O3", "CO"]             # Example products
product_fractions = [0.59, 0.41]      # Molar or weight fractions of products

# Calculate per-product scores
per_product_scores = calculate_per_product_scores(precursors, products, product_fractions)

# Print Results
print("Per-Product Balance Scores:")
for product, score in per_product_scores.items():
    print(f"  {product}: {score:.4f}")

Per-Product Balance Scores:
  V2O3: 0.4214
  CO: 0.2636


# The final composition balance used

In [6]:
def calculate_composition_balance_score_refined(target_composition, output_composition):
    """
    Compute composition balance score between target and output compositions.

    Args:
        target_composition (Composition): Normalized target composition.
        output_composition (dict): Normalized output composition as a dictionary.

    Returns:
        float: Composition balance score (0 to 1, where 1 is a perfect match).
    """
    target_elements = target_composition.elements
    output_elements = output_composition.keys()

    # Initialize penalties
    total_penalty = 0.0
    total_weight = 0.0

    # Compute penalties for missing/mismatched elements
    for element in target_elements:
        target_amount = target_composition[element]
        output_amount = output_composition[element] if element in output_elements else 0.0

        difference = abs(target_amount - output_amount)

        # Scalable penalty: larger differences are penalized more
        if output_amount == 0.0:
            penalty = 1.0  # Maximum penalty for missing elements
        else:
            penalty = difference ** 2  # Quadratic penalty for differences

        total_penalty += penalty * target_amount
        total_weight += target_amount

    # Normalize penalties
    normalized_penalty = total_penalty / total_weight if total_weight > 0 else 1.0
    balance_score = 1 - normalized_penalty  # Convert penalty to a score (higher is better)

    return max(0, min(1, balance_score))  # Ensure score is within [0,1] 

def calculate_composition_balance_score_refined_(target_composition, output_composition):
    """
    Compute composition balance score between target and output compositions.

    Args:
        target_composition (Composition): Normalized target composition.
        output_composition (dict): Normalized output composition as a dictionary.

    Returns:
        float: Composition balance score (0 to 1, where 1 is a perfect match).
    """
    target_elements = target_composition.elements
    output_elements = output_composition.keys()

    total_penalty = 0.0
    total_weight = 0.0

    # --- Part 1: Penalize mismatches for elements in the target ---
    for element in target_elements:
        target_amount = target_composition[element]
        output_amount = output_composition.get(element, 0.0)

        difference = abs(target_amount - output_amount)
        if output_amount == 0.0:
            penalty = 1.0  # Max penalty for missing target elements
        else:
            penalty = difference ** 2  # Quadratic mismatch penalty

        total_penalty += penalty * target_amount
        total_weight += target_amount

    # --- Part 2: Penalize unexpected elements in the output ---
    for element in output_elements:
        if element not in target_elements:
            output_amount = output_composition[element]
            penalty = 1.0  # Full penalty for unexpected element
            total_penalty += penalty * output_amount
            total_weight += output_amount  # Use output mass to weigh penalty

    # Final score computation
    normalized_penalty = total_penalty / total_weight if total_weight > 0 else 1.0
    balance_score = 1 - normalized_penalty

    return max(0, min(1, balance_score))  # Clip to [0, 1]
def calculate_composition_balance_score_refined2(target_composition, output_composition):
    """
    Compute composition balance score between target and output compositions.

    Penalizes:
        - Missing elements in the output
        - Deviations in shared elements (quadratically)
        - Extra elements in the output (max penalty * their fraction)

    Normalizes only over target composition to avoid dilution from extra elements.

    Args:
        target_composition (Composition): Normalized target composition.
        output_composition (dict): Normalized output composition as a dictionary.

    Returns:
        float: Composition balance score (0 to 1, where 1 is a perfect match).
    """
    target_elements = target_composition.elements
    output_elements = output_composition.keys()

    total_penalty = 0.0
    target_total_weight = 0.0  # Denominator only includes target elements

    # 1. Penalize missing/mismatched target elements
    for element in target_elements:
        target_amount = target_composition[element]
        output_amount = output_composition.get(element, 0.0)

        difference = abs(target_amount - output_amount)
        if output_amount == 0.0:
            penalty = 1.0  # max penalty for completely missing
        else:
            penalty = difference ** 2  # quadratic penalty

        total_penalty += penalty * target_amount
        target_total_weight += target_amount

    # 2. Penalize unexpected (extra) elements in output
    for element in output_elements:
        if element not in target_elements:
            output_amount = output_composition[element]
            penalty = 1.0  # max penalty for extra elements
            total_penalty += penalty * output_amount
            # DO NOT add to target_total_weight

    # Normalize and clip to [0, 1]
    normalized_penalty = total_penalty / target_total_weight if target_total_weight > 0 else 1.0
    balance_score = 1 - normalized_penalty
    return max(0, min(1, balance_score))

In [7]:
def remove_elements_from_composition(target, elements_to_remove):
    # Parse the composition
    composition = Composition(target)

    # Create a new composition dictionary without the unwanted elements
    filtered_composition = {
        el: amt for el, amt in composition.get_el_amt_dict().items() if el not in elements_to_remove
    }

    # Create a new Composition object from the filtered dictionary
    new_composition = Composition(filtered_composition)

    # Return the updated formula as a compact string (remove spaces)
    return new_composition.formula.replace("1", "").replace(" ", "")

def normalize_composition(target):

    # Create a new Composition object from the filtered dictionary
    new_composition = Composition(target)
    # Normalize the composition to get fractional values
    normalized_composition = new_composition.fractional_composition

    # Format the normalized composition as a string
    normalized_formula = normalized_composition.formula.replace(" ", "")
    
    return normalized_formula


import re
import ast
from pymatgen.core.composition import Composition

def cleanup_phases(phases):
    """
    Convert phase compositions into integer formulas and preserve space group numbers.
    
    Examples:
        ['C1.9992O1.9992_194_(icsd_37237)-None'] → ['CO_194']
        ['V2O3_15_(icsd_95762)-11'] → ['V2O3_15']
    """
    new_phases = []
    for phase in phases:
        try:
            # Extract only the chemical formula (before the first underscore)
            chemical_formula = phase.split('_')[0]

            # Convert to integer formula
            comp = Composition(chemical_formula)
            integer_formula = comp.get_integer_formula_and_factor(max_denominator=10)[0]

            # Completely remove underscores and metadata
            cleaned_phase = integer_formula.strip()

            new_phases.append(cleaned_phase)
        except Exception as e:
            print(f" Error processing phase {phase}: {e}")
            new_phases.append(phase)  # Keep original if error occurs

    return new_phases

In [8]:
def calculate_chemical_factors(target_raw, products):
    """
    Calculate composition balance scores for each interpretation.
    
    Parameters:
        synthesis_csv (str): Path to the synthesis data CSV file.
        interpretations (dict): Dictionary where each key is an interpretation name,
                                and each value contains phase info.

    Returns:
        dict: Composition balance scores for each interpretation.
    """

   
    print("\nTarget Composition:", target_raw)

    # Remove unwanted elements (like O, C, N, H) from the target composition
    elements_to_remove = ["O", "C", "N", "H"]
    cleaned_target = remove_elements_from_composition(target_raw, elements_to_remove)
    normalized_target = normalize_composition(cleaned_target)
    # print("Normalized target: ", normalized_target)

    target_composition = Composition(target_raw)
    filtered_target = {
        el: amt for el, amt in target_composition.get_el_amt_dict().items() if el not in elements_to_remove
    }
    normalized_target = Composition(filtered_target).fractional_composition
    print("Normalized Target Composition:", normalized_target)

    # Dictionary to store balance scores
    balance_scores = {}
    

    
    raw_products = products["phases"]
    cleaned_products = cleanup_phases(raw_products)
    print("Raw Output Phases:", raw_products)
    # print("Cleaned Output Phases:", cleaned_products)

    weight_fractions = products.get("weight_fraction", [])
    print("Weight Fractions:", weight_fractions)

    # Normalize product compositions
    normalized_compositions = []
    for phase, weight in zip(cleaned_products, weight_fractions):
        try:
            composition = Composition(phase)

            # Remove unwanted elements
            filtered_composition = {
                el: amt for el, amt in composition.get_el_amt_dict().items() if el not in elements_to_remove
            }

            # Normalize and weight the composition
            normalized_composition = Composition(filtered_composition).fractional_composition
            weighted_composition = {el: amt * (weight / 100) for el, amt in normalized_composition.items()}
            normalized_compositions.append(weighted_composition)
        except Exception as e:
            print(f"Error processing phase {phase}: {e}")

    # Combine all normalized compositions
    combined_composition = {}
    for composition in normalized_compositions:
        for el, amt in composition.items():
            combined_composition[el] = combined_composition.get(el, 0) + amt

    # Normalize to ensure total sum is 1
    total = sum(combined_composition.values())
    final_output_composition = {el: round(amt / total, 2) for el, amt in combined_composition.items()}
    print("Normalized Output Composition:", final_output_composition)
    final_composition_str = Composition(final_output_composition).to_pretty_string()
    # print("The other normalised output composition: ",final_composition_str )

    # Compute composition balance score
    balance_score = calculate_composition_balance_score_refined(normalized_target, final_output_composition)

    # Example: green if good, yellow if okay, red if bad
    if balance_score > 0.8:
        color = "\033[92m"  # green
    elif balance_score > 0.5:
        color = "\033[93m"  # yellow
    else:
        color = "\033[91m"  # red

    reset = "\033[0m"
    # print(f"Balance Score: {color}{balance_score:.3f}{reset}")
    print("+++++++++++++++++++++++++++++++")
    balance_score2 = calculate_composition_balance_score_refined2(normalized_target, final_output_composition)

    if balance_score2 > 0.8:
        color = "\033[92m"  # green
    elif balance_score2 > 0.5:
        color = "\033[93m"  # yellow
    else:
        color = "\033[91m"  # red

    reset = "\033[0m"
    print(f"Balance Score: {color}{balance_score2:.3f}{reset}")

    return balance_score

target = "CaVO2"
products1 = {"phases":[ "CaV2O5" ,"Ca5V3O13"], "weight_fraction":[75,25]} 
products2 = {"phases":[ "CaC02"], "weight_fraction":[100]}
products3 = {"phases":[ "CaV2O5" ,"Mg2"], "weight_fraction":[75,25]}
# products4 = {"phases":[ "CaV2O5" ,"Mg2Zn4KFe"], "weight_fraction":[75,25]}
products4 = {"phases":[ "CaV2O5" ,"Mg2Zn4KFe"], "weight_fraction":[25,75]}

pr_list =[products1,products2, products3, products4]
for products in pr_list:
    calculate_chemical_factors(target,products)


Target Composition: CaVO2
Normalized Target Composition: Ca0.5 V0.5
Raw Output Phases: ['CaV2O5', 'Ca5V3O13']
Weight Fractions: [75, 25]
Normalized Output Composition: {Element Ca: 0.41, Element V: 0.59}
+++++++++++++++++++++++++++++++
Balance Score: [92m0.992[0m

Target Composition: CaVO2
Normalized Target Composition: Ca0.5 V0.5
Raw Output Phases: ['CaC02']
Weight Fractions: [100]
Normalized Output Composition: {Element Ca: 1.0}
+++++++++++++++++++++++++++++++
Balance Score: [91m0.375[0m

Target Composition: CaVO2
Normalized Target Composition: Ca0.5 V0.5
Raw Output Phases: ['CaV2O5', 'Mg2']
Weight Fractions: [75, 25]
Normalized Output Composition: {Element Ca: 0.25, Element V: 0.5, Element Mg: 0.25}
+++++++++++++++++++++++++++++++
Balance Score: [93m0.719[0m

Target Composition: CaVO2
Normalized Target Composition: Ca0.5 V0.5
Raw Output Phases: ['Li14V43O114', 'LiV3O8']
Weight Fractions: [99.77, 0.23]
Normalized Output Composition: {Element Li: 0.25, Element V: 0.75}
++++++++

In [9]:
#TRI_63 
target = "LiV3O4"
products4 = {"phases":[ "Li14V43O114" ,"LiV3O8"], "weight_fraction":[99.77,0.23]}
products5 = {"phases":[ "LiV3O8"], "weight_fraction":[100]}
pr_list =[products4, products5]
for products in pr_list:
    calculate_chemical_factors(target,products)


Target Composition: LiV3O4
Normalized Target Composition: Li0.25 V0.75
Raw Output Phases: ['Li14V43O114', 'LiV3O8']
Weight Fractions: [99.77, 0.23]
Normalized Output Composition: {Element Li: 0.25, Element V: 0.75}
+++++++++++++++++++++++++++++++
Balance Score: [92m1.000[0m

Target Composition: LiV3O4
Normalized Target Composition: Li0.25 V0.75
Raw Output Phases: ['LiV3O8']
Weight Fractions: [100]
Normalized Output Composition: {Element Li: 0.25, Element V: 0.75}
+++++++++++++++++++++++++++++++
Balance Score: [92m1.000[0m
