In [1]:
import numpy as np
from itertools import product
import pandas as pd
import os, json, sys
import matplotlib.pyplot as plt


In [2]:
# cached interpolators
from functools import lru_cache
from scipy.interpolate import interp1d
import numpy as np


def interpolate_element_data(time_data, value_data, fit_type='linear'):
    """
    Build a memoised 1-D interpolation function for one series of (time, value) samples.

    Parameters:
    time_data (sequence of numbers): Sample positions (x values).
    value_data (sequence of numbers): Sample values (y values), same length as time_data.
    fit_type (str): 'log' interpolates linearly in log(value) and exponentiates the
        result; any other value interpolates the raw values linearly.

    Returns:
    callable: f(x) -> interpolated value(s). Queries outside the sampled range are
        linearly extrapolated. Hashable arguments (plain scalars) are memoised with
        lru_cache; unhashable arguments (lists, numpy arrays) are evaluated directly
        instead of raising TypeError. The lru_cache helpers `cache_info` and
        `cache_clear` are exposed as attributes of the returned function.
    """
    time_data = np.array(time_data)
    value_data = np.array(value_data)

    if fit_type == 'log':
        # log() is undefined for values <= 0, so those samples are clamped to the
        # smallest positive sample (or 1e-10 when no sample is positive).
        min_positive = np.min(value_data[value_data > 0]) if np.any(value_data > 0) else 1e-10
        value_data = np.log(np.where(value_data > 0, value_data, min_positive))
        interpolator = interp1d(time_data, value_data, kind='linear', fill_value="extrapolate", bounds_error=False)

        def evaluate(x):
            # Interpolate in log space, then map back to the original scale.
            return np.exp(interpolator(x))
    else:
        interpolator = interp1d(time_data, value_data, kind='linear', fill_value="extrapolate")

        def evaluate(x):
            return interpolator(x)

    cached_evaluate = lru_cache(maxsize=None)(evaluate)  # No limit to cache size

    def cached_interpolator(x):
        # lru_cache keys on the argument, so only hashable queries can be memoised.
        try:
            hash(x)
        except TypeError:
            return evaluate(x)
        return cached_evaluate(x)

    # Keep the cache-introspection helpers callers would get from a bare lru_cache wrapper.
    cached_interpolator.cache_info = cached_evaluate.cache_info
    cached_interpolator.cache_clear = cached_evaluate.cache_clear
    return cached_interpolator

# Build one interpolator per JSON file found in a data directory (e.g. ./fispact_data)

def create_interpolators(data_path, fit_type='linear', verbose=False):
    """
    Creates interpolation functions for each element based on data from JSON files.

    Every ``*.json`` file directly inside ``data_path`` is read. The element name is
    the part of the file name before the first '.'. Each file must contain an
    'inventory_data' list whose entries provide 'cooling_time' and
    'dose_rate' -> 'dose'.

    Parameters:
    data_path (str): The path to the directory containing the JSON files.
    fit_type (str): Passed through to interpolate_element_data ('linear' or 'log').
    verbose (bool): When True, print the list of JSON files that were found.

    Returns:
    dict: A dictionary where the keys are element names and the values are interpolation functions.

    """
    json_files = [f for f in os.listdir(data_path) if f.endswith('.json')]
    if verbose:
        print(json_files)

    # Load the data from the json files and create interpolation functions for each element
    element_data = {}
    for file in json_files:
        # Read from the directory that was listed (data_path), and close the
        # handle as soon as the JSON has been parsed.
        with open(os.path.join(data_path, file), 'r') as handle:
            data = json.load(handle)
        element = file.split('.')[0]
        inventory = data['inventory_data']
        time_data = [entry['cooling_time'] for entry in inventory]
        value_data = [entry['dose_rate']['dose'] for entry in inventory]
        element_data[element] = interpolate_element_data(time_data, value_data, fit_type=fit_type)

    return element_data
import pandas as pd
import plotly.express as px

import pandas as pd

def create_dataframe_from_dict(data_dict):
    """
    Converts a dictionary with nested 'composition' and optional 'fitness' into a DataFrame.

    Args:
    data_dict (dict): Dictionary whose values are dicts containing 'composition'
        (a mapping of column name -> value) and optionally 'fitness' (a sequence
        whose first item is used).

    Returns:
    pd.DataFrame: One row per entry, in the dictionary's iteration order, with one
        column per composition key. A 'Fitness' column is added when at least one
        entry has 'fitness'; entries without it get NaN in that column.
    """
    compositions = []
    fitness_values = []
    any_fitness = False

    for value in data_dict.values():
        compositions.append(value['composition'])
        if 'fitness' in value:
            fitness_values.append(value['fitness'][0])  # Only the first fitness item is used
            any_fitness = True
        else:
            # Keep the list aligned with the rows so the column assignment below
            # cannot fail with a length mismatch.
            fitness_values.append(float('nan'))

    # Create DataFrame from compositions
    df_compositions = pd.DataFrame(compositions)

    # Add fitness column only if some entry actually carried a fitness value
    if any_fitness:
        df_compositions['Fitness'] = fitness_values

    return df_compositions


import plotly.graph_objects as go
import pandas as pd

def create_parallel_coordinates_plot(df, fitness_col=None, in_percent=False, num_compositions=100):
    """
    Creates an interactive plot of compositions using Plotly's go.Parcats
    (parallel-categories) trace, limiting the number of compositions displayed
    to `num_compositions`.

    Args:
    df (pd.DataFrame): DataFrame containing composition data with columns as elements.
    fitness_col (str, optional): Column name for the fitness values, used for sorting
        and line colouring. When it is None or not a column of `df`, the lines are
        left uncoloured and the first `num_compositions` rows are plotted.
    in_percent (bool, optional): Whether to multiply the composition values by 100.
    num_compositions (int, optional): The number of compositions to display on the plot.

    Returns:
    go.Figure: The interactive plot.
    """
    if in_percent:
        df = df.copy()  # Create a copy to avoid modifying the original DataFrame
        for col in df.columns:
            if col != fitness_col:
                df[col] *= 100

    has_fitness = fitness_col in df.columns

    # Keep the rows with the lowest fitness values; without a fitness column there
    # is nothing to rank by, so just cap the number of rows.
    if fitness_col and has_fitness:
        df = df.sort_values(by=fitness_col).head(num_compositions)
    elif not has_fitness:
        df = df.head(num_compositions)

    # One dimension per composition column, labelled with units when in percent
    dimensions = [
        go.parcats.Dimension(values=df[col], label=f"{col} (wt%)" if in_percent else col)
        for col in df.columns
        if col != fitness_col
    ]

    # Colour styling is only defined when fitness values exist; calling
    # .min()/.max() on a missing colour series would raise AttributeError.
    line = {}
    if has_fitness:
        color = df[fitness_col]
        line = dict(
            color=color,
            colorscale=px.colors.diverging.Tealrose,
            cmin=color.min(),
            cmax=color.max()
        )

    # Create the plot
    fig = go.Figure(
        data=go.Parcats(
            dimensions=dimensions,
            line=line,
            hoveron='color', # Enable hover for color (dimension values)
            hoverinfo='all',
            labelfont=dict(size=12),
            tickfont=dict(size=10),
            arrangement='freeform'
        )
    )

    return fig


In [3]:
import plotly.graph_objects as go
import pandas as pd
import plotly.express as px  # To use for color scales and other utilities

def create_parallel_coordinates_plot(df, fitness_col=None, in_percent=False, num_compositions=100):
    """
    Interactive composition plot built on Plotly's go.Parcats (parallel-categories)
    trace, with a Viridis colour bar representing fitness and a white background.

    Args:
    df (pd.DataFrame): DataFrame containing composition data.
    fitness_col (str, optional): Column name for the fitness values, used for sorting
        and line colouring. When it is None or not a column of `df`, the lines are
        left uncoloured and the first `num_compositions` rows are plotted.
    in_percent (bool, optional): Convert composition values to percentages (x100).
    num_compositions (int, optional): Number of compositions to display on the plot.

    Returns:
    go.Figure: The interactive plot.
    """
    if in_percent:
        df = df.copy()  # Avoid modifying the original DataFrame
        for col in df.columns:
            if col != fitness_col:
                df[col] *= 100  # Convert to percentage

    has_fitness = fitness_col in df.columns

    # Keep the rows with the lowest fitness values; without a fitness column there
    # is nothing to rank by, so just cap the number of rows.
    if fitness_col and has_fitness:
        df = df.sort_values(by=fitness_col, ascending=True).head(num_compositions)
    elif not has_fitness:
        df = df.head(num_compositions)

    # One dimension per composition column, labelled with units when in percent
    dimensions = [
        go.parcats.Dimension(values=df[col], label=f"{col} (%)" if in_percent else col)
        for col in df.columns
        if col != fitness_col
    ]

    # Colour styling is only defined when fitness values exist; calling
    # .min()/.max() on a missing colour series would raise AttributeError.
    line = {}
    if has_fitness:
        color = df[fitness_col]
        line = dict(
            color=color,
            colorscale='Viridis',  # Using a perceptually uniform colorscale
            cmin=color.min(),
            cmax=color.max(),
            showscale=True  # Show color scale to indicate fitness values
        )

    fig = go.Figure(
        data=go.Parcats(
            dimensions=dimensions,
            line=line,
            hoveron='color',  # Hover effect on color dimension
            hoverinfo='all',
            labelfont=dict(size=12),
            tickfont=dict(size=10),
            arrangement='freeform'
        )
    )

    fig.update_layout(
        title="Parallel Coordinates Plot for Material Compositions",
        title_font_size=20,
        plot_bgcolor='white',
        paper_bgcolor='white'
    )

    return fig


In [9]:
import plotly.graph_objects as go
import pandas as pd
import plotly.express as px  # For color scales

def create_parallel_coordinates_plot(df, fitness_col=None, in_percent=False, num_compositions=100):
    """
    Interactive composition plot built on Plotly's go.Parcats (parallel-categories)
    trace, coloured by fitness with a Viridis colour bar.

    Args:
    df (pd.DataFrame): DataFrame containing composition data, one column per element.
    fitness_col (str, optional): Column name for the fitness values, used for sorting
        and line colouring. When it is None or not a column of `df`, the lines are
        left uncoloured and the first `num_compositions` rows are plotted.
    in_percent (bool, optional): Multiply composition values by 100 and round them
        to 2 decimal places.
    num_compositions (int, optional): Number of compositions to display on the plot.

    Returns:
    go.Figure: The interactive plot.
    """
    if in_percent:
        df = df.copy()  # Avoid modifying the original DataFrame
        for col in df.columns:
            if col != fitness_col:
                df[col] = round(df[col] * 100, 2)  # Convert to percentage and round off

    has_fitness = fitness_col in df.columns

    # Keep the rows with the lowest fitness values; without a fitness column there
    # is nothing to rank by, so just cap the number of rows.
    if fitness_col and has_fitness:
        df = df.sort_values(by=fitness_col, ascending=True).head(num_compositions)
    elif not has_fitness:
        df = df.head(num_compositions)

    # Set up dimensions for the plot without explicit category sorting
    dimensions = [
        go.parcats.Dimension(
            values=df[col].values,  # Use values as they appear in the dataframe
            label=f"{col} (%)" if in_percent else col
        ) for col in df.columns if col != fitness_col
    ]

    # Colour styling is only defined when fitness values exist; calling
    # .min()/.max() on a missing colour series would raise AttributeError.
    line = {}
    if has_fitness:
        color = df[fitness_col]
        line = dict(
            color=color,
            colorscale='Viridis',  # Using a perceptually uniform colorscale
            cmin=color.min(),
            cmax=color.max(),
            showscale=True  # Show color scale to indicate fitness values
        )

    fig = go.Figure(
        data=go.Parcats(
            dimensions=dimensions,
            line=line,
            hoveron='color',  # Hover effect on color dimension
            hoverinfo='all',
            labelfont=dict(size=12),
            tickfont=dict(size=10),
            arrangement='freeform'
        )
    )

    fig.update_layout(
        title="Parallel Coordinates Plot for Material Compositions",
        title_font_size=20,
        plot_bgcolor='white',
        paper_bgcolor='white'
    )

    return fig


In [5]:
# Build one log-space interpolator per JSON file found in ./fispact_data.
element_interpolators = create_interpolators('./fispact_data', fit_type='log')

# The element names are the keys of the interpolator mapping, in insertion order.
elements = [name for name in element_interpolators]
print(elements)


['V', 'Zr', 'Ti', 'W', 'Cr']


In [6]:
import numpy as np
from itertools import product
from tqdm import tqdm  # Make sure tqdm is imported
from itertools import product
import numpy as np

def get_steps(range_tuple, step):
    """
    Return the grid points start, start + step, ... that do not exceed the upper bound.

    Args:
    range_tuple (tuple): (start, stop) bounds; stop is included when it lies on the grid.
    step (number): Non-zero grid spacing.

    Returns:
    np.ndarray: The grid values start + k * step for k = 0, 1, ...

    Raises:
    ValueError: If step is zero.
    """
    start, stop = range_tuple[0], range_tuple[1]
    if step == 0:
        raise ValueError("step must be non-zero")

    # np.arange(start, stop + step, step) can emit one value past `stop` when the
    # step is a float (its length comes from a rounded-up division). Counting the
    # steps explicitly, with a small tolerance for rounding error, keeps the grid
    # inside the requested range.
    n_points = int(np.floor((stop - start) / step + 1e-9)) + 1
    return start + step * np.arange(max(n_points, 0))

def generate_valid_combinations(ranges, tolerance=0.05):
    """
    Lazily yield every tuple from the Cartesian product of `ranges` whose
    entries sum to 1.0 within `tolerance` (inclusive).

    Args:
    ranges (iterable of iterables): Candidate values for each position.
    tolerance (float): Maximum allowed absolute deviation of the sum from 1.0.

    Yields:
    tuple: One value per range, in the order produced by itertools.product.
    """
    def sums_to_one(candidate):
        return np.abs(sum(candidate) - 1.0) <= tolerance

    yield from filter(sums_to_one, product(*ranges))

def adaptive_grid_search(element_interpolators, comp_range, limits, step=0.01, tolerance=0.05):
    """
    Score every composition on a regular grid against a set of limits.

    Despite the name, no adaptive refinement is performed: every grid combination
    whose fractions sum to 1.0 within `tolerance` is evaluated exactly once.

    Args:
    element_interpolators (dict): Maps element name -> callable f(time) giving that
        element's value at a time.
    comp_range (dict): Maps element name -> (min, max) fraction bounds. Its key order
        defines the element order of each composition.
    limits (list of dict): Each entry has 'time', 'limit' and 'importance'.
    step (float): Grid spacing used for every element.
    tolerance (float): Allowed absolute deviation of a composition's sum from 1.0.

    Returns:
    dict: Maps a running index -> {'composition': {element: fraction}, 'fitness': [penalty]},
        where penalty is the sum over limits of max(0, mixture value - limit) * importance
        (lower is better; 0 means no limit is exceeded).
    """
    elements = list(comp_range.keys())
    ranges = [get_steps(comp_range[element], step) for element in elements]

    # Materialise the combinations so their count is known for the message and progress bar
    valid_combinations = list(generate_valid_combinations(ranges, tolerance))
    print(f"Number of valid combinations: {len(valid_combinations)}")  # Debug statement

    # The per-element values at each limit's time do not depend on the composition,
    # so evaluate the interpolators once here instead of once per combination.
    limit_terms = [
        (
            [element_interpolators[element](limit['time']) for element in elements],
            limit['limit'],
            limit['importance'],
        )
        for limit in limits
    ]

    compositions_data = {}  # Dictionary to store all compositions and their penalties

    # Evaluate each combination using tqdm for the progress bar
    for idx, combo in tqdm(enumerate(valid_combinations), total=len(valid_combinations), desc='Evaluating combinations'):
        total_penalty = 0

        for element_values, required_limit, importance in limit_terms:
            # Mixture value: per-element values weighted by their fractions
            total_value = sum(
                value * fraction
                for value, fraction in zip(element_values, combo)
            )
            # Only the amount above the limit is penalised
            delta = max(0, total_value - required_limit)
            total_penalty += delta * importance

        # Save composition and its penalty
        compositions_data[idx] = {'composition': dict(zip(elements, combo)), 'fitness': [total_penalty]}

    return compositions_data

# Example limits: at each 'time' the mixture value should not exceed 'limit';
# 'importance' weights how strongly an excess is penalised.
# NOTE(review): 3.41e7 looks like a seconds-per-year factor (a year is ~3.156e7 s) — confirm the intended value and units.
limits = [
    dict(time=3.41e7 / 12, limit=1e4, importance=0.5),
    dict(time=3.41e7 * 2, limit=1e2 / 2, importance=1),
    dict(time=3.41e7 * 10, limit=1e0, importance=2),
    dict(time=3.41e7 * 100, limit=1e-4, importance=4),
]

# Allowed (min, max) fraction for each element; requires element_interpolators from the earlier cell
comp_range = dict(
    V=(0.75, 1.0),
    Cr=(0.01, 0.25),
    Ti=(0.01, 0.3),
    W=(0.01, 0.2),
    Zr=(0.01, 0.2),
)
composition_data = adaptive_grid_search(element_interpolators, comp_range, limits)


Number of valid combinations: 126263


Evaluating combinations: 100%|██████████| 126263/126263 [00:00<00:00, 177574.86it/s]


In [10]:
# Convert the resulting dictionary to a DataFrame (one row per composition)
df_compositions = create_dataframe_from_dict(composition_data)
print(df_compositions.head())

# Keep only the compositions whose 'V' value is greater than 0.7
df_compositions = df_compositions[df_compositions['V'] > 0.7]

# Randomly select up to 1000 compositions. Capping n at the number of available
# rows avoids the ValueError that sample() raises when asked for more rows than exist.
rand_df_compositions = df_compositions.sample(n=min(1000, len(df_compositions)), random_state=42)

# Sort by the 'V' column from highest to lowest, then round all values to 4 decimal places
rand_df_compositions = rand_df_compositions.sort_values(by='V', ascending=False)
rand_df_compositions = rand_df_compositions.round(4)

# Plot the sampled compositions, coloured by fitness
fig = create_parallel_coordinates_plot(rand_df_compositions, fitness_col='Fitness')
fig.show()

# To save the figure as a standalone HTML file:
#fig.write_html("parallel_coordinates_plot.html")

      V    Cr    Ti     W    Zr   Fitness
0  0.75  0.01  0.01  0.01  0.17  0.000309
1  0.75  0.01  0.01  0.01  0.18  0.000350
2  0.75  0.01  0.01  0.01  0.19  0.000390
3  0.75  0.01  0.01  0.01  0.20  0.000430
4  0.75  0.01  0.01  0.02  0.16  0.000271
