# Custom code for generating response functions & datasets:
- Currently, response functions are multi-dimensional sigmoids, meaning all input-output relationships are monotonic. Eventually, it might be nice to support non-monotonic relationships as well, so that certain input features can have an "optimum" with worse performance on either side of it.
- Also note: this currently only works for generating non-formulation datasets. Eventually, we want to support formulations as well.

In [20]:
import numpy as np
import pandas as pd
from typing import List, Tuple, Optional

## These functions are doing most of the work:

### Constrained Simplex Sampling

#### TODO: make this a little smarter; currently this is very bad at sampling from small constraint ranges

In [21]:
def sample_from_constrained_simplex(
    n_dimensions: int,
    constraints: Optional[List[Optional[Tuple[float, float]]]] = None,
    max_attempts: int = 1000
) -> np.ndarray:
    """
    Generate a random point from an N-dimensional simplex with optional element-wise constraints.

    A uniform random vector is normalized to sum to 1 and then repaired: an element below its
    minimum is raised to the minimum by taking the deficit (proportionally to their current
    values) from elements that still have slack, and an element above its maximum is lowered
    to the maximum by handing the excess (in equal shares) to elements that still have headroom.
    If the repair does not yield a valid point, a fresh random vector is drawn.

    Parameters:
        n_dimensions (int): Number of dimensions for the simplex
        constraints (List[Optional[Tuple[float, float]]], optional): List of (min, max) constraints
            for each dimension. Use None for unconstrained dimensions.
            Example: [(0.2, 0.4), None, (0, 0.5)]
        max_attempts (int): Maximum number of random restarts used to find a valid solution

    Returns:
        numpy.ndarray: Array of N numbers that sum to 1 and satisfy the constraints
            (an empty array when n_dimensions is 0)

    Raises:
        ValueError: If len(constraints) != n_dimensions, if a constraint has min > max, if the
            minima sum to more than 1, if the maxima sum to less than 1, or if no valid point
            is found within max_attempts
    """

    if n_dimensions == 0:
        return np.array([])

    # Initialize constraints if not provided
    if constraints is None:
        constraints = [None] * n_dimensions
    elif len(constraints) != n_dimensions:
        raise ValueError("Length of constraints must match n_dimensions")

    # Validate constraints up front so that impossible requests fail immediately with a
    # specific message instead of exhausting max_attempts.
    for constraint in constraints:
        if constraint is not None and constraint[0] > constraint[1]:
            raise ValueError("Constraint minimum exceeds its maximum")

    total_min = sum(c[0] for c in constraints if c is not None)
    if total_min > 1:
        raise ValueError("Sum of minimum constraints exceeds 1")

    # An unconstrained dimension can absorb up to the whole unit mass, so it counts as 1.
    # The small tolerance avoids rejecting maxima that sum to 1 only up to float rounding.
    total_max = sum(1.0 if c is None else c[1] for c in constraints)
    if total_max < 1 - 1e-12:
        raise ValueError("Sum of maximum constraints is less than 1")

    for _attempt in range(max_attempts):
        try:
            # Generate initial random sample
            sample = np.random.random(n_dimensions)
            sample = sample / np.sum(sample)  # Normalize to sum to 1

            # Apply constraints iteratively
            for _ in range(n_dimensions * 2):  # Allow multiple passes for adjustment
                modified = False

                # Adjust values to meet constraints
                for i, constraint in enumerate(constraints):
                    if constraint is None:
                        continue

                    min_val, max_val = constraint
                    if sample[i] < min_val:
                        deficit = min_val - sample[i]
                        # Donors: unconstrained elements, plus constrained ones above their minimum
                        free_indices = [j for j, c in enumerate(constraints)
                                        if c is None or (j != i and sample[j] > c[0])]
                        if not free_indices:
                            raise ValueError("Cannot satisfy minimum constraint")
                        weights = sample[free_indices]
                        weight_total = weights.sum()
                        if weight_total <= 0:
                            # Nothing left to take from; dividing would produce NaN/inf
                            raise ValueError("Cannot satisfy minimum constraint")
                        # Take the deficit from the donors in proportion to their current values
                        sample[free_indices] -= deficit * weights / weight_total
                        sample[i] = min_val
                        modified = True
                    elif sample[i] > max_val:
                        excess = sample[i] - max_val
                        # Receivers: unconstrained elements, plus constrained ones below their maximum
                        free_indices = [j for j, c in enumerate(constraints)
                                        if c is None or (j != i and sample[j] < c[1])]
                        if not free_indices:
                            raise ValueError("Cannot satisfy maximum constraint")
                        # Hand the excess to the receivers in equal shares
                        sample[free_indices] += excess / len(free_indices)
                        sample[i] = max_val
                        modified = True

                # Normalize to sum to 1
                sample = sample / np.sum(sample)

                # Check if all constraints are satisfied
                constraints_satisfied = all(
                    c is None or (c[0] <= v <= c[1])
                    for c, v in zip(constraints, sample)
                )

                if constraints_satisfied and abs(sum(sample) - 1.0) < 1e-10:
                    return sample

                # Nothing was adjusted in this pass, so further passes cannot help
                if not modified:
                    break

        except ValueError:
            # This starting point could not be repaired; draw a new one
            continue

    raise ValueError(f"Could not find valid solution after {max_attempts} attempts")

### TODO: allow user to add noise to the response functions (make use of the `noise` argument which currently does nothing)

In [61]:
def sigmoid(input_row, coefs):
    """
    D-dimensional sigmoid (logistic) response for the given set of D coefficients.

    Computes 1 / (1 + exp(-z)), where z is np.matmul(input_row, coefs).
    """
    linear_term = np.matmul(input_row, coefs)
    return 1 / (1 + np.exp(-1 * linear_term))


def build_sythetic_demo_dataset(inputs=5, outputs=1, num_rows=10, noise=0, coefs=None):
    """
    Build a synthetic dataset whose outputs are sigmoid responses of randomly sampled inputs.

    Parameters:
        inputs (int or dict): Either the number of anonymous inputs (columns x_1, x_2, ...), or a
            dict with the keys "general" and "formulation". Each of those maps a column name to a
            spec dict with the keys "min", "max" and "units".
            - "general" inputs are drawn uniformly from [-2, 2] for the response calculation and
              then linearly mapped onto [min, max] in the returned table.
            - "formulation" inputs are drawn with sample_from_constrained_simplex using
              (min, max) as constraints, so each row of them sums to 1. They are not rescaled.
        outputs (int or dict): Either the number of anonymous outputs (columns y_1, y_2, ...), or
            a dict mapping output names to spec dicts with the keys "min", "max" and "units".
        num_rows (int): Number of rows to generate
        noise: Currently unused (see the TODO below)
        coefs (array-like, optional): Sigmoid coefficients, one row per output and one column per
            input. Drawn uniformly from [-1, 1] when omitted.

    Returns:
        Tuple[pandas.DataFrame, pandas.DataFrame]: (data_df, coefs_df)
            - data_df holds the input columns followed by the output columns. When `inputs` is an
              int, values are left unscaled (inputs in [-2, 2], outputs in (0, 1)). When `inputs`
              is a dict, general inputs (and outputs, if `outputs` is a dict too) are mapped onto
              their [min, max] range and every column that has a spec is renamed to
              "<name>_<units>".
            - coefs_df holds the coefficients with one row per output and one column per input.
              Its labels never carry the unit suffix, and its rows are labelled y_1, y_2, ...
              whenever `inputs` is an int.
    """

    ### TODO: allow user to add noise to the response functions (using the `noise` argument)

    inputs_are_counts = isinstance(inputs, int)
    outputs_are_counts = isinstance(outputs, int)

    # Resolve input column names
    if inputs_are_counts:
        input_names = [f"x_{i+1}" for i in range(inputs)]
    else:
        general_inputs = inputs["general"]
        formulation_inputs = inputs["formulation"]
        num_general_inputs = len(general_inputs)
        num_formulation_inputs = len(formulation_inputs)
        input_names = list(general_inputs) + list(formulation_inputs)
    num_inputs = len(input_names)

    # Resolve output column names
    if outputs_are_counts:
        output_names = [f"y_{k+1}" for k in range(outputs)]
    else:
        output_names = list(outputs)
    num_outputs = len(output_names)

    # Randomly set coefficients for the response function if not set by the user.
    # `is None` is required here: `coefs == None` compares element-wise for NumPy arrays
    # and cannot be used as a condition.
    if coefs is None:
        coefs = np.random.uniform(-1, 1, size=(num_outputs, num_inputs))

    # Generate input values
    if inputs_are_counts:
        X = np.random.uniform(-2, 2, size=(num_rows, num_inputs))
    else:
        X = np.random.uniform(-2, 2, size=(num_rows, num_general_inputs))
        if formulation_inputs:
            formulation_constraints = [
                (formulation_inputs[name]["min"], formulation_inputs[name]["max"])
                for name in formulation_inputs
            ]
            X_formulation = np.array([
                sample_from_constrained_simplex(
                    n_dimensions=num_formulation_inputs,
                    constraints=formulation_constraints,
                )
                for _ in range(num_rows)
            ])
            # Keep a 2-D shape even when num_rows is 0 so the concatenation below works
            X_formulation = X_formulation.reshape(num_rows, num_formulation_inputs)
            X = np.concatenate((X, X_formulation), axis=1)

    # Generate output values: one row of responses per output
    y = np.array([sigmoid(X, coefs[k]) for k in range(num_outputs)])

    # Create pandas DataFrame for the generated data & name the columns
    data_df = pd.DataFrame()
    for i, name in enumerate(input_names):
        data_df[name] = X[:, i]
    for k, name in enumerate(output_names):
        data_df[name] = y[k]

    # Label the coefficient table. With integer inputs the rows keep the generic y_k labels;
    # with named inputs they follow the output names (y_k again when outputs is an int).
    coefs_df = pd.DataFrame(coefs)
    if inputs_are_counts:
        coef_row_names = [f"y_{k+1}" for k in range(len(coefs_df))]
    else:
        coef_row_names = output_names
    coefs_df = coefs_df.rename(columns=dict(enumerate(input_names)))
    coefs_df = coefs_df.rename(index=dict(enumerate(coef_row_names)))

    # Map the raw values onto the user-specified ranges and append units to the column names
    if not inputs_are_counts:
        df_scaled = data_df.copy()

        # General inputs were sampled in [-2, 2]; bring them to [0, 1] first
        for col in general_inputs:
            df_scaled[col] = (df_scaled[col].to_numpy() + 2) / 4

        # Later updates win if the same name appears in more than one group
        all_columns = dict()
        all_columns.update(general_inputs)
        all_columns.update(formulation_inputs)
        if not outputs_are_counts:
            all_columns.update(outputs)

        # Stretch the [0, 1] values onto [min, max]; formulation columns stay as sampled
        for col, spec in all_columns.items():
            if col in general_inputs or (not outputs_are_counts and col in outputs):
                df_scaled[col] = df_scaled[col] * (spec["max"] - spec["min"]) + spec["min"]

        column_renaming = {col: f'{col}_{all_columns[col]["units"]}' for col in all_columns}
        data_df = df_scaled.rename(column_renaming, axis=1)

    return data_df, coefs_df

### Convert ingredient recipe data tables from "Wide" to "Compact" format:

In [62]:
def wide_to_compact_format(df):
    """
    Convert formulation data from wide format to compact format.

    Parameters:
    df (pandas.DataFrame): Input DataFrame in wide format where:
        - Each row is a formulation
        - Each column is an ingredient with its weight percentage

    Returns:
    pandas.DataFrame: Transformed DataFrame in compact format with columns:
        - Ingredient A Name, Ingredient A weight %, Ingredient B Name, etc.
        Only entries greater than 0 are kept, in their original column order.
    """
    records = []

    for _, formulation in df.iterrows():
        # Keep only the ingredients that are actually present (value > 0)
        present = formulation[formulation > 0]

        # Slots are lettered A, B, C, ... in the order the ingredients appear
        record = {}
        for position, (name, amount) in enumerate(present.items(), start=1):
            letter = chr(64 + position)
            record.update({
                f'Ingredient {letter} Name': name,
                f'Ingredient {letter} weight %': amount,
            })

        records.append(record)

    return pd.DataFrame(records)

### Convert ingredient recipe data tables from "Compact" to "Wide" format:

In [63]:
def compact_to_wide_format(df):
    """
    Convert formulation data from compact format to wide format.

    Parameters:
    df (pandas.DataFrame): Input DataFrame in compact format where:
        - Each row is a formulation
        - Columns alternate between ingredient names and weight percentages
          ("Ingredient A Name", "Ingredient A weight %", "Ingredient B Name", ...)

    Returns:
    pandas.DataFrame: Transformed DataFrame in wide format where:
        - Each row is a formulation
        - Each column is an ingredient with its weight percentage (0 when absent)
        - Columns are sorted alphabetically
    """
    # Every distinct ingredient that occurs in any column whose label contains 'Name'
    name_columns = [column for column in df.columns if 'Name' in column]
    known_ingredients = set()
    for column in name_columns:
        known_ingredients.update(df[column].dropna().unique())

    # Work out the (name column, weight column) pairs once instead of once per row
    slots = []
    for position in range(1, len(df.columns) // 2 + 1):
        letter = chr(64 + position)
        name_col = f'Ingredient {letter} Name'
        if name_col in df.columns:
            slots.append((name_col, f'Ingredient {letter} weight %'))

    records = []
    for _, entry in df.iterrows():
        # Start from "absent" for every ingredient, then fill in the listed ones
        amounts = dict.fromkeys(known_ingredients, 0)
        for name_col, weight_col in slots:
            ingredient = entry[name_col]
            if pd.notna(ingredient):
                amounts[ingredient] = entry[weight_col]
        records.append(amounts)

    wide = pd.DataFrame(records)

    # Sort columns alphabetically for consistency
    return wide.reindex(sorted(wide.columns), axis=1)

## Examples

### Example 1: generate arbitrary # of rows & columns, with no column names

In [64]:
# Integer arguments: 9 anonymous inputs (x_1..x_9) and 4 anonymous outputs (y_1..y_4), 10 rows.
# Nothing is rescaled in this mode: inputs are sampled in [-2, 2] and outputs are raw sigmoid values.
data_df, coefs_df = build_sythetic_demo_dataset(inputs=9, outputs=4, num_rows=10)
data_df

Unnamed: 0,x_1,x_2,x_3,x_4,x_5,x_6,x_7,x_8,x_9,y_1,y_2,y_3,y_4
0,0.080658,-1.436614,-0.30987,-1.113035,0.139508,1.312544,-1.924292,-1.412527,0.081377,0.166596,0.126806,0.394902,0.093979
1,-1.32761,1.374328,-1.567493,-1.91764,0.279771,-1.377591,-1.736837,1.879886,-0.929336,0.133558,0.026947,0.752549,0.128266
2,-1.828199,0.398853,1.972224,-0.973594,1.098866,0.601916,-1.690224,0.859979,1.896805,0.02845,0.01736,0.86569,0.379579
3,1.038634,0.387713,0.614892,-1.402795,-0.10628,1.126604,-0.948989,1.050949,-0.661703,0.079685,0.237611,0.56106,0.692047
4,0.374261,0.206134,1.131771,-0.051934,1.81415,-1.792805,-0.653711,1.948035,-0.010857,0.289316,0.910756,0.986115,0.618691
5,1.289776,-0.231742,-1.449407,-1.543683,0.709848,-1.914415,0.415541,-0.129963,-1.511942,0.947121,0.960925,0.666337,0.057083
6,0.293527,-1.835242,1.602415,-1.199811,0.085054,1.61388,-0.045531,0.13433,0.309233,0.070702,0.461945,0.172734,0.564167
7,0.357768,-1.856294,-1.122144,-1.728784,-1.204615,1.210953,-1.74939,-1.215161,-0.390276,0.218245,0.078807,0.165503,0.051932
8,1.72967,0.173169,1.192919,1.705519,0.871825,-0.414872,1.433614,-1.269006,-0.46836,0.939501,0.996649,0.732766,0.812533
9,0.046935,1.91637,0.428742,1.965484,1.387686,0.118365,0.431465,-0.241849,-0.921532,0.505825,0.882855,0.620965,0.946779


In [65]:
# Sigmoid coefficients used above: one row per output, one column per input
coefs_df

Unnamed: 0,x_1,x_2,x_3,x_4,x_5,x_6,x_7,x_8,x_9
y_1,0.737203,0.33539,-0.564722,-0.085373,-0.022659,-0.893724,0.795219,-0.843243,0.709609
y_2,0.906503,-0.506552,0.623694,0.504596,0.506247,-0.908472,0.676474,-0.361876,-0.775216
y_3,0.941447,0.288322,0.166714,0.354536,0.403707,-0.959923,-0.995067,0.301183,0.885269
y_4,0.149387,0.201792,0.251905,0.995234,0.064006,0.847235,0.321171,0.904586,-0.338434


### Example 2: create a laser welding dataset with named columns

#### Assign "reasonable" ranges and desired units for each input & output column:

In [66]:
# Column specs for build_sythetic_demo_dataset. Every entry needs "min", "max" and "units";
# the units string is appended to the column name ("<name>_<units>").
inputs = {
    # General inputs are sampled independently and rescaled onto [min, max].
    "general": {
        "Laser Power": {"min": 100, "max": 1000, "units": "W"},
        "Pulse Duration": {"min": 0.1, "max": 10, "units": "ms"},
        "Welding Speed": {"min": 1, "max": 200, "units": "mm/s"},
        "Beam Diameter": {"min": 0.1, "max": 3, "units": "mm"},
        "Focal Position": {"min": -2, "max": 5, "units": "mm"},
        # "Shielding Gas Type": {"min": , "max": , "units": "n/a"},  # leave out categorical inputs for now
        "Flow Rate": {"min": 5, "max": 25, "units": "L/min"},
        "Heat Input": {"min": 10, "max": 500, "units": "J/mm"},
        "Ambient Temperature": {"min": 20, "max": 30, "units": "degC"},
        "Cooling Rate": {"min": 10, "max": 1000, "units": "degC/s"},
    },
    # Formulation inputs are sampled jointly so that each row sums to 1; min/max are used as
    # per-ingredient constraints and the values are not rescaled afterwards.
    # NOTE: min/max here are therefore fractions of 1 (0.05 = 5%), even though units is "%".
    "formulation": {
        # "Carbon": {"min": 0.0, "max": 0.0008, "units": "%"},
        # "Manganese": {"min": 0.00, "max": 0.02, "units": "%"},
        # "Molybdenum": {"min": 0.01, "max": 0.05, "units": "%"},
        "Nickel": {"min": 0.05, "max": 0.50, "units": "%"},
        "Chromium": {"min": 0.10, "max": 0.40, "units": "%"},
        "Iron": {"min": 0.0, "max": 1.0, "units": "%"},
    },
}

# Outputs are sigmoid responses rescaled onto [min, max].
outputs = {
    "Hardness": {"min": 200, "max": 800, "units": "HV"},
    "Fatigue Life": {"min": 10000, "max": 100000, "units": "numCycles"},
    "Wear Rate": {"min": 0.01, "max": 1.0, "units": "mg/m"},
    "Cutting Efficiency": {"min": 0.1, "max": 5, "units": "m/s"},
}

In [67]:
# Dict arguments: columns are named after the spec keys, rescaled to the ranges defined above,
# and suffixed with their units.
data_df, coefs_df = build_sythetic_demo_dataset(inputs=inputs, outputs=outputs, num_rows=15)
data_df

Unnamed: 0,Laser Power_W,Pulse Duration_ms,Welding Speed_mm/s,Beam Diameter_mm,Focal Position_mm,Flow Rate_L/min,Heat Input_J/mm,Ambient Temperature_degC,Cooling Rate_degC/s,Nickel_%,Chromium_%,Iron_%,Hardness_HV,Fatigue Life_numCycles,Wear Rate_mg/m,Cutting Efficiency_m/s
0,397.853296,5.345822,67.915148,0.96346,2.607913,9.767948,130.376163,29.614096,862.00743,0.057759,0.278458,0.663783,446.772825,38702.828234,0.514912,0.495732
1,962.596437,1.664165,194.412766,0.821242,-1.343705,11.246175,453.096718,25.97507,178.898054,0.5,0.265698,0.234302,220.132949,17136.053977,0.515981,2.993547
2,639.300292,3.220724,18.82513,1.093737,-1.116908,11.328395,126.225684,24.534034,791.01724,0.495558,0.1,0.404442,371.694779,44392.203839,0.801957,2.061326
3,540.470986,2.038893,69.573122,1.530958,2.779136,9.050035,316.430262,29.613733,63.686335,0.5,0.385858,0.114142,211.772261,10270.976436,0.234343,1.447639
4,962.974248,8.331268,176.015491,1.209702,1.525301,6.989204,478.557911,22.458471,855.706744,0.5,0.275763,0.224237,598.005131,97446.487545,0.907819,2.480307
5,704.965016,9.047751,103.435205,2.560111,-0.744034,23.338167,398.310208,29.847057,319.168332,0.164541,0.261546,0.573912,209.86773,18061.485286,0.981775,0.334002
6,456.314441,2.921891,62.786436,1.2095,1.006453,15.546684,16.294525,27.878101,190.132715,0.401005,0.4,0.198995,297.707145,18420.1624,0.30122,0.930601
7,313.853539,0.432383,186.875968,1.001396,0.038478,9.428036,385.605541,21.030083,625.043578,0.234228,0.4,0.365772,733.726792,38688.747376,0.079624,4.950575
8,733.237235,1.761011,58.523863,0.698886,-0.373889,11.69744,429.208015,27.262755,94.090918,0.443697,0.4,0.156303,201.707659,10352.419807,0.495308,1.034592
9,420.270035,7.923392,104.809301,1.648126,1.857109,13.799153,404.419347,29.310427,62.038936,0.386548,0.4,0.213452,215.218662,11428.0834,0.568653,0.796093


In [68]:
# Coefficients are labelled with the plain input/output names (no unit suffix)
coefs_df

Unnamed: 0,Laser Power,Pulse Duration,Welding Speed,Beam Diameter,Focal Position,Flow Rate,Heat Input,Ambient Temperature,Cooling Rate,Nickel,Chromium,Iron
Hardness,-0.533833,0.127312,0.936882,0.67352,0.16956,-0.489261,-0.927779,-0.87345,0.711534,-0.830775,-0.109051,-0.716241
Fatigue Life,0.717341,0.804969,0.781214,-0.255439,0.458722,0.487803,-0.89834,-0.97311,0.992863,-0.059708,0.133727,-0.645826
Wear Rate,0.929949,0.71226,-0.447318,0.589798,-0.502749,0.09329,0.075786,0.074167,0.300526,-0.266383,0.790177,0.732046
Cutting Efficiency,-0.805596,-0.599789,0.567962,0.964014,-0.516033,-0.881879,0.06875,-0.921366,-0.117264,-0.401426,-0.896368,-0.206275


## [Optional] Save result to Excel or CSV file: 

### Convert ingredient recipe data tables from "Wide" to "Compact" format:

In [32]:
# NOTE(review): this passes every column of data_df (process inputs and outputs too), not only the
# ingredient columns, so all of them are listed as "ingredients". wide_to_compact_format keeps
# only values > 0, so non-positive entries (e.g. a negative Focal Position) are dropped from a row.
wide_to_compact_format(data_df)

Unnamed: 0,Ingredient A Name,Ingredient A weight %,Ingredient B Name,Ingredient B weight %,Ingredient C Name,Ingredient C weight %,Ingredient D Name,Ingredient D weight %,Ingredient E Name,Ingredient E weight %,...,Ingredient L Name,Ingredient L weight %,Ingredient M Name,Ingredient M weight %,Ingredient N Name,Ingredient N weight %,Ingredient O Name,Ingredient O weight %,Ingredient P Name,Ingredient P weight %
0,Laser Power_W,747.82477,Pulse Duration_ms,6.088557,Welding Speed_mm/s,38.837249,Beam Diameter_mm,1.33973,Flow Rate_L/min,7.307837,...,Hardness_HV,666.953441,Fatigue Life_numCycles,11885.864563,Wear Rate_mg/m,0.878207,Cutting Efficiency_m/s,2.925855,,
1,Laser Power_W,240.555075,Pulse Duration_ms,7.428924,Welding Speed_mm/s,103.94444,Beam Diameter_mm,1.699466,Focal Position_mm,2.20629,...,Iron_%,0.1,Hardness_HV,415.041552,Fatigue Life_numCycles,41923.41118,Wear Rate_mg/m,0.351665,Cutting Efficiency_m/s,1.897693
2,Laser Power_W,635.038987,Pulse Duration_ms,4.167952,Welding Speed_mm/s,149.752268,Beam Diameter_mm,0.124408,Focal Position_mm,4.506985,...,Iron_%,0.180476,Hardness_HV,225.240296,Fatigue Life_numCycles,35053.445075,Wear Rate_mg/m,0.602052,Cutting Efficiency_m/s,4.471701
3,Laser Power_W,846.565943,Pulse Duration_ms,7.96197,Welding Speed_mm/s,88.924274,Beam Diameter_mm,1.689982,Flow Rate_L/min,7.945082,...,Hardness_HV,625.675171,Fatigue Life_numCycles,14666.160856,Wear Rate_mg/m,0.48065,Cutting Efficiency_m/s,1.964867,,
4,Laser Power_W,173.191569,Pulse Duration_ms,3.689389,Welding Speed_mm/s,57.226656,Beam Diameter_mm,2.116669,Focal Position_mm,3.111895,...,Iron_%,0.533038,Hardness_HV,771.54106,Fatigue Life_numCycles,87822.110628,Wear Rate_mg/m,0.386716,Cutting Efficiency_m/s,0.191496
5,Laser Power_W,462.865937,Pulse Duration_ms,1.886571,Welding Speed_mm/s,85.694646,Beam Diameter_mm,2.608962,Focal Position_mm,3.705617,...,Iron_%,0.523231,Hardness_HV,516.927032,Fatigue Life_numCycles,61604.546269,Wear Rate_mg/m,0.621485,Cutting Efficiency_m/s,4.371926
6,Laser Power_W,135.919693,Pulse Duration_ms,7.215864,Welding Speed_mm/s,120.819509,Beam Diameter_mm,2.774091,Focal Position_mm,0.032081,...,Iron_%,0.343574,Hardness_HV,678.761228,Fatigue Life_numCycles,88891.285622,Wear Rate_mg/m,0.155635,Cutting Efficiency_m/s,0.496115
7,Laser Power_W,192.594117,Pulse Duration_ms,4.413763,Welding Speed_mm/s,27.989293,Beam Diameter_mm,2.334797,Focal Position_mm,2.229336,...,Iron_%,0.291175,Hardness_HV,767.124038,Fatigue Life_numCycles,76828.686679,Wear Rate_mg/m,0.604772,Cutting Efficiency_m/s,1.203708
8,Laser Power_W,860.247911,Pulse Duration_ms,2.087838,Welding Speed_mm/s,68.337507,Beam Diameter_mm,1.042739,Focal Position_mm,0.686891,...,Iron_%,0.397009,Hardness_HV,587.942778,Fatigue Life_numCycles,42892.439844,Wear Rate_mg/m,0.931252,Cutting Efficiency_m/s,4.235688
9,Laser Power_W,211.98689,Pulse Duration_ms,8.609907,Welding Speed_mm/s,61.668599,Beam Diameter_mm,1.220692,Flow Rate_L/min,18.398425,...,Hardness_HV,623.05644,Fatigue Life_numCycles,87584.550217,Wear Rate_mg/m,0.211024,Cutting Efficiency_m/s,0.453819,,


In [12]:
# data_df.to_excel("Demo Datasets/Laser Welding (Synthetic)/laser_welding.xlsx", index=False)
# data_df.to_csv("Demo Datasets/Laser Welding (Synthetic)/laser_welding.csv", index=False)

# Done!

## Convert ingredient recipe data tables from "Wide" to "Compact" format:

In [13]:
# Example usage

# Create sample data in wide format:
# each key is an ingredient (one column), each list position is one formulation (one row),
# and 0 means the ingredient is not used in that formulation.
wide_data = {
    'Sugar': [10, 0, 15, 0],
    'Salt': [2, 1, 0, 0],
    'Flour': [83, 85, 73, 73],
    'Baking Powder': [0, 2, 5, 5],
    'Vanilla': [0, 7, 0, 0],
    'Brown Sugar': [0, 0, 0, 15],
    'Milk Chocolate Chips': [5, 0, 0, 0],
    'Dark Chocolate Chips': [0, 0, 5, 0],
    'White Chocolate Chips': [0, 0, 0, 5],
}

wide_df = pd.DataFrame(wide_data)

In [14]:
# Show the wide-format table before converting it
print("Original wide format:")
wide_df

Original wide format:


Unnamed: 0,Sugar,Salt,Flour,Baking Powder,Vanilla,Brown Sugar,Milk Chocolate Chips,Dark Chocolate Chips,White Chocolate Chips
0,10,2,83,0,0,0,5,0,0
1,0,1,85,2,7,0,0,0,0
2,15,0,73,5,0,0,0,5,0
3,0,0,73,5,0,15,0,0,5


In [15]:
# Wide -> compact: each row lists only the ingredients with a value > 0, in column order
print("\nTransformed compact format:")
wide_to_compact_format(wide_df)


Transformed compact format:


Unnamed: 0,Ingredient A Name,Ingredient A weight %,Ingredient B Name,Ingredient B weight %,Ingredient C Name,Ingredient C weight %,Ingredient D Name,Ingredient D weight %
0,Sugar,10,Salt,2,Flour,83,Milk Chocolate Chips,5
1,Salt,1,Flour,85,Baking Powder,2,Vanilla,7
2,Sugar,15,Flour,73,Baking Powder,5,Dark Chocolate Chips,5
3,Flour,73,Baking Powder,5,Brown Sugar,15,White Chocolate Chips,5


In [16]:
# Same conversion as the previous cell, without the printed heading
wide_to_compact_format(wide_df)

Unnamed: 0,Ingredient A Name,Ingredient A weight %,Ingredient B Name,Ingredient B weight %,Ingredient C Name,Ingredient C weight %,Ingredient D Name,Ingredient D weight %
0,Sugar,10,Salt,2,Flour,83,Milk Chocolate Chips,5
1,Salt,1,Flour,85,Baking Powder,2,Vanilla,7
2,Sugar,15,Flour,73,Baking Powder,5,Dark Chocolate Chips,5
3,Flour,73,Baking Powder,5,Brown Sugar,15,White Chocolate Chips,5


In [17]:
# Round trip: wide -> compact -> wide. The columns come back sorted alphabetically.
compact_to_wide_format(wide_to_compact_format(wide_df))

Unnamed: 0,Baking Powder,Brown Sugar,Dark Chocolate Chips,Flour,Milk Chocolate Chips,Salt,Sugar,Vanilla,White Chocolate Chips
0,0,0,0,83,5,2,10,0,0
1,2,0,0,85,0,1,0,7,0
2,5,0,5,73,0,0,15,0,0
3,5,15,0,73,0,0,0,0,5


In [45]:
# Reference for the round trip: the original table with its columns sorted the same way
wide_df[sorted(wide_df.columns)]

Unnamed: 0,Baking Powder,Brown Sugar,Dark Chocolate Chips,Flour,Milk Chocolate Chips,Salt,Sugar,Vanilla,White Chocolate Chips
0,0,0,0,83,5,2,10,0,0
1,2,0,0,85,0,1,0,7,0
2,5,0,5,73,0,0,15,0,0
3,5,15,0,73,0,0,0,0,5
