# Custom code for generating response functions & datasets:
- Currently, response functions are multi-dimensional sigmoids meaning all input-output relationships will be monotonic. Eventually, might be nice to support non-monotonic relationships as well, so that certain input features can have an "optimum" with worse performance on either side of the optimum.
- Also note: this currently only works for generating non-formulations datasets. Eventually, want to support formulations as well.

In [20]:
import numpy as np
import pandas as pd
from typing import List, Tuple, Optional

## These functions are doing most of the work:

### Constrained Simplex Sampling

#### TODO: make this a little smarter; currently this is very bad at sampling from small constraint ranges

In [21]:
def sample_from_constrained_simplex(
    n_dimensions: int,
    constraints: Optional[List[Tuple[float, float]]] = None,
    max_attempts: int = 1000
):
    """
    Generate a random point from an N-dimensional simplex with optional element-wise constraints.
    
    Parameters:
        n_dimensions (int): Number of dimensions for the simplex
        constraints (List[Tuple[float, float]], optional): List of (min, max) constraints for each dimension.
            Use None for unconstrained dimensions. Example: [(0.2, 0.4), None, (0, 0.5)]
        max_attempts (int): Maximum number of attempts to find a valid solution
        
    Returns:
        numpy.ndarray: Array of N numbers between 0 and 1 that sum to 1 and satisfy constraints
        
    Raises:
        ValueError: If constraints are impossible to satisfy or if max_attempts is reached
    """

    if n_dimensions==0:
        sample = np.array([])
        return sample

    # Initialize constraints if not provided
    if constraints is None:
        constraints = [None] * n_dimensions
    elif len(constraints) != n_dimensions:
        raise ValueError("Length of constraints must match n_dimensions")
    
    # Validate constraints
    total_min = sum(c[0] for c in constraints if c is not None)
    if total_min > 1:
        raise ValueError("Sum of minimum constraints exceeds 1")
    
    for attempt in range(max_attempts):
        try:
            # Generate initial random sample
            sample = np.random.random(n_dimensions)
            sample = sample / np.sum(sample)  # Normalize to sum to 1
            
            # Apply constraints iteratively
            for _ in range(n_dimensions * 2):  # Allow multiple passes for adjustment
                modified = False
                
                # Adjust values to meet constraints
                for i, constraint in enumerate(constraints):
                    if constraint is not None:
                        min_val, max_val = constraint
                        if sample[i] < min_val:
                            deficit = min_val - sample[i]
                            # Take deficit proportionally from unconstrained elements
                            free_indices = [j for j, c in enumerate(constraints) 
                                         if c is None or (j != i and sample[j] > c[0])]
                            if not free_indices:
                                raise ValueError("Cannot satisfy minimum constraint")
                            weights = np.array([sample[j] for j in free_indices])
                            weights = weights / weights.sum()
                            for j, w in zip(free_indices, weights):
                                sample[j] -= deficit * w
                            sample[i] = min_val
                            modified = True
                        elif sample[i] > max_val:
                            excess = sample[i] - max_val
                            # Distribute excess proportionally to unconstrained elements
                            free_indices = [j for j, c in enumerate(constraints) 
                                         if c is None or (j != i and sample[j] < c[1])]
                            if not free_indices:
                                raise ValueError("Cannot satisfy maximum constraint")
                            sample[free_indices] += excess / len(free_indices)
                            sample[i] = max_val
                            modified = True
                
                # Normalize to sum to 1
                sample = sample / np.sum(sample)
                
                # Check if all constraints are satisfied
                constraints_satisfied = all(
                    c is None or (c[0] <= v <= c[1])
                    for c, v in zip(constraints, sample)
                )
                
                if constraints_satisfied and abs(sum(sample) - 1.0) < 1e-10:
                    return sample
                
                if not modified:
                    break
                    
        except ValueError:
            continue
            
    raise ValueError(f"Could not find valid solution after {max_attempts} attempts")

### TODO: allow user to add noise to the response functions (make use of the `noise` argument which currently does nothing)

In [22]:
### D-dimensional sigmoid function with the given set of D coefficients:
def sigmoid(input_row, coefs):
    value = 1 / (1 + np.exp(-1 * np.matmul(input_row, coefs)))
    return value


def build_sythetic_demo_dataset(inputs=5, outputs=1, num_rows=10, noise=0, coefs=None):

    ### TODO: allow user to add noise to the response functions (using the `noise` argument)
    
    if isinstance(inputs, int):
        num_inputs = inputs
    else:
        general_inputs = inputs["general"]
        formulation_inputs = inputs["formulation"]
        num_general_inputs = len(general_inputs)
        num_formulation_inputs = len(formulation_inputs)
        all_inputs = list(general_inputs) + list(formulation_inputs)
        num_inputs = len(all_inputs)
        if inputs["formulation"]:
            formulation_constraints = [(formulation_inputs[input_]["min"], formulation_inputs[input_]["max"]) for input_ in formulation_inputs]


    if isinstance(outputs, int):
        num_outputs = outputs
    else:
        num_outputs = len(outputs)  


    # Randomly set coefficients for the response function if not set by the user   
    if coefs==None:
        coefs = np.array([[np.random.uniform(-1, 1) for i in range(num_inputs)] for k in range(num_outputs)])

    
    # Generate input values
    if isinstance(inputs, int):
        num_inputs = inputs
        X = np.array([[np.random.uniform(-2, 2) for i in range(num_inputs)] for j in range(num_rows)])
    else:
        X_general = np.array([[np.random.uniform(-2, 2) for i in range(num_general_inputs)] for j in range(num_rows)])
        if inputs["formulation"]:
            X_formulation = np.array([sample_from_constrained_simplex(n_dimensions=num_formulation_inputs, constraints=formulation_constraints) for j in range(num_rows)])
            X = np.concatenate((X_general, X_formulation), axis=1)
        else:
            X = X_general


    # Generate output values
    y = list()
    for k in range(num_outputs):
        y.append(list())
        for row in X:
            y[k].append(sigmoid(row, coefs[k]))

    y = np.array(y)

    
    # Create pandas DataFrame for the generated data & name the columns
    data_df = pd.DataFrame()

    for i in range(num_inputs):
        if isinstance(inputs, int):
            data_df[f"x_{i+1}"] = X[:, i]
        else:
            data_df[all_inputs[i]] = X[:, i]
    
    for k in range(num_outputs):
        if isinstance(outputs, int):
            data_df[f"y_{k+1}"] = y[k]
        else:
            data_df[list(outputs)[k]] = y[k]









    ### TODO: clean this section up
    #################################
    if isinstance(inputs, int):
        pass
    else:
        df = data_df.copy()
        df_scaled = df.copy()

        for col in df.columns:
            if col in general_inputs:
                scaled_col = (df[col].to_numpy() + 2) / 4
            else:
                scaled_col = df[col]
            df_scaled[col] = scaled_col

        all_columns = dict()
        # all_columns.update(all_inputs)
        all_columns.update(general_inputs)
        all_columns.update(formulation_inputs)
        all_columns.update(outputs)

        for col in all_columns:
            if col in general_inputs or col in outputs:
                df_scaled[col] = df_scaled[col] * (all_columns[col]["max"] - all_columns[col]["min"]) + all_columns[col]["min"]

        column_renaming = {col: f'{col}_{all_columns[col]["units"]}' for col in all_columns}
        df_scaled = df_scaled.rename(column_renaming, axis=1)

        data_df = df_scaled

    #################################


    coefs_df = pd.DataFrame(coefs)
    coefs_df = coefs_df.rename(index={k: f"y_{k+1}" for k in range(len(coefs_df))})


    ### TODO: fix this part!!!!!

    # coefs_df = coefs_df.rename(columns={i: list(inputs)[i] for i in range(len(coefs_df.T))})
    # coefs_df = coefs_df.rename(index={k: list(outputs)[k] for k in range(len(coefs_df))})


    
    return data_df, coefs_df

### Convert ingredient recipe data tables from "Wide" to "Compact" format:

In [23]:
def wide_to_compact_format(df):
    """
    Convert formulation data from wide format to compact format.
    
    Parameters:
    df (pandas.DataFrame): Input DataFrame in wide format where:
        - Each row is a formulation
        - Each column is an ingredient with its weight percentage
    
    Returns:
    pandas.DataFrame: Transformed DataFrame in compact format with columns:
        - Ingredient A Name, Ingredient A weight %, Ingredient B Name, etc.
    """
    # Create an empty list to store the transformed rows
    compact_rows = []
    
    # Iterate through each formulation (row)
    for idx, row in df.iterrows():
        # Get non-zero ingredients and their percentages
        ingredients = row[row > 0]
        
        # Create a new row with alternating ingredient names and percentages
        new_row = {}
        for i, (ingredient_name, percentage) in enumerate(ingredients.items(), 1):
            new_row[f'Ingredient {chr(64+i)} Name'] = ingredient_name
            new_row[f'Ingredient {chr(64+i)} weight %'] = percentage
            
        compact_rows.append(new_row)
    
    # Convert to DataFrame
    result_df = pd.DataFrame(compact_rows)
    
    return result_df

### Convert ingredient recipe data tables from "Compact" to "Wide" format:

In [24]:
def compact_to_wide_format(df):
    """
    Convert formulation data from compact format to wide format.
    
    Parameters:
    df (pandas.DataFrame): Input DataFrame in compact format where:
        - Each row is a formulation
        - Columns alternate between ingredient names and weight percentages
    
    Returns:
    pandas.DataFrame: Transformed DataFrame in wide format where:
        - Each row is a formulation
        - Each column is an ingredient with its weight percentage
    """
    # Create a list to store the transformed rows
    wide_rows = []
    
    # Get all unique ingredients across all formulations
    ingredient_columns = [col for col in df.columns if 'Name' in col]
    all_ingredients = set()
    for col in ingredient_columns:
        all_ingredients.update(df[col].dropna().unique())
    
    # Process each formulation
    for idx, row in df.iterrows():
        # Create a dictionary with all ingredients initialized to 0
        formulation = {ingredient: 0 for ingredient in all_ingredients}
        
        # Fill in the actual values
        for i in range(1, len(df.columns) // 2 + 1):
            name_col = f'Ingredient {chr(64+i)} Name'
            weight_col = f'Ingredient {chr(64+i)} weight %'
            
            if name_col in df.columns and pd.notna(row[name_col]):
                ingredient_name = row[name_col]
                formulation[ingredient_name] = row[weight_col]
        
        wide_rows.append(formulation)
    
    # Convert to DataFrame
    result_df = pd.DataFrame(wide_rows)
    
    # Sort columns alphabetically for consistency
    result_df = result_df.reindex(sorted(result_df.columns), axis=1)
    
    return result_df

## Examples

### Example 1: generate arbitrary # of rows & columns, with no column names

In [25]:
data_df, coefs = build_sythetic_demo_dataset(inputs=9, outputs=4, num_rows=10)
data_df

Unnamed: 0,x_1,x_2,x_3,x_4,x_5,x_6,x_7,x_8,x_9,y_1,y_2,y_3,y_4
0,0.775274,1.858443,1.435851,-0.962765,1.79794,-0.558227,0.962568,-1.314304,-0.072742,0.00131,0.676048,0.811025,0.057742
1,0.921463,0.247986,1.865291,0.951022,-1.563817,0.807798,1.743722,-0.690826,-0.539389,0.436157,0.416758,0.545094,0.424638
2,0.263483,0.735561,1.80204,0.139366,1.878232,-0.870966,0.50295,-0.369764,1.133009,0.004453,0.873669,0.409588,0.123065
3,-0.19986,-0.720936,0.542023,-1.296319,1.77024,-1.816948,0.010615,-1.230467,-0.373321,0.022662,0.888628,0.66698,0.219673
4,-1.700046,0.733878,0.256438,-1.559247,-1.882089,-1.47978,-1.984878,0.92363,-0.572252,0.604467,0.501922,0.730672,0.113981
5,-0.625469,1.74896,-0.105684,1.181713,-1.250616,-1.354986,-0.962794,-1.999117,0.08723,0.236054,0.528022,0.612829,0.108643
6,-0.790975,-0.002137,0.515927,-0.199728,0.146025,-0.30438,0.312415,1.18158,-0.17879,0.593957,0.458576,0.451929,0.410594
7,-1.880453,-1.688978,-1.182586,1.368425,1.918007,-1.691301,-0.922775,1.878239,-1.801371,0.996294,0.292592,0.320488,0.584929
8,-1.123582,1.259157,-1.09947,1.365413,1.209428,1.409219,0.729758,0.055634,0.148694,0.870093,0.093961,0.464066,0.776161
9,0.177445,1.657991,-0.426003,-1.115564,-0.442054,1.028315,1.289228,-1.741144,0.08054,0.068555,0.283277,0.802689,0.515209


In [26]:
coefs_df = pd.DataFrame(coefs)
coefs_df = coefs_df.rename(index={k: f"y_{k+1}" for k in range(len(coefs_df))})

### CHECK:
coefs_df

Unnamed: 0,0,1,2,3,4,5,6,7,8
y_1,-0.631516,-0.833431,-0.850837,0.94192,-0.606753,0.738597,-0.24243,0.611132,-0.968736
y_2,0.188287,-0.479475,0.40973,-0.216511,0.078706,-0.590069,-0.082709,-0.262365,0.716257
y_3,-0.00692,0.347501,0.109907,-0.422782,0.058364,0.197543,-0.178506,-0.291186,-0.63022
y_4,-0.317657,-0.812548,-0.506678,0.071482,-0.062485,0.80372,0.226162,-0.093061,0.255925


### Example 2: create a laser welding dataset with named columns

#### Assign "reasonable" ranges and desired units for each input & output column:

In [27]:
inputs = {
    "general": {
        "Laser Power": {"min": 100, "max": 1000, "units": "W"},
        "Pulse Duration": {"min": 0.1, "max": 10, "units": "ms"},
        "Welding Speed": {"min": 1, "max": 200, "units": "mm/s"},
        "Beam Diameter": {"min": 0.1, "max": 3, "units": "mm"},
        "Focal Position": {"min": -2, "max": 5, "units": "mm"},
        # "Shielding Gas Type": {"min": , "max": , "units": "n/a"},  # leave out categorical inputs for now
        "Flow Rate": {"min": 5, "max": 25, "units": "L/min"},
        "Heat Input": {"min": 10, "max": 500, "units": "J/mm"},
        "Ambient Temperature": {"min": 20, "max": 30, "units": "degC"},
        "Cooling Rate": {"min": 10, "max": 1000, "units": "degC/s"},
    },
    "formulation": {
        # "Carbon": {"min": 0.0, "max": 0.0008, "units": "%"},
        # "Manganese": {"min": 0.00, "max": 0.02, "units": "%"},
        # "Molybdenum": {"min": 0.01, "max": 0.05, "units": "%"},
        "Nickel": {"min": 0.05, "max": 0.50, "units": "%"},
        "Chromium": {"min": 0.10, "max": 0.40, "units": "%"},
        "Iron": {"min": 0.0, "max": 1.0, "units": "%"},
    },
}

outputs = {
    "Hardness": {"min": 200, "max": 800, "units": "HV"},
    "Fatigue Life": {"min": 10000, "max": 100000, "units": "numCycles"},
    "Wear Rate": {"min": 0.01, "max": 1.0, "units": "mg/m"},
    "Cutting Efficiency": {"min": 0.1, "max": 5, "units": "m/s"},
}

In [28]:
data_df, coefs_df = build_sythetic_demo_dataset(inputs=inputs, outputs=outputs, num_rows=15)
data_df

Unnamed: 0,Laser Power_W,Pulse Duration_ms,Welding Speed_mm/s,Beam Diameter_mm,Focal Position_mm,Flow Rate_L/min,Heat Input_J/mm,Ambient Temperature_degC,Cooling Rate_degC/s,Nickel_%,Chromium_%,Iron_%,Hardness_HV,Fatigue Life_numCycles,Wear Rate_mg/m,Cutting Efficiency_m/s
0,747.82477,6.088557,38.837249,1.33973,-1.149684,7.307837,434.558509,20.859892,950.665818,0.470012,0.4,0.129988,666.953441,11885.864563,0.878207,2.925855
1,240.555075,7.428924,103.94444,1.699466,2.20629,18.035884,307.043011,25.132349,493.418372,0.5,0.4,0.1,415.041552,41923.41118,0.351665,1.897693
2,635.038987,4.167952,149.752268,0.124408,4.506985,11.114167,265.152959,24.726085,176.192922,0.419524,0.4,0.180476,225.240296,35053.445075,0.602052,4.471701
3,846.565943,7.96197,88.924274,1.689982,-1.155575,7.945082,316.117853,25.195481,945.991275,0.128903,0.4,0.471097,625.675171,14666.160856,0.48065,1.964867
4,173.191569,3.689389,57.226656,2.116669,3.111895,24.085098,462.437432,29.667675,461.018303,0.066962,0.4,0.533038,771.54106,87822.110628,0.386716,0.191496
5,462.865937,1.886571,85.694646,2.608962,3.705617,16.091343,206.334465,20.984739,289.158088,0.223735,0.253033,0.523231,516.927032,61604.546269,0.621485,4.371926
6,135.919693,7.215864,120.819509,2.774091,0.032081,22.542513,224.786888,27.403992,411.200558,0.256426,0.4,0.343574,678.761228,88891.285622,0.155635,0.496115
7,192.594117,4.413763,27.989293,2.334797,2.229336,17.739793,196.942577,27.401617,965.942561,0.423173,0.285652,0.291175,767.124038,76828.686679,0.604772,1.203708
8,860.247911,2.087838,68.337507,1.042739,0.686891,17.356311,338.226795,21.304647,158.426431,0.275422,0.327569,0.397009,587.942778,42892.439844,0.931252,4.235688
9,211.98689,8.609907,61.668599,1.220692,-0.075194,18.398425,219.420479,28.185429,179.594343,0.454006,0.255148,0.290845,623.05644,87584.550217,0.211024,0.453819


In [29]:
coefs_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
y_1,-0.167522,-0.735655,-0.995782,0.437325,-0.721978,0.093445,0.157578,0.538761,0.151709,-0.674191,0.865445,0.103806
y_2,-0.559759,-0.911196,-0.085058,0.229927,-0.467714,0.123653,-0.510802,0.839254,-0.610951,0.212934,-0.914607,-0.168996
y_3,0.089145,-0.755587,-0.189265,-0.869663,-0.25794,0.639316,-0.082806,-0.679161,0.59997,0.178834,0.192352,0.494502
y_4,0.853919,0.007177,0.210441,-0.167614,0.661144,0.255834,-0.861209,-0.924915,0.166691,0.942584,0.604754,-0.708722


In [30]:
# coefs_df = pd.DataFrame(coefs)

coefs_df = coefs_df.rename(columns={i: list(all_inputs)[i] for i in range(len(coefs_df.T))})
coefs_df = coefs_df.rename(index={k: list(outputs)[k] for k in range(len(coefs_df))})

### CHECK:
coefs_df

NameError: name 'all_inputs' is not defined

## [Optional] Save result to Excel or CSV file: 

### Convert ingredient recipe data tables from "Wide" to "Compact" format:

In [32]:
wide_to_compact_format(data_df)

Unnamed: 0,Ingredient A Name,Ingredient A weight %,Ingredient B Name,Ingredient B weight %,Ingredient C Name,Ingredient C weight %,Ingredient D Name,Ingredient D weight %,Ingredient E Name,Ingredient E weight %,...,Ingredient L Name,Ingredient L weight %,Ingredient M Name,Ingredient M weight %,Ingredient N Name,Ingredient N weight %,Ingredient O Name,Ingredient O weight %,Ingredient P Name,Ingredient P weight %
0,Laser Power_W,747.82477,Pulse Duration_ms,6.088557,Welding Speed_mm/s,38.837249,Beam Diameter_mm,1.33973,Flow Rate_L/min,7.307837,...,Hardness_HV,666.953441,Fatigue Life_numCycles,11885.864563,Wear Rate_mg/m,0.878207,Cutting Efficiency_m/s,2.925855,,
1,Laser Power_W,240.555075,Pulse Duration_ms,7.428924,Welding Speed_mm/s,103.94444,Beam Diameter_mm,1.699466,Focal Position_mm,2.20629,...,Iron_%,0.1,Hardness_HV,415.041552,Fatigue Life_numCycles,41923.41118,Wear Rate_mg/m,0.351665,Cutting Efficiency_m/s,1.897693
2,Laser Power_W,635.038987,Pulse Duration_ms,4.167952,Welding Speed_mm/s,149.752268,Beam Diameter_mm,0.124408,Focal Position_mm,4.506985,...,Iron_%,0.180476,Hardness_HV,225.240296,Fatigue Life_numCycles,35053.445075,Wear Rate_mg/m,0.602052,Cutting Efficiency_m/s,4.471701
3,Laser Power_W,846.565943,Pulse Duration_ms,7.96197,Welding Speed_mm/s,88.924274,Beam Diameter_mm,1.689982,Flow Rate_L/min,7.945082,...,Hardness_HV,625.675171,Fatigue Life_numCycles,14666.160856,Wear Rate_mg/m,0.48065,Cutting Efficiency_m/s,1.964867,,
4,Laser Power_W,173.191569,Pulse Duration_ms,3.689389,Welding Speed_mm/s,57.226656,Beam Diameter_mm,2.116669,Focal Position_mm,3.111895,...,Iron_%,0.533038,Hardness_HV,771.54106,Fatigue Life_numCycles,87822.110628,Wear Rate_mg/m,0.386716,Cutting Efficiency_m/s,0.191496
5,Laser Power_W,462.865937,Pulse Duration_ms,1.886571,Welding Speed_mm/s,85.694646,Beam Diameter_mm,2.608962,Focal Position_mm,3.705617,...,Iron_%,0.523231,Hardness_HV,516.927032,Fatigue Life_numCycles,61604.546269,Wear Rate_mg/m,0.621485,Cutting Efficiency_m/s,4.371926
6,Laser Power_W,135.919693,Pulse Duration_ms,7.215864,Welding Speed_mm/s,120.819509,Beam Diameter_mm,2.774091,Focal Position_mm,0.032081,...,Iron_%,0.343574,Hardness_HV,678.761228,Fatigue Life_numCycles,88891.285622,Wear Rate_mg/m,0.155635,Cutting Efficiency_m/s,0.496115
7,Laser Power_W,192.594117,Pulse Duration_ms,4.413763,Welding Speed_mm/s,27.989293,Beam Diameter_mm,2.334797,Focal Position_mm,2.229336,...,Iron_%,0.291175,Hardness_HV,767.124038,Fatigue Life_numCycles,76828.686679,Wear Rate_mg/m,0.604772,Cutting Efficiency_m/s,1.203708
8,Laser Power_W,860.247911,Pulse Duration_ms,2.087838,Welding Speed_mm/s,68.337507,Beam Diameter_mm,1.042739,Focal Position_mm,0.686891,...,Iron_%,0.397009,Hardness_HV,587.942778,Fatigue Life_numCycles,42892.439844,Wear Rate_mg/m,0.931252,Cutting Efficiency_m/s,4.235688
9,Laser Power_W,211.98689,Pulse Duration_ms,8.609907,Welding Speed_mm/s,61.668599,Beam Diameter_mm,1.220692,Flow Rate_L/min,18.398425,...,Hardness_HV,623.05644,Fatigue Life_numCycles,87584.550217,Wear Rate_mg/m,0.211024,Cutting Efficiency_m/s,0.453819,,


In [12]:
# df_scaled.to_excel("Demo Datasets/Laser Welding (Synthetic)/laser_welding.xlsx", index=False)
# df_scaled.to_csv("Demo Datasets/Laser Welding (Synthetic)/laser_welding.csv", index=False)

# Done!

## Convert ingredient recipe data tables from "Wide" to "Compact" format:

In [13]:
# Example usage

# Create sample data in wide format
wide_data = {
    'Sugar': [10, 0, 15, 0],
    'Salt': [2, 1, 0, 0],
    'Flour': [83, 85, 73, 73],
    'Baking Powder': [0, 2, 5, 5],
    'Vanilla': [0, 7, 0, 0],
    'Brown Sugar': [0, 0, 0, 15],
    'Milk Chocolate Chips': [5, 0, 0, 0],
    'Dark Chocolate Chips': [0, 0, 5, 0],
    'White Chocolate Chips': [0, 0, 0, 5],
}

wide_df = pd.DataFrame(wide_data)

In [14]:
print("Original wide format:")
wide_df

Original wide format:


Unnamed: 0,Sugar,Salt,Flour,Baking Powder,Vanilla,Brown Sugar,Milk Chocolate Chips,Dark Chocolate Chips,White Chocolate Chips
0,10,2,83,0,0,0,5,0,0
1,0,1,85,2,7,0,0,0,0
2,15,0,73,5,0,0,0,5,0
3,0,0,73,5,0,15,0,0,5


In [15]:
print("\nTransformed compact format:")
wide_to_compact_format(wide_df)


Transformed compact format:


Unnamed: 0,Ingredient A Name,Ingredient A weight %,Ingredient B Name,Ingredient B weight %,Ingredient C Name,Ingredient C weight %,Ingredient D Name,Ingredient D weight %
0,Sugar,10,Salt,2,Flour,83,Milk Chocolate Chips,5
1,Salt,1,Flour,85,Baking Powder,2,Vanilla,7
2,Sugar,15,Flour,73,Baking Powder,5,Dark Chocolate Chips,5
3,Flour,73,Baking Powder,5,Brown Sugar,15,White Chocolate Chips,5


In [16]:
wide_to_compact_format(wide_df)

Unnamed: 0,Ingredient A Name,Ingredient A weight %,Ingredient B Name,Ingredient B weight %,Ingredient C Name,Ingredient C weight %,Ingredient D Name,Ingredient D weight %
0,Sugar,10,Salt,2,Flour,83,Milk Chocolate Chips,5
1,Salt,1,Flour,85,Baking Powder,2,Vanilla,7
2,Sugar,15,Flour,73,Baking Powder,5,Dark Chocolate Chips,5
3,Flour,73,Baking Powder,5,Brown Sugar,15,White Chocolate Chips,5


In [17]:
compact_to_wide_format(wide_to_compact_format(wide_df))

Unnamed: 0,Baking Powder,Brown Sugar,Dark Chocolate Chips,Flour,Milk Chocolate Chips,Salt,Sugar,Vanilla,White Chocolate Chips
0,0,0,0,83,5,2,10,0,0
1,2,0,0,85,0,1,0,7,0
2,5,0,5,73,0,0,15,0,0
3,5,15,0,73,0,0,0,0,5


In [45]:
wide_df[sorted(wide_df.columns)]

Unnamed: 0,Baking Powder,Brown Sugar,Dark Chocolate Chips,Flour,Milk Chocolate Chips,Salt,Sugar,Vanilla,White Chocolate Chips
0,0,0,0,83,5,2,10,0,0
1,2,0,0,85,0,1,0,7,0
2,5,0,5,73,0,0,15,0,0
3,5,15,0,73,0,0,0,0,5
