# Custom code for generating response functions & datasets:

1. Support N rows, D input dimensions, Y output dimensions
2. Support setting bounds for each input & output
3. Support setting different levels of noise on each output
4. Support setting magnitude & direction of influence of each input on a given output
5. Be able to feed through any "forward-predict" set of inputs to the "ground truth" function after it's been created

In [1]:
import numpy as np
import pandas as pd

In [7]:
def sigmoid(input_row, coefs):
    """Return the logistic sigmoid of the linear combination of inputs.

    ``input_row`` and ``coefs`` are combined with ``np.matmul`` (for two
    length-D vectors this is their dot product) and the result is squashed
    through ``1 / (1 + exp(-z))``.
    """
    linear_term = np.matmul(input_row, coefs)
    damping = np.exp(-1 * linear_term)
    return 1 / (1 + damping)


def build_synthetic_dataset(inputs=5, outputs=1, num_rows=10, noise=0, coefs=None):
    """Generate a random dataset whose outputs are sigmoid responses of the inputs.

    Every input value is drawn uniformly from [-2, 2].  Output ``k`` of a row
    is ``sigmoid(row, coefs[k])``, i.e. a value in the unit interval.

    Parameters
    ----------
    inputs, outputs : int or collection of names
        Either the number of columns (auto-named ``x_1..`` / ``y_1..``) or a
        collection whose items are used as column names (iterating a dict
        yields its keys).
    num_rows : int
        Number of rows to generate.
    noise : number
        Accepted for interface compatibility but currently NOT applied to
        the generated outputs.
    coefs : array-like of shape (num_outputs, num_inputs), optional
        Coefficients of each output's response function.  When omitted they
        are drawn uniformly from [-1, 1].

    Returns
    -------
    (pandas.DataFrame, coefs)
        The dataset (input columns followed by output columns) and the
        coefficients used: the generated array, or the ``coefs`` argument
        exactly as it was passed in.

    Raises
    ------
    ValueError
        If ``coefs`` is given with a shape other than
        ``(num_outputs, num_inputs)``.
    """

    def _count_and_names(spec, prefix):
        # An integer means "this many auto-named columns"; anything else is
        # treated as a collection whose items are the column names.
        if isinstance(spec, (int, np.integer)):
            return int(spec), [f"{prefix}_{i + 1}" for i in range(spec)]
        names = list(spec)
        return len(names), names

    num_inputs, input_names = _count_and_names(inputs, "x")
    num_outputs, output_names = _count_and_names(outputs, "y")

    # `is None` rather than `== None`: comparing an ndarray with `==` is
    # elementwise and makes the `if` raise for user-supplied arrays.
    if coefs is None:
        coefs = np.random.uniform(-1, 1, size=(num_outputs, num_inputs))

    coef_matrix = np.asarray(coefs, dtype=float)
    if coef_matrix.shape != (num_outputs, num_inputs):
        raise ValueError(
            f"coefs must have shape ({num_outputs}, {num_inputs}), "
            f"got {coef_matrix.shape}"
        )

    # Explicit `size` keeps X two-dimensional even when num_rows == 0.
    X = np.random.uniform(-2, 2, size=(num_rows, num_inputs))

    # One matrix product evaluates every output for every row:
    # responses[j, k] == sigmoid(X[j], coef_matrix[k]).
    responses = sigmoid(X, coef_matrix.T)

    columns = {name: X[:, i] for i, name in enumerate(input_names)}
    columns.update(
        {name: responses[:, k] for k, name in enumerate(output_names)}
    )
    data_df = pd.DataFrame(columns)

    return data_df, coefs

In [8]:
# Column specifications, keyed by column name.  Each spec holds the physical
# range ("min"/"max") and the "units" label for that column.
inputs = {
    name: {"min": low, "max": high, "units": units}
    for name, low, high, units in [
        ("Laser Power", 100, 1000, "W"),
        ("Pulse Duration", 0.1, 10, "ms"),
        ("Welding Speed", 1, 200, "mm/s"),
        ("Beam Diameter", 0.1, 3, "mm"),
        ("Focal Position", -2, 5, "mm"),
        # ("Shielding Gas Type", <min>, <max>, "n/a"),  # left out: no min/max filled in
        ("Flow Rate", 5, 25, "L/min"),
        ("Heat Input", 10, 500, "J/mm"),
        ("Ambient Temperature", 20, 30, "degC"),
        ("Cooling Rate", 10, 1000, "degC/s"),
    ]
}

outputs = {
    name: {"min": low, "max": high, "units": units}
    for name, low, high, units in [
        ("Hardness", 200, 800, "HV"),
        ("Fatigue Life", 10000, 100000, "numCycles"),
        ("Wear Rate", 0.01, 1.0, "mg/m"),
        ("Cutting Efficiency", 0.1, 5, "m/s"),
    ]
}

In [9]:
# Generate 80 rows.  The keys of `inputs` / `outputs` become the column names;
# no `coefs` are passed, so the response coefficients are drawn at random and
# returned alongside the data.
# NOTE(review): no random seed is set in the visible cells, so re-running this
# cell presumably produces a different dataset each time — confirm.
data_df, coefs = build_synthetic_dataset(inputs=inputs, outputs=outputs, num_rows=80)

In [14]:
# Tabulate the response coefficients: one row per output, relabelled
# y_1, y_2, ... from the default 0-based positional index.
coefs_df = pd.DataFrame(coefs).rename(index=lambda k: f"y_{k+1}")

coefs_df

Unnamed: 0,0,1,2,3,4,5,6,7,8
y_1,-0.634988,-0.610403,0.15529,-0.241337,0.592022,-0.845849,0.627963,-0.587279,0.805159
y_2,-0.44967,0.501203,-0.397014,0.390859,0.957908,0.557419,-0.125828,0.923404,-0.256149
y_3,-0.053166,-0.055074,0.522483,-0.888262,-0.523606,0.238945,-0.422151,0.406683,0.774202
y_4,0.496607,-0.258563,-0.431953,-0.130258,0.940278,-0.443167,-0.766519,0.416737,0.071739


In [10]:
# Work on a copy so that `data_df` keeps the values exactly as generated.
df = data_df.copy()

# Implement Scaling Strategy:

In [11]:
# Rescale every column onto the physical range given by its spec, then append
# the units to the column name.
df_scaled = df.copy()

# Output specs take precedence if a name appears in both dicts.
all_columns = {**inputs, **outputs}

for col in all_columns:
    low = all_columns[col]["min"]
    high = all_columns[col]["max"]
    if col in inputs:
        # Inputs were generated in [-2, 2]; shift and divide to land in [0, 1].
        unit_values = (df[col].to_numpy() + 2) / 4
    else:
        # Outputs are sigmoid values and already lie in the unit interval.
        unit_values = df_scaled[col]
    df_scaled[col] = unit_values * (high - low) + low

column_renaming = {col: f'{col}_{all_columns[col]["units"]}' for col in all_columns}
df_scaled = df_scaled.rename(columns=column_renaming)

df_scaled

Unnamed: 0,Laser Power_W,Pulse Duration_ms,Welding Speed_mm/s,Beam Diameter_mm,Focal Position_mm,Flow Rate_L/min,Heat Input_J/mm,Ambient Temperature_degC,Cooling Rate_degC/s,Hardness_HV,Fatigue Life_numCycles,Wear Rate_mg/m,Cutting Efficiency_m/s
0,113.713642,2.679319,120.935883,2.096005,-1.482082,14.718795,126.113219,29.970587,294.999900,286.968224,76585.547661,0.766614,1.423620
1,772.844484,9.004940,10.439518,1.857986,4.501493,23.237073,397.839005,21.596130,797.461569,517.568430,90136.271416,0.118067,2.928525
2,918.927783,5.466613,7.111312,2.757836,4.897038,24.606440,261.280991,22.318435,644.429357,339.132412,93376.078194,0.051226,4.358287
3,283.048002,2.941247,147.348596,0.838138,-0.349865,15.791748,417.829625,27.401784,902.487845,752.795608,29779.898934,0.958867,0.582143
4,366.232143,6.082522,185.396807,1.533911,0.979201,18.764714,498.641809,20.670720,555.742623,737.530363,21945.819010,0.475914,0.180458
...,...,...,...,...,...,...,...,...,...,...,...,...,...
75,561.709730,8.346749,28.087096,1.474552,3.563197,24.539299,73.252267,26.125094,407.775649,218.017766,98499.355157,0.407204,4.353199
76,717.699228,3.963958,158.491867,1.502677,3.690833,9.858366,223.177980,24.912593,819.901226,753.813389,39013.626029,0.699564,4.360952
77,789.270262,2.338064,15.942385,0.824970,0.532259,23.306000,342.618033,24.713617,67.733543,238.947482,52966.757203,0.271263,2.181592
78,391.156012,3.725438,99.687214,2.472764,4.029180,6.987129,335.845966,23.084795,148.168380,749.169491,68584.048989,0.029296,3.448862


In [None]:
# df_scaled.to_excel(".xlsx")
# df_scaled.to_csv(".csv")