In [7]:
import pandas as pd

In [8]:
df = pd.read_csv("data.csv") # data from the task

In [9]:
df

Unnamed: 0,Al,Ca,Fe,Mg,Mn,Ni,overpotential
0,0.123204,0.106073,0.528617,0.203194,0.003486,0.035425,1.7122
1,0.036817,0.044191,0.743975,0.107923,0.001843,0.065252,1.7164
2,0.067023,0.150251,0.470664,0.291139,0.007421,0.013501,1.7228
3,0.059822,0.088446,0.430872,0.396597,0.005039,0.019225,1.7250
4,0.060222,0.101457,0.405555,0.414243,0.005944,0.012578,1.7264
...,...,...,...,...,...,...,...
238,0.350899,0.204542,0.258098,0.177185,0.003356,0.005919,2.2541
239,0.212779,0.187240,0.330747,0.257557,0.005539,0.006137,2.2543
240,0.165565,0.185401,0.360911,0.275456,0.006418,0.006249,2.2612
241,0.065797,0.181515,0.424648,0.313278,0.008277,0.006484,2.2632


In [10]:
import torch

In [11]:

Metal = ['Al', 'Ca', 'Fe', 'Mg', 'Mn', 'Ni']
M_list = torch.tensor([
    [0.00149, 0.02, 0.0803, 2.759, 0.0779],
    [0.0055, 0.1374, 0.7697, 1.417, 1.142],
    [3.303, 1.379, 2.4, 0.97, 2.238],
    [0.00155, 5.167, 1.306, 0.744, 1.438],
    [0, 0.0288, 0.0521, 0.0132, 0.0541],
    [0.328, 0, 0, 0, 0]
], dtype=torch.float32).T

abcdelist = torch.tensor(
    [0.27105, 0.567, 0.5632, 0.935, 0.6885], dtype=torch.float32)

In [12]:
M_list

tensor([[1.4900e-03, 5.5000e-03, 3.3030e+00, 1.5500e-03, 0.0000e+00, 3.2800e-01],
        [2.0000e-02, 1.3740e-01, 1.3790e+00, 5.1670e+00, 2.8800e-02, 0.0000e+00],
        [8.0300e-02, 7.6970e-01, 2.4000e+00, 1.3060e+00, 5.2100e-02, 0.0000e+00],
        [2.7590e+00, 1.4170e+00, 9.7000e-01, 7.4400e-01, 1.3200e-02, 0.0000e+00],
        [7.7900e-02, 1.1420e+00, 2.2380e+00, 1.4380e+00, 5.4100e-02, 0.0000e+00]])

In [13]:
def reverse_transform(metal_ratios: torch.Tensor) -> torch.Tensor:
    """
    Given metal_ratios (for Fe, Mn, Ni, Ca, Mg, Al) as a tensor of shape [batch, 6],
    this function recovers the original meteorite ratios [A, B, C, D, E] using
    the method from the original GUI code.

    The steps are:
      1. Define the abundance matrix and compute the transfer_matrix as its transpose.
      2. Solve the system:
             transfer_matrix[0:5] * [A, B, C, D, E]^T = metal_ratios[:, 0:5]^T
         for each batch element.
      3. Normalize the solution so that the sum over A, B, C, D, E equals 1.
      4. Multiply elementwise by (10 * abcdelist).

    Args:
        metal_ratios: A torch.Tensor of shape [batch, 6] containing the metal proportions.

    Returns:
        A torch.Tensor of shape [batch, 5] containing the computed A, B, C, D, E values.
    """
    # Ensure metal_ratios is float type:
    metal_ratios = metal_ratios.to(torch.get_default_dtype())

    # Define abcdelist (scaling factors) as in the original code.
    abcdelist = torch.tensor([0.27105, 0.567, 0.5632, 0.935, 0.6885],
                             dtype=metal_ratios.dtype,
                             device=metal_ratios.device)

    # Define the abundance matrix as given.
    # Each row corresponds to a metal and there are 5 rows;
    # In the original code abundance is 5x6.
    abundance = torch.tensor([
        [3.303,    0,     0.328, 0.0055, 0.00155, 0.00149],
        [1.379, 0.0288,       0, 0.1374,   5.167,    0.02],
        [2.4,     0.0521,     0, 0.7697,   1.306,    0.0803],
        [0.97,    0.0132,     0, 1.417,    0.744,    2.759],
        [2.238,   0.0541,     0, 1.142,    1.438,    0.0779]
    ], dtype=metal_ratios.dtype, device=metal_ratios.device)

    # Compute the transfer_matrix as the transpose of abundance.
    # This results in a 6 x 5 matrix.
    transfer_matrix = abundance.T  # shape: (6,5)


    select_idx = [2, 4, 5, 1, 3]


    metal_ratios_sel = metal_ratios[:, select_idx]
    metal_ratios = metal_ratios_sel

    T_mat = transfer_matrix[:5, :]  # shape: (5,5)

    batch = metal_ratios.shape[0]
    # Prepare an output tensor for meteorite ratios.
    meteorite_ratios = torch.empty(
        (batch, 5), dtype=metal_ratios.dtype, device=metal_ratios.device)

    # Solve the system for each batch element.
    # Since T_mat is the same for every batch element, we iterate over the batch.
    for i in range(batch):
        # Solve T_mat x = metal_ratios[i, 0:5]
        # Here, torch.linalg.solve expects the right-hand side to have shape (5,)
        sol = torch.linalg.solve(T_mat, metal_ratios[i, :5])
        meteorite_ratios[i] = sol

    # Normalize each solution so that the sum of [A,B,C,D,E] is 1.
    meteorite_ratios = meteorite_ratios / \
        meteorite_ratios.sum(dim=1, keepdim=True)

    # Multiply elementwise by 10 * abcdelist.
    # This step scales the solution.
    # we dont do it, because the We operate in the normalised cordinate system
    # meteorite_ratios = meteorite_ratios * (10 * abcdelist)

    return meteorite_ratios

In [14]:
metal_sampled = reverse_transform(torch.tensor(df[['Al', 'Ca', 'Fe', 'Mg', 'Mn', 'Ni']
                                                  ].to_numpy(), dtype=torch.float32))

In [15]:
metal_sampled  # order is ABCDE

tensor([[0.5000, 0.1000, 0.1000, 0.2000, 0.1000],
        [0.8000, 0.0500, 0.0500, 0.0500, 0.0500],
        [0.2000, 0.1000, 0.3000, 0.1000, 0.3000],
        ...,
        [0.1000, 0.1000, 0.1000, 0.3000, 0.4000],
        [0.1000, 0.1000, 0.1000, 0.1000, 0.6000],
        [0.1000, 0.1000, 0.1000, 0.2000, 0.5000]])

In [16]:
samples = [str(list(row.tolist())) for row in metal_sampled[:, 0:-1]]

In [17]:
# Create a DataFrame
df_out = pd.DataFrame({
    'samples': samples,
    'energies': df["overpotential"].to_list()
})

In [18]:
metal_sampled

tensor([[0.5000, 0.1000, 0.1000, 0.2000, 0.1000],
        [0.8000, 0.0500, 0.0500, 0.0500, 0.0500],
        [0.2000, 0.1000, 0.3000, 0.1000, 0.3000],
        ...,
        [0.1000, 0.1000, 0.1000, 0.3000, 0.4000],
        [0.1000, 0.1000, 0.1000, 0.1000, 0.6000],
        [0.1000, 0.1000, 0.1000, 0.2000, 0.5000]])

In [19]:
df_out

Unnamed: 0,samples,energies
0,"[0.4999999403953552, 0.10000000894069672, 0.10...",1.7122
1,"[0.7999998927116394, 0.05000000074505806, 0.05...",1.7164
2,"[0.19999998807907104, 0.09999999403953552, 0.3...",1.7228
3,"[0.30000001192092896, 0.30000001192092896, 0.1...",1.7250
4,"[0.19999994337558746, 0.29999998211860657, 0.3...",1.7264
...,...,...
238,"[0.10000009089708328, 0.050000034272670746, 0....",2.2541
239,"[0.1000000387430191, 0.10000003129243851, 0.09...",2.2543
240,"[0.10000003129243851, 0.10000002384185791, 0.0...",2.2612
241,"[0.09999994188547134, 0.09999997168779373, 0.1...",2.2632


In [6]:
df_out.to_csv("states_train_trimmed.csv")

NameError: name 'df_out' is not defined

In [20]:
import torch

In [5]:


# Define the conversion matrix M_list.
# Each metal's coefficients are provided as in your original code.
# We construct M_list with shape (5, 6) (rows correspond to A, B, C, D, E).
metal_names = ['Al', 'Ca', 'Fe', 'Mg', 'Mn', 'Ni']
Al_list = [0.00149, 0.02, 0.0803, 2.759, 0.0779]
Ca_list = [0.0055, 0.1374, 0.7697, 1.417, 1.142]
Fe_list = [3.303, 1.379, 2.4, 0.97, 2.238]
Mg_list = [0.00155, 5.167, 1.306, 0.744, 1.438]
Mn_list = [0, 0.0288, 0.0521, 0.0132, 0.0541]
Ni_list = [0.328, 0, 0, 0, 0]

M_list = torch.tensor([
    Al_list,
    Ca_list,
    Fe_list,
    Mg_list,
    Mn_list,
    Ni_list,
], dtype=torch.float).T  # shape (5, 6)

# Define abcdelist as a tensor of scaling factors for the five components.
# Replace these with your actual values.
# abcdelist = torch.tensor([1.0, 1.0, 1.0, 1.0, 1.0], dtype=torch.float)
abcdelist = torch.tensor(
    [0.27105, 0.567, 0.5632, 0.935, 0.6885], dtype=torch.float32)






In [21]:

def convert_batch(input_batch: torch.Tensor):
    """
    Convert a batch of ABCDE values to metal compositions.

    Each input vector should be of the form [A, B, C, D, E].
    If the last component (E) is less than 0.1, it is set to 0.1 and the row is renormalized.
    The function also computes an outline which is 0.1 - original_E when E was low.

    Args:
        input_batch (torch.Tensor): Tensor of shape (batch_size, 5) containing the [A, B, C, D, E] values.
        abcdelist (torch.Tensor): 1D tensor of shape (5,) used for scaling.
        M_list (torch.Tensor): Conversion matrix of shape (5, 6).

    Returns:
        converted (torch.Tensor): Tensor of shape (batch_size, 6) with normalized metal compositions.
        outline (torch.Tensor): Tensor of shape (batch_size,) with the outline adjustments.
    """
    # Ensure a copy of the input to avoid modifying the original tensor.
    x = input_batch  # shape: (batch_size, 5)
    x = x

    # Compute outline for entries where E < 0.1.
    # Save the original E values.
    original_E = x[:, 4]

    # Create outline tensor (for each sample: outline = max(0, 0.1 - original_E))
    outline = 0.1 - original_E

    # For rows where E < 0.1, set E to 0.1.
    mask = x[:, 4] < 0.1
    if mask.any():
        x[mask, 4] = 0.1
        # Renormalize each row so that the sum becomes 1.
        row_sums = x[mask].sum(dim=1, keepdim=True)
        x[mask] = x[mask] / row_sums

    # Now, perform the conversion:
    # 1. Divide each vector elementwise by abcdelist.
    #    Make sure abcdelist has shape (1,5) to allow broadcasting.
    # skipping this step because we aer working with normalised ABCDE
    x_scaled = x
    # / abcdelist.view(1, -1)

    # 2. Multiply by the conversion matrix.
    #    The multiplication: (batch_size, 5) @ (5, 6) results in (batch_size, 6).
    converted = torch.matmul(x_scaled, M_list)

    # 3. Normalize so that each row sums to 1.
    converted = converted / converted.sum(dim=1, keepdim=True)

    return converted, outline.view(-1)

In [22]:
metals, outline = convert_batch(metal_sampled)

In [4]:
outline

NameError: name 'outline' is not defined

In [323]:
sum(outline)/243

tensor(-0.1089)

In [23]:
metals

tensor([[0.1232, 0.1061, 0.5286, 0.2032, 0.0035, 0.0354],
        [0.0356, 0.0550, 0.7271, 0.1185, 0.0024, 0.0615],
        [0.0670, 0.1503, 0.4707, 0.2911, 0.0074, 0.0135],
        ...,
        [0.1656, 0.1854, 0.3609, 0.2755, 0.0064, 0.0062],
        [0.0658, 0.1815, 0.4246, 0.3133, 0.0083, 0.0065],
        [0.1166, 0.1835, 0.3922, 0.2940, 0.0073, 0.0064]])

In [318]:
sum(metals[0])

tensor(1.)

In [317]:
df

Unnamed: 0,Al,Ca,Fe,Mg,Mn,Ni,overpotential
0,0.123204,0.106073,0.528617,0.203194,0.003486,0.035425,1.7122
1,0.036817,0.044191,0.743975,0.107923,0.001843,0.065252,1.7164
2,0.067023,0.150251,0.470664,0.291139,0.007421,0.013501,1.7228
3,0.059822,0.088446,0.430872,0.396597,0.005039,0.019225,1.7250
4,0.060222,0.101457,0.405555,0.414243,0.005944,0.012578,1.7264
...,...,...,...,...,...,...,...
238,0.350899,0.204542,0.258098,0.177185,0.003356,0.005919,2.2541
239,0.212779,0.187240,0.330747,0.257557,0.005539,0.006137,2.2543
240,0.165565,0.185401,0.360911,0.275456,0.006418,0.006249,2.2612
241,0.065797,0.181515,0.424648,0.313278,0.008277,0.006484,2.2632


In [293]:
metals, outline = convert_batch(metal_sampled)

In [294]:
metals

tensor([[0.2627, 0.1901, 0.3025, 0.2336, 0.0046, 0.0064],
        [0.1263, 0.1576, 0.4535, 0.2378, 0.0057, 0.0190],
        [0.1351, 0.1888, 0.3756, 0.2904, 0.0076, 0.0025],
        ...,
        [0.2578, 0.2145, 0.2863, 0.2348, 0.0057, 0.0008],
        [0.1152, 0.2064, 0.3744, 0.2946, 0.0083, 0.0010],
        [0.1943, 0.2109, 0.3256, 0.2615, 0.0068, 0.0009]])

In [161]:
df

Unnamed: 0,Al,Ca,Fe,Mg,Mn,Ni,overpotential
0,0.123204,0.106073,0.528617,0.203194,0.003486,0.035425,1.7122
1,0.036817,0.044191,0.743975,0.107923,0.001843,0.065252,1.7164
2,0.067023,0.150251,0.470664,0.291139,0.007421,0.013501,1.7228
3,0.059822,0.088446,0.430872,0.396597,0.005039,0.019225,1.7250
4,0.060222,0.101457,0.405555,0.414243,0.005944,0.012578,1.7264
...,...,...,...,...,...,...,...
238,0.350899,0.204542,0.258098,0.177185,0.003356,0.005919,2.2541
239,0.212779,0.187240,0.330747,0.257557,0.005539,0.006137,2.2543
240,0.165565,0.185401,0.360911,0.275456,0.006418,0.006249,2.2612
241,0.065797,0.181515,0.424648,0.313278,0.008277,0.006484,2.2632


In [127]:
outline

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 