In [1]:
import os
import sklearn
import seaborn as sns
import numpy as np
import pandas as pd
import re
import pickle
from tqdm import tqdm
import datetime
import time

from PyAstronomy import pyasl

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

from imblearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer, MissingIndicator

from sklearn.preprocessing import (
    StandardScaler, 
    MinMaxScaler, 
    MaxAbsScaler, 
    # RobustScalar,
    Normalizer,
    QuantileTransformer,
    PowerTransformer,
    OneHotEncoder, 
    OrdinalEncoder,
    LabelEncoder
)

from sklearn.utils import all_estimators

from sklearn.base import (
    RegressorMixin, 
    ClassifierMixin,
    TransformerMixin
)

from sklearn.metrics import (
    accuracy_score,
    balanced_accuracy_score,
    auc,
    roc_auc_score,
    f1_score,
    r2_score,
    mean_squared_error,
    classification_report
)

import warnings
import xgboost
import catboost
import lightgbm

import tensorflow as tf

warnings.filterwarnings("ignore")
pd.set_option("display.precision", 2)
pd.set_option("display.float_format", lambda x: "%.2f" % x)

removed_classifiers = [
    "ClassifierChain",
    "ComplementNB",
    # "GradientBoostingClassifier",
    "GaussianProcessClassifier",
    "HistGradientBoostingClassifier",
    # "MLPClassifier",
    "LogisticRegressionCV", 
    "MultiOutputClassifier", 
    "MultinomialNB", 
    "OneVsOneClassifier",
    "OneVsRestClassifier",
    "OutputCodeClassifier",
    "RadiusNeighborsClassifier",
    "VotingClassifier",
    "CategoricalNB",
    "StackingClassifier",
    "NuSVC",
]

removed_regressors = [
    "TheilSenRegressor",
    "ARDRegression", 
    "CCA", 
    "IsotonicRegression", 
    "StackingRegressor",
    "MultiOutputRegressor", 
    "MultiTaskElasticNet", 
    "MultiTaskElasticNetCV", 
    "MultiTaskLasso", 
    "MultiTaskLassoCV", 
    "PLSCanonical", 
    "PLSRegression", 
    "RadiusNeighborsRegressor", 
    "RegressorChain", 
    "VotingRegressor", 
]

CLASSIFIERS = [
    est
    for est in all_estimators()
    if (issubclass(est[1], ClassifierMixin) and (est[0] not in removed_classifiers))
]


REGRESSORS = [
    est
    for est in all_estimators()
    if (issubclass(est[1], RegressorMixin) and (est[0] not in removed_regressors))
]

REGRESSORS.append(("XGBRegressor", xgboost.XGBRegressor))
REGRESSORS.append(("LGBMRegressor", lightgbm.LGBMRegressor))
REGRESSORS.append(('CatBoostRegressor', catboost.CatBoostRegressor))

CLASSIFIERS.append(("XGBClassifier", xgboost.XGBClassifier))
CLASSIFIERS.append(("LGBMClassifier", lightgbm.LGBMClassifier))
CLASSIFIERS.append(('CatBoostClassifier', catboost.CatBoostClassifier))

TRANSFOMER_METHODS = [
    ("StandardScaler", StandardScaler), 
    ("MinMaxScaler", MinMaxScaler), 
    ("MaxAbsScaler", MaxAbsScaler), 
    # ("RobustScalar", RobustScalar),
    ("Normalizer", Normalizer),
    ("QuantileTransformer", QuantileTransformer),
    ("PowerTransformer", PowerTransformer),
]

def adjusted_rsquared(r2, n, p):
    return 1 - (1 - r2) * ((n - 1) / (n - p - 1))

2023-05-19 06:33:10.789731: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-05-19 06:33:11.784071: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-05-19 06:33:13.857275: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2023-05-19 06:33:13.857729: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or 

References:

1. https://discuss.huggingface.co/t/what-does-increasing-number-of-heads-do-in-the-multi-head-attention/1847/2

In [2]:
df_train = pd.read_excel("dataset.xlsx", sheet_name="train")
df_test = pd.read_excel("dataset.xlsx", sheet_name="test")
display(df_train)
display(df_test)

Unnamed: 0,bmg_alloy,delta_h_mix,delta_s_mix,delta_d,delta_e,actual_d_max
0,Al92Nd6Ni2,10.61,0.32,0.20,-0.17,0.00
1,Zr26Ti10Cu8Ni8Be20Y4Mg24,13.89,1.78,0.17,-0.12,5.00
2,Gd55Ni22Mn3Al20,11.82,1.09,0.26,-0.18,2.00
3,Ag53.8Mg15.4Ca30.8,10.05,0.98,0.25,-0.28,0.80
4,Ni60Nb20Zr20,19.88,0.95,0.17,-0.13,0.50
...,...,...,...,...,...,...
1054,Mg65Cu7.5Ni7.5Ag5Zn5Gd7.5Y2.5,9.89,1.25,0.18,-0.16,13.00
1055,Mg65Cu7.5Ni7.5Ag5Zn5Gd2.5Y7.5,9.96,1.25,0.17,-0.16,9.50
1056,Mg80Ni10Nd10,9.39,0.64,0.25,-0.09,0.60
1057,Cu45.5Mg31.8Ca22.7,10.66,1.06,0.34,-0.25,1.25


Unnamed: 0,bmg_alloy,delta_h_mix,delta_s_mix,delta_d,delta_e,actual_d_max,predicted_d_max
0,Co14Ni69P17,14.24,0.83,0.12,0.04,0.00,0.02
1,C14Fe66W20,30.81,0.87,0.14,-0.13,0.00,0.00
2,Cu15La30Mg55,8.64,0.97,0.31,-0.13,0.00,1.77
3,Al87.5Ca2.5Ni10,11.30,0.44,0.21,-0.24,0.00,-0.01
4,B36C8Co56,35.47,0.89,0.11,-0.14,0.00,-0.03
...,...,...,...,...,...,...,...
257,Gd55Ni25Al20,11.94,1.00,0.27,-0.19,2.00,1.36
258,Mg65Ni20La15,10.04,0.89,0.29,-0.14,0.50,0.44
259,B8Fe89Ni3,16.80,0.41,0.16,-0.02,0.00,0.02
260,La62Al14Cu18Ag6,8.44,1.05,0.36,-0.23,5.00,5.77


In [3]:
df_test = df_test.drop(["predicted_d_max"], axis=1)
df = pd.concat([df_train, df_test], axis=0)
df = df.reset_index(drop=True)
display(df)

Unnamed: 0,bmg_alloy,delta_h_mix,delta_s_mix,delta_d,delta_e,actual_d_max
0,Al92Nd6Ni2,10.61,0.32,0.20,-0.17,0.00
1,Zr26Ti10Cu8Ni8Be20Y4Mg24,13.89,1.78,0.17,-0.12,5.00
2,Gd55Ni22Mn3Al20,11.82,1.09,0.26,-0.18,2.00
3,Ag53.8Mg15.4Ca30.8,10.05,0.98,0.25,-0.28,0.80
4,Ni60Nb20Zr20,19.88,0.95,0.17,-0.13,0.50
...,...,...,...,...,...,...
1316,Gd55Ni25Al20,11.94,1.00,0.27,-0.19,2.00
1317,Mg65Ni20La15,10.04,0.89,0.29,-0.14,0.50
1318,B8Fe89Ni3,16.80,0.41,0.16,-0.02,0.00
1319,La62Al14Cu18Ag6,8.44,1.05,0.36,-0.23,5.00


## 1. Encode the bmg_alloys using an RNNs

In [4]:
# constants

n_elements = 118
alloy_max_len = 20
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

max_factor = max(df["actual_d_max"])

print(max_factor)

30.0


In [5]:
# basic utility functions to calculate features

def get_elements_and_compositions(x, verbose = -1):
    # seperating atoms from composition
    s = re.sub(r'[^\w\s]','',x)
    s = re.sub('\d',' ',s)
    elements = np.array([i for i in s.split(' ') if i != ""])
    if verbose > 0:
        print('\nElements in BMG are : ', elements)

    compositions = re.findall(r"[-+]?\d*\.\d+|\d+", x)
    compositions = [float(i) for i in compositions]
    if verbose > 0:
        print('Compositions: ', compositions)
    
    return elements, compositions

def diff(alloy):
    # making ranges for each atom
    ranges = {}
    for i in alloy:
        ranges[i] = 0.88 * parameters[i]["ar"]
    # compiling scoring matrix
    score = {}
    for i in alloy:
        current_score = {}
        for j in alloy:
            if parameters[i]["ar"] < ranges[j]:
                current_score[j] = -1
            elif parameters[i]["ar"] > parameters[j]["ar"]:
                current_score[j] = 1
            else:
                current_score[j] = 0
        score[i] = current_score

    big = []
    small = []
    # separating into big and small based on scoring matrix
    for i in score:
        total_sum = 0
        for j in score[i]:
            total_sum = total_sum + score[i][j]
        if total_sum > 0:
            big.append(i)
        else:
            small.append(i)

    if len(big) == 0 or len(small) == 0:
        print(score)
    return big, small
    
# finds the paramater deltaE
def electro(elements_and_compositions, alloy):
    summation_of_product_of_composition_and_electronegativity = 0 # summation of product of composition and electro negativity
    summation_of_composition = 0 # summation of compositions
    for i in elements_and_compositions:
        if i in alloy:
            summation_of_composition = summation_of_composition + elements_and_compositions[i]
            
    for i in elements_and_compositions:
        if i in alloy:
            summation_of_product_of_composition_and_electronegativity = summation_of_product_of_composition_and_electronegativity + elements_and_compositions[i] * parameters[i]['en']
    return summation_of_product_of_composition_and_electronegativity / summation_of_composition

# finds the paramater deltaD
def comps(elements_and_compositions, alloy):
    summation_of_product_of_composition_and_atomicradii = 0 # summation of product of composition and atomic radii
    summation_of_compositions = 0 # summation of compositions
    for i in elements_and_compositions:
        if i in alloy:
            summation_of_compositions = summation_of_compositions + elements_and_compositions[i]
    for i in elements_and_compositions:
        if i in alloy:
            summation_of_product_of_composition_and_atomicradii = summation_of_product_of_composition_and_atomicradii + elements_and_compositions[i] * parameters[i]['ar']
    return summation_of_product_of_composition_and_atomicradii / summation_of_compositions

def prepare_params(alloy):
    # seperating atoms from composition
    s = re.sub(r'[^\w\s]','', alloy)
    s = re.sub('\d', ' ', s)
    elements = np.array([i for i in s.split(' ') if i in parameters]) # elements list
    # print('\nElements in BMG are : ', elements)

    compositions = re.findall(r"[-+]?\d*\.\d+|\d+", alloy)
    compositions = [float(i) for i in compositions]
    # print('Compositions: ', compositions)

    elements_and_compositions = dict(zip(elements, compositions))
    s_mix = 0
    h_mix = 0

    for i in elements_and_compositions:
        s_mix = s_mix + (elements_and_compositions[i] / 100) * (math.log((elements_and_compositions[i] / 100)))
        h_mix = h_mix + (elements_and_compositions[i] / 100) * parameters[i]['enthalphy']
    s_mix = -1*s_mix

    big, small = diff(elements)
    # print("big atoms : ", big)
    # print("small atoms : ", small)
    delta_d = (comps(elements_and_compositions,big) - comps(elements_and_compositions,small)) / (comps(elements_and_compositions,big))
    delta_e = (electro(elements_and_compositions,big) - electro(elements_and_compositions,small)) / (electro(elements_and_compositions,big) + electro(elements_and_compositions,small))
    return h_mix, s_mix, delta_d, delta_e, elements[np.argmax(compositions)]

In [6]:
# utility functions for encoding

# find element index from all_elements, e.g. "He" = 2
def element_to_index(element):
    try:
        atomic_number = pyasl.AtomicNo()
        return atomic_number.getAtomicNo(element)
    except:
        return "END"
    
def index_to_element(index):
    try:
        atomic_number = pyasl.AtomicNo()
        return atomic_number.getElSymbol(index)
    except:
        return "END"
    
def alloy_to_1d_tensor(alloy_str, alloy_max_len = alloy_max_len):
    tensor = torch.zeros(alloy_max_len)
    elements, compositions = get_elements_and_compositions(alloy_str)
    i = 0
    for idx in range(0, len(elements) + len(compositions), 2):
        tensor[idx] = element_to_index(elements[i])
        tensor[idx + 1] = compositions[i]
        i += 1
    return tensor

In [7]:
# creating the input and output data for the model

X_train = []
y_train = []

for i in range(df.shape[0]):
    X_train.append(alloy_to_1d_tensor(df.loc[i, "bmg_alloy"]))
    y_train.append(df.loc[i, "actual_d_max"] / max_factor)

X_train = torch.stack(X_train)
y_train = torch.tensor(y_train, dtype=torch.float32)

print(X_train.shape, X_train.dtype, y_train.shape, min(y_train).item(), max(y_train).item())

torch.Size([1321, 20]) torch.float32 torch.Size([1321]) 0.0 1.0


In [8]:
import torch
from torch import nn

class SequenceToSigmoid(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers):
        super().__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers=num_layers, bidirectional=True)
        self.fc = nn.Linear(hidden_size * 2, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x, _ = self.lstm(x)
        x = self.fc(x)
        x = self.sigmoid(x)
        return x

In [9]:
# Bidirectional LSTM
# Define hyperparameters
input_size = 20
hidden_size = 128
learning_rate = 0.001
num_epochs = 2000
batch_size = 32
num_layers = 3

# Create an instance of the model
model = SequenceToSigmoid(input_size, hidden_size, num_layers)

# Define the loss function and optimizer
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

X_train_tensor = torch.Tensor(X_train)
y_train_tensor = torch.Tensor(y_train)

# Create a PyTorch DataLoader for batching the data
train_dataset = torch.utils.data.TensorDataset(X_train_tensor, y_train_tensor)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Set the device to use (CPU or GPU if available)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

print_every = 100
# TODO: print the average loss
# EDIT: we are printing epoch loss of batch

# Training loop
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    
    # Iterate over the batches of data
    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        
        # Zero the gradients
        optimizer.zero_grad()
        
        # Forward pass
        outputs = model(inputs)
        
        # Calculate the loss
        # print("output", outputs.squeeze(), "\nlabel", labels)
        loss = criterion(outputs.squeeze(), labels)
        
        # Backward pass and optimization
        loss.backward()
        optimizer.step()
        
        # Update the running loss
        running_loss += loss.item()
    
    # Calculate the average loss for the epoch
    epoch_loss = running_loss / len(train_loader)
    
    # Print the loss for this epoch
    if epoch % print_every == 0:
        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}")

Epoch 1/2000, Loss: 0.3104
Epoch 101/2000, Loss: 0.1950
Epoch 201/2000, Loss: 0.1921
Epoch 301/2000, Loss: 0.1897
Epoch 401/2000, Loss: 0.2017
Epoch 501/2000, Loss: 0.1865
Epoch 601/2000, Loss: 0.1893
Epoch 701/2000, Loss: 0.1882
Epoch 801/2000, Loss: 0.1853
Epoch 901/2000, Loss: 0.1870
Epoch 1001/2000, Loss: 0.1883
Epoch 1101/2000, Loss: 0.1859
Epoch 1201/2000, Loss: 0.1875
Epoch 1301/2000, Loss: 0.1838
Epoch 1401/2000, Loss: 0.1871
Epoch 1501/2000, Loss: 0.1852
Epoch 1601/2000, Loss: 0.1846
Epoch 1701/2000, Loss: 0.1851
Epoch 1801/2000, Loss: 0.1888
Epoch 1901/2000, Loss: 0.1870


In [10]:
torch.save(model.state_dict(), "rnn_fully_trained.pt")

## 2. Predict d_max and create a new dataset (dataset_rnn) to train another model on

In [15]:
all_rnn_output = []
for i in range(df.shape[0]):
    alloy_tensor = alloy_to_1d_tensor(df.loc[i, "bmg_alloy"])
    output = model(torch.atleast_2d(alloy_tensor))
    all_rnn_output.append(output.squeeze().item() * max_factor)
    
new_full = df

new_full["rnn_encoding"] = all_rnn_output

new_full.to_excel("dataset_rnn_full.xlsx")

In [16]:
display(new_full)

Unnamed: 0,bmg_alloy,delta_h_mix,delta_s_mix,delta_d,delta_e,actual_d_max,rnn_encoding
0,Al92Nd6Ni2,10.61,0.32,0.20,-0.17,0.00,0.00
1,Zr26Ti10Cu8Ni8Be20Y4Mg24,13.89,1.78,0.17,-0.12,5.00,4.92
2,Gd55Ni22Mn3Al20,11.82,1.09,0.26,-0.18,2.00,2.08
3,Ag53.8Mg15.4Ca30.8,10.05,0.98,0.25,-0.28,0.80,0.87
4,Ni60Nb20Zr20,19.88,0.95,0.17,-0.13,0.50,0.50
...,...,...,...,...,...,...,...
1316,Gd55Ni25Al20,11.94,1.00,0.27,-0.19,2.00,2.51
1317,Mg65Ni20La15,10.04,0.89,0.29,-0.14,0.50,0.55
1318,B8Fe89Ni3,16.80,0.41,0.16,-0.02,0.00,0.00
1319,La62Al14Cu18Ag6,8.44,1.05,0.36,-0.23,5.00,5.18


In [6]:
df = pd.read_excel("dataset_rnn_full.xlsx", index_col=0)
df = df.drop(["bmg_alloy"], axis=1)

X_train, y_train = df.loc[:, df.columns != "actual_d_max"], pd.DataFrame(df["actual_d_max"])

X_transformer = QuantileTransformer()
y_transformer = QuantileTransformer()

transformed_X_train = pd.DataFrame(X_transformer.fit_transform(X_train), columns = X_train.columns)
transformed_y_train = pd.DataFrame(y_transformer.fit_transform(y_train), columns = y_train.columns)

model = catboost.CatBoostRegressor()
model.fit(transformed_X_train, transformed_y_train)
pickle.dump(model, open("catboost_model.pkl", "wb"))
pickle.dump(X_transformer, open("catboost_X_quantiletransformer.pkl", "wb"))
pickle.dump(y_transformer, open("catboost_y_quantiletransformer.pkl", "wb"))

Learning rate set to 0.042784
0:	learn: 0.3738436	total: 1.63ms	remaining: 1.63s
1:	learn: 0.3589852	total: 3.25ms	remaining: 1.62s
2:	learn: 0.3447191	total: 4.78ms	remaining: 1.59s
3:	learn: 0.3311903	total: 6.38ms	remaining: 1.59s
4:	learn: 0.3191896	total: 7.96ms	remaining: 1.58s
5:	learn: 0.3064798	total: 10.7ms	remaining: 1.78s
6:	learn: 0.2944641	total: 13.2ms	remaining: 1.87s
7:	learn: 0.2829831	total: 15.7ms	remaining: 1.95s
8:	learn: 0.2721050	total: 25ms	remaining: 2.76s
9:	learn: 0.2615380	total: 26.6ms	remaining: 2.63s
10:	learn: 0.2510950	total: 29.1ms	remaining: 2.62s
11:	learn: 0.2420239	total: 31.2ms	remaining: 2.57s
12:	learn: 0.2335166	total: 32.8ms	remaining: 2.49s
13:	learn: 0.2245795	total: 35.3ms	remaining: 2.49s
14:	learn: 0.2160293	total: 44.6ms	remaining: 2.93s
15:	learn: 0.2074530	total: 46.1ms	remaining: 2.83s
16:	learn: 0.1997489	total: 47.9ms	remaining: 2.77s
17:	learn: 0.1920720	total: 49.6ms	remaining: 2.71s
18:	learn: 0.1849667	total: 51.7ms	remaining: 

167:	learn: 0.0276343	total: 387ms	remaining: 1.92s
168:	learn: 0.0275805	total: 389ms	remaining: 1.91s
169:	learn: 0.0274833	total: 391ms	remaining: 1.91s
170:	learn: 0.0274331	total: 393ms	remaining: 1.9s
171:	learn: 0.0273984	total: 395ms	remaining: 1.9s
172:	learn: 0.0273004	total: 396ms	remaining: 1.89s
173:	learn: 0.0272344	total: 398ms	remaining: 1.89s
174:	learn: 0.0271893	total: 400ms	remaining: 1.89s
175:	learn: 0.0271620	total: 402ms	remaining: 1.88s
176:	learn: 0.0270541	total: 404ms	remaining: 1.88s
177:	learn: 0.0270345	total: 405ms	remaining: 1.87s
178:	learn: 0.0269447	total: 408ms	remaining: 1.87s
179:	learn: 0.0269158	total: 409ms	remaining: 1.86s
180:	learn: 0.0268852	total: 411ms	remaining: 1.86s
181:	learn: 0.0268548	total: 414ms	remaining: 1.86s
182:	learn: 0.0268249	total: 416ms	remaining: 1.86s
183:	learn: 0.0267063	total: 418ms	remaining: 1.85s
184:	learn: 0.0266597	total: 420ms	remaining: 1.85s
185:	learn: 0.0266312	total: 422ms	remaining: 1.85s
186:	learn: 0.

378:	learn: 0.0206226	total: 774ms	remaining: 1.27s
379:	learn: 0.0206118	total: 776ms	remaining: 1.27s
380:	learn: 0.0206039	total: 779ms	remaining: 1.26s
381:	learn: 0.0205875	total: 781ms	remaining: 1.26s
382:	learn: 0.0205710	total: 783ms	remaining: 1.26s
383:	learn: 0.0205066	total: 786ms	remaining: 1.26s
384:	learn: 0.0204911	total: 787ms	remaining: 1.26s
385:	learn: 0.0204649	total: 790ms	remaining: 1.25s
386:	learn: 0.0204519	total: 791ms	remaining: 1.25s
387:	learn: 0.0204373	total: 793ms	remaining: 1.25s
388:	learn: 0.0204185	total: 795ms	remaining: 1.25s
389:	learn: 0.0204109	total: 798ms	remaining: 1.25s
390:	learn: 0.0203475	total: 800ms	remaining: 1.25s
391:	learn: 0.0203356	total: 802ms	remaining: 1.24s
392:	learn: 0.0203112	total: 804ms	remaining: 1.24s
393:	learn: 0.0202909	total: 806ms	remaining: 1.24s
394:	learn: 0.0202655	total: 808ms	remaining: 1.24s
395:	learn: 0.0202059	total: 810ms	remaining: 1.23s
396:	learn: 0.0201984	total: 812ms	remaining: 1.23s
397:	learn: 

575:	learn: 0.0166317	total: 1.16s	remaining: 854ms
576:	learn: 0.0166279	total: 1.16s	remaining: 852ms
577:	learn: 0.0166066	total: 1.16s	remaining: 850ms
578:	learn: 0.0165913	total: 1.17s	remaining: 848ms
579:	learn: 0.0165728	total: 1.17s	remaining: 846ms
580:	learn: 0.0165678	total: 1.17s	remaining: 844ms
581:	learn: 0.0165536	total: 1.17s	remaining: 842ms
582:	learn: 0.0165362	total: 1.17s	remaining: 840ms
583:	learn: 0.0165206	total: 1.18s	remaining: 838ms
584:	learn: 0.0165135	total: 1.18s	remaining: 836ms
585:	learn: 0.0165012	total: 1.18s	remaining: 833ms
586:	learn: 0.0164900	total: 1.18s	remaining: 831ms
587:	learn: 0.0164775	total: 1.18s	remaining: 829ms
588:	learn: 0.0164716	total: 1.19s	remaining: 827ms
589:	learn: 0.0164658	total: 1.19s	remaining: 825ms
590:	learn: 0.0164521	total: 1.19s	remaining: 824ms
591:	learn: 0.0164424	total: 1.19s	remaining: 823ms
592:	learn: 0.0164344	total: 1.2s	remaining: 821ms
593:	learn: 0.0164246	total: 1.2s	remaining: 818ms
594:	learn: 0.

768:	learn: 0.0141019	total: 1.54s	remaining: 464ms
769:	learn: 0.0140907	total: 1.55s	remaining: 462ms
770:	learn: 0.0140783	total: 1.55s	remaining: 460ms
771:	learn: 0.0140661	total: 1.55s	remaining: 458ms
772:	learn: 0.0140569	total: 1.55s	remaining: 456ms
773:	learn: 0.0140423	total: 1.55s	remaining: 454ms
774:	learn: 0.0140381	total: 1.56s	remaining: 452ms
775:	learn: 0.0140336	total: 1.56s	remaining: 450ms
776:	learn: 0.0140312	total: 1.56s	remaining: 448ms
777:	learn: 0.0140253	total: 1.56s	remaining: 446ms
778:	learn: 0.0140190	total: 1.56s	remaining: 444ms
779:	learn: 0.0140067	total: 1.57s	remaining: 442ms
780:	learn: 0.0139959	total: 1.57s	remaining: 440ms
781:	learn: 0.0139896	total: 1.57s	remaining: 438ms
782:	learn: 0.0139779	total: 1.57s	remaining: 436ms
783:	learn: 0.0139696	total: 1.57s	remaining: 434ms
784:	learn: 0.0139636	total: 1.57s	remaining: 432ms
785:	learn: 0.0139502	total: 1.58s	remaining: 430ms
786:	learn: 0.0139394	total: 1.58s	remaining: 428ms
787:	learn: 

975:	learn: 0.0120477	total: 1.93s	remaining: 47.5ms
976:	learn: 0.0120387	total: 1.93s	remaining: 45.5ms
977:	learn: 0.0120230	total: 1.94s	remaining: 43.6ms
978:	learn: 0.0120196	total: 1.94s	remaining: 41.6ms
979:	learn: 0.0120139	total: 1.94s	remaining: 39.6ms
980:	learn: 0.0120107	total: 1.94s	remaining: 37.6ms
981:	learn: 0.0120045	total: 1.94s	remaining: 35.6ms
982:	learn: 0.0120019	total: 1.95s	remaining: 33.7ms
983:	learn: 0.0119917	total: 1.95s	remaining: 31.7ms
984:	learn: 0.0119891	total: 1.95s	remaining: 29.7ms
985:	learn: 0.0119876	total: 1.95s	remaining: 27.7ms
986:	learn: 0.0119854	total: 1.95s	remaining: 25.7ms
987:	learn: 0.0119834	total: 1.96s	remaining: 23.8ms
988:	learn: 0.0119752	total: 1.96s	remaining: 21.8ms
989:	learn: 0.0119703	total: 1.96s	remaining: 19.8ms
990:	learn: 0.0119539	total: 1.96s	remaining: 17.8ms
991:	learn: 0.0119438	total: 1.96s	remaining: 15.8ms
992:	learn: 0.0119246	total: 1.96s	remaining: 13.8ms
993:	learn: 0.0119143	total: 1.97s	remaining: 

In [7]:
print(transformed_X_train)

      delta_h_mix  delta_s_mix  delta_d  delta_e  rnn_encoding
0            0.26         0.03     0.49     0.26          0.40
1            0.46         1.00     0.44     0.45          0.83
2            0.36         0.79     0.76     0.22          0.64
3            0.20         0.64     0.74     0.00          0.53
4            0.77         0.60     0.44     0.42          0.51
...           ...          ...      ...      ...           ...
1316         0.37         0.66     0.77     0.19          0.68
1317         0.19         0.49     0.81     0.39          0.51
1318         0.61         0.09     0.41     0.80          0.17
1319         0.05         0.73     0.95     0.11          0.85
1320         0.24         0.84     0.48     0.02          0.51

[1321 rows x 5 columns]
