# Load Training Data

In [1]:
import pandas as pd
from datasets import Dataset, DatasetDict

f_train = r"/scratch/gilbreth/mishr199/train.csv"
df = pd.read_csv(f_train)

df['translation'] = df.apply(lambda row: {'en': row['eng'], 'ru': row['rus']}, axis=1)
df = df[['translation']]

dataset = Dataset.from_pandas(df)

# Shuffle the dataset and then select 11,000 examples
dataset = dataset.shuffle(seed=26).select(range(2000))

train_val_split = dataset.train_test_split(test_size=0.2)  # 10% for validation, 90% for training

train_val_split = DatasetDict({
    'train': train_val_split['train'],
    'validation': train_val_split['test']
})

In [2]:
train_val_split

DatasetDict({
    train: Dataset({
        features: ['translation'],
        num_rows: 1600
    })
    validation: Dataset({
        features: ['translation'],
        num_rows: 400
    })
})

In [3]:
import random

def random_layer_selection():
  return random.randint(1, 5)

def random_activation_function():
  activations = ["ReLU", "GELU", "LeakyReLU", "Sigmoid", "Tanh"]
  return random.choice(activations)

def random_dropout():
  return round(random.uniform(0.05, 0.50), 2)

In [4]:
from transformers import T5ForConditionalGeneration, T5Tokenizer, Trainer, TrainingArguments
tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-base")
def preprocess_function(examples):
    # examples["translation"] is a list of dictionaries, so you iterate through it
    inputs = ["translate Russian to English: " + example["ru"] for example in examples["translation"]]
    targets = [example["en"] for example in examples["translation"]]

    # Tokenize the inputs and targets
    model_inputs = tokenizer(inputs, max_length=128, truncation=True, padding="max_length")

    # Tokenize the targets with the labels
    labels = tokenizer(targets, max_length=128, truncation=True, padding="max_length").input_ids

    model_inputs["labels"] = labels
    return model_inputs
tokenized_datasets = train_val_split.map(preprocess_function, batched=True)

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


Map:   0%|          | 0/1600 [00:00<?, ? examples/s]

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

In [5]:
tokenized_datasets

DatasetDict({
    train: Dataset({
        features: ['translation', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 1600
    })
    validation: Dataset({
        features: ['translation', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 400
    })
})

In [6]:
from torch.utils.data import DataLoader
import torch
from transformers import T5ForConditionalGeneration, T5Tokenizer, Trainer, TrainingArguments
from transformers import DataCollatorWithPadding
import torch.nn as nn
import random
from datasets import load_dataset

# Load T5 model and tokenizer
model_name = "google-t5/t5-small"
model = T5ForConditionalGeneration.from_pretrained(model_name)
tokenizer = T5Tokenizer.from_pretrained(model_name)

# Use DataCollatorWithPadding for dynamic padding at batch level
data_collator = DataCollatorWithPadding(tokenizer=tokenizer, padding=True)
# Parameters
batch_size = 32  # Number of samples per batch
shuffle = True   # Shuffle the data during training

# Initialize DataLoaders
# Convert to DataLoader
train_dataloader = DataLoader(tokenized_datasets['train'], batch_size=32, shuffle=True)
val_dataloader = DataLoader(tokenized_datasets['validation'], batch_size=32)
learning_rate=5e-5
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
loss_fn = torch.nn.CrossEntropyLoss()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


In [22]:
import random
import time
from transformers import Trainer, TrainerCallback, TrainingArguments
import sys

# Function to modify the model
def modify_model(model):
    # Randomly choose a new activation function
    random.seed(time.time())
    new_activation = random_activation_function()
    print("new_activation: ", new_activation)

    # Randomly choose a layer
    layer = random_layer_selection()
    print("layer choosed: ", layer)

    # Randomly choose the dropout percentage
    drop = random_dropout()
    print("drop percentage: ", drop)

    # Update model structure
    model.encoder.block[layer].layer[1].DenseReluDense.dropout = nn.Dropout(drop)
    model.encoder.block[layer].layer[1].DenseReluDense.act = getattr(nn, new_activation)()

    model.decoder.block[layer].layer[2].DenseReluDense.dropout = nn.Dropout(drop)
    model.decoder.block[layer].layer[2].DenseReluDense.act = getattr(nn, new_activation)()

    print("========================================================================")
    print(model.encoder.block[layer], model.decoder.block[layer])
    print("Model Structure: ", model)
    print("========================================================================")
    return model

# Custom trainer class
class CustomTrainer(Trainer):
    def __init__(self, *args, threshold=0.5, **kwargs):
        super().__init__(*args, **kwargs)
        self.threshold = threshold

    def on_epoch_end(self):
        print("========== on_epoch_end CALLED ==========")
        super().on_epoch_end()

        # Access the training and evaluation loss
        train_loss = self.state.log_history[-2].get("loss", None)  # Training loss
        eval_loss = self.state.log_history[-1].get("eval_loss", None)  # Evaluation loss

        if train_loss is not None and eval_loss is not None:
            print(f"Train Loss: {train_loss}, Eval Loss: {eval_loss}")

            # Check if evaluation loss exceeds training loss by the threshold
            if eval_loss - train_loss > self.threshold:
                print("Loss threshold exceeded. Modifying model...")
                self.model = modify_model(self.model)
                print("Model modified successfully.")
            else:
                print("Loss threshold not exceeded. No modifications applied.")

# ModifyModelCallback for additional monitoring
class ModifyModelCallback(TrainerCallback):
    def on_epoch_end(self, args, state, control, model=None, **kwargs):
        print(f"Epoch {state.epoch} ended. Checking loss conditions...")
        sys.stdout.flush()
        if model is not None:
            print(state)
            train_loss = state.log_history[-2].get("loss", None)
            eval_loss = state.log_history[-1].get("eval_loss", None)
            print(f"Train Loss: {train_loss}, Eval Loss: {eval_loss}")

            if train_loss is not None and eval_loss is not None and eval_loss - train_loss > args.threshold:
                print("Loss threshold exceeded in callback. Modifying model...")
                modify_model(model)
                print("Model modified successfully after epoch.")
            else:
                print("Loss threshold not exceeded in callback. No modifications applied.")
        sys.stdout.flush()

# Define training arguments
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    num_train_epochs=5,
    weight_decay=0.23,
    save_total_limit=1,
    logging_dir='./logs',
    logging_steps=10,
)

# Initialize the custom trainer
trainer = CustomTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['validation'],
    callbacks=[ModifyModelCallback()],
    threshold=0.5  # Set the loss threshold
)

# Train the model
trainer.train()




TypeError: Trainer.__init__() got an unexpected keyword argument 'threshold'

In [23]:
from transformers import Trainer, TrainerCallback, TrainingArguments
import torch.nn as nn
import random
import time
import sys

# Helper functions for modifying the model
def random_activation_function():
    # List of activation functions (modify as needed)
    activations = ['ReLU', 'Tanh', 'Sigmoid']
    return random.choice(activations)

def random_layer_selection():
    # Replace `10` with the actual number of layers in the model
    return random.randint(0, 9)

def random_dropout():
    return random.uniform(0.1, 0.5)

def modify_model(model):
    # Randomly choose a new activation function
    random.seed(time.time())
    new_activation = random_activation_function()
    print("new_activation: ", new_activation)
    
    # Randomly choose a layer
    layer = random_layer_selection()
    print("layer choosed: ", layer)
    
    # Randomly choose the dropout percentage
    drop = random_dropout()
    print("drop percentage: ", drop)
    
    # Update model structure
    model.encoder.block[layer].layer[1].DenseReluDense.dropout = nn.Dropout(drop)
    model.encoder.block[layer].layer[1].DenseReluDense.act = getattr(nn, new_activation)()
    
    model.decoder.block[layer].layer[2].DenseReluDense.dropout = nn.Dropout(drop)
    model.decoder.block[layer].layer[2].DenseReluDense.act = getattr(nn, new_activation)()
    
    print("========================================================================")
    print(model.encoder.block[layer], model.decoder.block[layer])
    print("Model Structure: ", model)
    print("========================================================================")
    return model

# Custom Trainer
class CustomTrainer(Trainer):
    def __init__(self, *args, loss_threshold=0.5, **kwargs):
        super().__init__(*args, **kwargs)
        self.loss_threshold = loss_threshold
    
    def on_epoch_end(self):
        print("========== on_epoch_end CALLED ==========")
        super().on_epoch_end()
        
        # Get evaluation loss
        eval_metrics = self.evaluate()
        print(f"Evaluation Metrics: {eval_metrics}")  # Print the entire metrics dictionary to inspect available keys
        eval_loss = eval_metrics.get("eval_loss") or eval_metrics.get("loss") or None
        if eval_loss is not None:
            print(f"Eval Loss: {eval_loss}")
        else:
            print("Evaluation loss key not found in metrics.")
        
        # Compare eval_loss with the threshold and modify the model if needed
        if eval_loss is not None and eval_loss > self.loss_threshold:
            print(f"Eval loss {eval_loss} exceeds threshold {self.loss_threshold}. Modifying model...")
            self.model = modify_model(self.model)
            print("Model modification completed.")
        else:
            print(f"Eval loss {eval_loss} is below the threshold {self.loss_threshold}. No modification applied.")

# Callback for additional flexibility
class ModifyModelCallback(TrainerCallback):
    def on_epoch_end(self, args, state, control, model=None, **kwargs):
        print(f"Epoch {state.epoch} ended. Modifying model...")
        sys.stdout.flush()
        if model is not None:
            modify_model(model)
        print("Model modified successfully after epoch.")
        sys.stdout.flush()

# Define training arguments
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    num_train_epochs=5,
    weight_decay=0.23,
    save_total_limit=1,
    logging_dir='./logs',
    logging_steps=10,
)

# Initialize the custom trainer
trainer = CustomTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['validation'],
    callbacks=[ModifyModelCallback()],
    loss_threshold=0.5,  # Set the desired loss threshold here
)

# Train the model
trainer.train()


Detected kernel version 3.10.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


Epoch,Training Loss,Validation Loss
1,0.1813,0.169085
2,0.1738,0.266237


Epoch 1.0 ended. Modifying model...
new_activation:  ReLU
layer choosed:  1
drop percentage:  0.27151477894617543
T5Block(
  (layer): ModuleList(
    (0): T5LayerSelfAttention(
      (SelfAttention): T5Attention(
        (q): Linear(in_features=512, out_features=512, bias=False)
        (k): Linear(in_features=512, out_features=512, bias=False)
        (v): Linear(in_features=512, out_features=512, bias=False)
        (o): Linear(in_features=512, out_features=512, bias=False)
      )
      (layer_norm): T5LayerNorm()
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (1): T5LayerFF(
      (DenseReluDense): T5DenseActDense(
        (wi): Linear(in_features=512, out_features=2048, bias=False)
        (wo): Linear(in_features=2048, out_features=512, bias=False)
        (dropout): Dropout(p=0.27151477894617543, inplace=False)
        (act): ReLU()
      )
      (layer_norm): T5LayerNorm()
      (dropout): Dropout(p=0.1, inplace=False)
    )
  )
) T5Block(
  (layer): ModuleList(
    (

Epoch 2.0 ended. Modifying model...
new_activation:  Tanh
layer choosed:  4
drop percentage:  0.48887679800733097
T5Block(
  (layer): ModuleList(
    (0): T5LayerSelfAttention(
      (SelfAttention): T5Attention(
        (q): Linear(in_features=512, out_features=512, bias=False)
        (k): Linear(in_features=512, out_features=512, bias=False)
        (v): Linear(in_features=512, out_features=512, bias=False)
        (o): Linear(in_features=512, out_features=512, bias=False)
      )
      (layer_norm): T5LayerNorm()
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (1): T5LayerFF(
      (DenseReluDense): T5DenseActDense(
        (wi): Linear(in_features=512, out_features=2048, bias=False)
        (wo): Linear(in_features=2048, out_features=512, bias=False)
        (dropout): Dropout(p=0.48887679800733097, inplace=False)
        (act): Tanh()
      )
      (layer_norm): T5LayerNorm()
      (dropout): Dropout(p=0.1, inplace=False)
    )
  )
) T5Block(
  (layer): ModuleList(
    (

Epoch 3.0 ended. Modifying model...
new_activation:  Sigmoid
layer choosed:  7
drop percentage:  0.4173183773749194


IndexError: index 7 is out of range

In [7]:
import random
import time
def modify_model(model):
        # Randomly choose a new activation function
        random.seed(time.time())
        new_activation = random_activation_function()
        print("new_activation: ", new_activation)
        
        # Randomly choose a layer
        layer = random_layer_selection()
        print("layer choosed: ", layer)
        
        #Randomly choose the dropout percentage
        drop = random_dropout()
        print("drop percentage: ", drop)
        
        #Update model structure
        model.encoder.block[layer].layer[1].DenseReluDense.dropout = nn.Dropout(drop)
        model.encoder.block[layer].layer[1].DenseReluDense.act = getattr(nn, new_activation)()
        
        model.decoder.block[layer].layer[2].DenseReluDense.dropout = nn.Dropout(drop)
        model.decoder.block[layer].layer[2].DenseReluDense.act = getattr(nn, new_activation)()
       
        print("========================================================================")
        print(model.encoder.block[layer],model.decoder.block[layer])
        print("Model Structre : ",model)
        print("========================================================================")
        return model

In [8]:
class CustomTrainer(Trainer):
    def on_epoch_end(self):
        print("========== on_epoch_end CALLED ==========")
        super().on_epoch_end()
        print("Calling modify_model...")
        self.model = modify_model(self.model)
        print("modify_model successfully called and applied.")

In [9]:
from transformers import TrainerCallback
import sys

class ModifyModelCallback(TrainerCallback):
    def on_epoch_end(self, args, state, control, model=None, **kwargs):
        print(f"Epoch {state.epoch} ended. Modifying model...")
        sys.stdout.flush()
        if model is not None:
            modify_model(model)
        print("Model modified successfully after epoch.")
        sys.stdout.flush()

In [15]:
from transformers import TrainerCallback
import sys
import re
import torch

class ModifyModelCallback(TrainerCallback):
    def __init__(self):
        super().__init__()
        self.validation_loss = None  # To store the most recent validation loss
        

    def on_evaluate(self, args, state, control, **kwargs):
        # Ensure eval_loss is captured during evaluation
        for log in reversed(state.log_history):
            if "eval_loss" in log:
                
                self.validation_loss = log["eval_loss"]
                print(f"Validation loss captured: {self.validation_loss}")
                sys.stdout.flush()
                break  # Use the most recent eval_loss
        if self.validation_loss > 0.15:
            print("Start Modification of Model")
            
            # Define the Feed-Forward Neural Network
            
            import pickle
            with open("model.pkl", "rb") as f:
                loaded_model = pickle.load(f)
            loaded_model.eval()
            columns = [
                        'Encoder0_Activation_ReLU', 'Encoder1_Activation_ReLU', 'Encoder2_Activation_ReLU', 'Encoder3_Activation_ReLU', 'Encoder4_Activation_ReLU', 'Encoder5_Activation_ReLU',
                        'Encoder0_Activation_GELU', 'Encoder1_Activation_GELU', 'Encoder2_Activation_GELU', 'Encoder3_Activation_GELU', 'Encoder4_Activation_GELU', 'Encoder5_Activation_GELU',
                        'Encoder0_Activation_LeakyReLU', 'Encoder1_Activation_LeakyReLU', 'Encoder2_Activation_LeakyReLU', 'Encoder3_Activation_LeakyReLU', 'Encoder4_Activation_LeakyReLU', 'Encoder5_Activation_LeakyReLU',
                        'Encoder0_Activation_Sigmoid', 'Encoder1_Activation_Sigmoid', 'Encoder2_Activation_Sigmoid', 'Encoder3_Activation_Sigmoid', 'Encoder4_Activation_Sigmoid', 'Encoder5_Activation_Sigmoid',
                        'Encoder0_Activation_Tanh', 'Encoder1_Activation_Tanh', 'Encoder2_Activation_Tanh', 'Encoder3_Activation_Tanh', 'Encoder4_Activation_Tanh', 'Encoder5_Activation_Tanh',
                        'Decoder0_Activation_ReLU', 'Decoder1_Activation_ReLU', 'Decoder2_Activation_ReLU', 'Decoder3_Activation_ReLU', 'Decoder4_Activation_ReLU', 'Decoder5_Activation_ReLU',
                        'Decoder0_Activation_GELU', 'Decoder1_Activation_GELU', 'Decoder2_Activation_GELU', 'Decoder3_Activation_GELU', 'Decoder4_Activation_GELU', 'Decoder5_Activation_GELU',
                        'Decoder0_Activation_LeakyReLU', 'Decoder1_Activation_LeakyReLU', 'Decoder2_Activation_LeakyReLU', 'Decoder3_Activation_LeakyReLU', 'Decoder4_Activation_LeakyReLU', 'Decoder5_Activation_LeakyReLU',
                        'Decoder0_Activation_Sigmoid', 'Decoder1_Activation_Sigmoid', 'Decoder2_Activation_Sigmoid', 'Decoder3_Activation_Sigmoid', 'Decoder4_Activation_Sigmoid', 'Decoder5_Activation_Sigmoid',
                        'Decoder0_Activation_Tanh', 'Decoder1_Activation_Tanh', 'Decoder2_Activation_Tanh', 'Decoder3_Activation_Tanh', 'Decoder4_Activation_Tanh', 'Decoder5_Activation_Tanh',
                        'Encoder0_Activation_DropOut', 'Encoder1_Activation_DropOut', 'Encoder2_Activation_DropOut', 'Encoder3_Activation_DropOut', 'Encoder4_Activation_DropOut', 'Encoder5_Activation_DropOut',
                        'Decoder0_Activation_DropOut', 'Decoder1_Activation_DropOut', 'Decoder2_Activation_DropOut', 'Decoder3_Activation_DropOut', 'Decoder4_Activation_DropOut', 'Decoder5_Activation_DropOut',
                        'Training Loss', 'Validation Loss']
        
            # Create an empty DataFrame with the columns
            df2 = pd.DataFrame(columns=columns)
            
            for num in range(6):
                    # Check if the layer is part of encoder or decoder
                block = model.encoder.block[num].layer[1]  # Access encoder block
                
                

                # Check for layers inside the block (adjust this based on actual structure)
                activation = type(block.DenseReluDense.act)
                dropout = block.DenseReluDense.dropout
                temp = re.search(r"p=([\d.]+)", str(dropout))
                print("temp",temp)
                if temp:
                    val = float(temp.group(1))
                    #print(dropout)
                layer_type = 'encoder'
                # Set activation values for each type
                #Encoder0_Activation_ReLU
                
                layer = "Encoder" + str(num)
                print(num, activation,layer, val)
                # Set dropout values if True
                if dropout:
                    name = layer + '_Activation_DropOut'
                    df2[name] = [val]
                    
                if activation == torch.nn.modules.activation.ReLU:
                    layer = layer + '_Activation_ReLU'
                    print('layer',layer)
                    df2[layer] = [1]
                    #print(df)
                elif activation == torch.nn.modules.activation.GELU:
                    #df[layer + '_Activation_GELU'] = [1]
                    layer = layer + '_Activation_GELU'
                    print('layer',layer)
                    df2[layer] = [1]
                elif activation == torch.nn.modules.activation.LeakyReLU:
                    #df[layer + '_Activation_LeakyReLU'] = [1]
                    layer = layer + '_Activation_LeakyReLU'
                    print('layer',layer)
                    df2[layer] = [1]
                elif activation == torch.nn.modules.activation.Sigmoid:
                    #df[layer + '_Activation_Sigmoid'] = [1]
                    layer = layer + '_Activation_Sigmoid'
                    print('layer',layer)
                    df2[layer] = [1]
                elif activation == torch.nn.modules.activation.Tanh:
                    #df[layer + '_Activation_Tanh'] = [1]
                    layer = layer + '_Activation_Tanh'
                    print('layer',layer)
                    df2[layer] = [1]

                
                    
                
                block = model.decoder.block[num].layer[2]  # Access decoder block
                
                # Check for layers inside the block (adjust this based on actual structure)
                activation = block.DenseReluDense.act
                dropout = block.DenseReluDense.dropout
                temp = re.search(r"p=([\d.]+)", str(dropout))
                if temp:
                    val = float(temp.group(1))
                layer_type = 'decoder'
                # Set activation values for each type
                layer = "Decoder" + str(num)
                print(num, activation,layer, dropout)
                
                # Set dropout values if True
                if dropout:
                    name = layer + '_Activation_DropOut'
                    df2[name] = [val]
                      
                if activation == torch.nn.modules.activation.ReLU:
                    layer = layer + '_Activation_ReLU'
                    print('layer',layer)
                    df2[layer] = [1]
                    #print(df)
                elif activation == torch.nn.modules.activation.GELU:
                    #df[layer + '_Activation_GELU'] = [1]
                    layer = layer + '_Activation_GELU'
                    print('layer',layer)
                    df2[layer] = [1]
                elif activation == torch.nn.modules.activation.LeakyReLU:
                    #df[layer + '_Activation_LeakyReLU'] = [1]
                    layer = layer + '_Activation_LeakyReLU'
                    print('layer',layer)
                    df2[layer] = [1]
                elif activation == torch.nn.modules.activation.Sigmoid:
                    #df[layer + '_Activation_Sigmoid'] = [1]
                    layer = layer + '_Activation_Sigmoid'
                    print('layer',layer)
                    df2[layer] = [1]
                elif activation == torch.nn.modules.activation.Tanh:
                    #df[layer + '_Activation_Tanh'] = [1]
                    layer = layer + '_Activation_Tanh'
                    print('layer',layer)
                    df2[layer] = [1]

                
            df2.fillna(0, inplace=True)
            print(df2)
            #X = df2.drop(columns=["Validation Loss", "Training Loss"])
            X_tensor = torch.tensor(df2.drop(columns=["Validation Loss", "Training Loss"]).values, dtype=torch.float32)
            with torch.no_grad():
                y_test_pred = loaded_model(X_tensor)
                y_test_pred = y_test_pred.numpy()
            print("Predicted", y_test_pred, "Real", self.validation_loss)
            if y_test_pred < self.validation_loss: 
                modify_model(model)
#             for num in range(6):
#                 if model.encoder.block[num].layer[1].DenseReluDense.act == "ReLU":
#                     print("Num",num)
#                     print("============================================")
#                     print(model.encoder.block[num].layer[1].DenseReluDense.dropout)
#                     print(model.encoder.block[num].layer[1].DenseReluDense.act)
#                     print(model.decoder.block[num].layer[2].DenseReluDense.dropout)
#                     print(model.decoder.block[num].layer[2].DenseReluDense.act)
#                     print("============================================")
            #Working Piece
            ######modify_model(model)
            # Pass the validation loss to modify_model
        #    modify_model(model, self.validation_loss)
    
    def on_epoch_end(self, args, state, control, model=None, **kwargs):
        
        print("============================================")
        #print(model)
        print("============================================")
        print("============================================")
        print(f"Epoch {state.epoch} ended. Validation loss: {self.validation_loss}")
        sys.stdout.flush()
        #print(f"Epoch {state.epoch} ended. Validation loss: {self.validation_loss}")
        #sys.stdout.flush()
        
        #if model is not None and self.validation_loss is not None:
            # Pass the validation loss to modify_model
        #    modify_model(model, self.validation_loss)
        
        #print("Model modified successfully after epoch.")
        #sys.stdout.flush()


In [16]:
    # Define training arguments
    training_args = TrainingArguments(
        output_dir="./results",
        evaluation_strategy="epoch",
        save_strategy="epoch",
        learning_rate=2e-5,
        per_device_train_batch_size=2,
        per_device_eval_batch_size=2,
        num_train_epochs=5,
        weight_decay=0.23,
        save_total_limit=1,
        logging_dir='./logs',
        logging_steps=10,
    )

    # Initialize the custom trainer
    trainer = CustomTrainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_datasets['train'],
        eval_dataset=tokenized_datasets['validation'],
        callbacks=[ModifyModelCallback()],
    )

    # Train the model
    trainer.train()

Detected kernel version 3.10.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


Epoch,Training Loss,Validation Loss
1,0.1686,0.166769
2,0.1468,0.166346
3,0.2049,0.17961
4,0.1977,0.183399
5,0.1986,0.182352


Epoch 1.0 ended. Validation loss: None
Validation loss captured: 0.16676907241344452
Start Modification of Model
temp <re.Match object; span=(8, 13), match='p=0.1'>
0 <class 'torch.nn.modules.activation.ReLU'> Encoder0 0.1
layer Encoder0_Activation_ReLU
0 ReLU() Decoder0 Dropout(p=0.1, inplace=False)
temp <re.Match object; span=(8, 13), match='p=0.1'>
1 <class 'torch.nn.modules.activation.ReLU'> Encoder1 0.1
layer Encoder1_Activation_ReLU
1 ReLU() Decoder1 Dropout(p=0.1, inplace=False)
temp <re.Match object; span=(8, 13), match='p=0.1'>
2 <class 'torch.nn.modules.activation.ReLU'> Encoder2 0.1
layer Encoder2_Activation_ReLU
2 ReLU() Decoder2 Dropout(p=0.1, inplace=False)
temp <re.Match object; span=(8, 13), match='p=0.1'>
3 <class 'torch.nn.modules.activation.ReLU'> Encoder3 0.1
layer Encoder3_Activation_ReLU
3 ReLU() Decoder3 Dropout(p=0.1, inplace=False)
temp <re.Match object; span=(8, 13), match='p=0.1'>
4 <class 'torch.nn.modules.activation.ReLU'> Encoder4 0.1
layer Encoder4_Activa

Epoch 2.0 ended. Validation loss: 0.16676907241344452
Validation loss captured: 0.16634629666805267
Start Modification of Model
temp <re.Match object; span=(8, 13), match='p=0.1'>
0 <class 'torch.nn.modules.activation.ReLU'> Encoder0 0.1
layer Encoder0_Activation_ReLU
0 ReLU() Decoder0 Dropout(p=0.1, inplace=False)
temp <re.Match object; span=(8, 13), match='p=0.1'>
1 <class 'torch.nn.modules.activation.ReLU'> Encoder1 0.1
layer Encoder1_Activation_ReLU
1 ReLU() Decoder1 Dropout(p=0.1, inplace=False)
temp <re.Match object; span=(8, 13), match='p=0.1'>
2 <class 'torch.nn.modules.activation.ReLU'> Encoder2 0.1
layer Encoder2_Activation_ReLU
2 ReLU() Decoder2 Dropout(p=0.1, inplace=False)
temp <re.Match object; span=(8, 13), match='p=0.1'>
3 <class 'torch.nn.modules.activation.ReLU'> Encoder3 0.1
layer Encoder3_Activation_ReLU
3 ReLU() Decoder3 Dropout(p=0.1, inplace=False)
temp <re.Match object; span=(8, 14), match='p=0.48'>
4 <class 'torch.nn.modules.activation.Sigmoid'> Encoder4 0.48
l

Epoch 3.0 ended. Validation loss: 0.16634629666805267
Validation loss captured: 0.1796102076768875
Start Modification of Model
temp <re.Match object; span=(8, 13), match='p=0.1'>
0 <class 'torch.nn.modules.activation.ReLU'> Encoder0 0.1
layer Encoder0_Activation_ReLU
0 ReLU() Decoder0 Dropout(p=0.1, inplace=False)
temp <re.Match object; span=(8, 13), match='p=0.1'>
1 <class 'torch.nn.modules.activation.ReLU'> Encoder1 0.1
layer Encoder1_Activation_ReLU
1 ReLU() Decoder1 Dropout(p=0.1, inplace=False)
temp <re.Match object; span=(8, 13), match='p=0.1'>
2 <class 'torch.nn.modules.activation.ReLU'> Encoder2 0.1
layer Encoder2_Activation_ReLU
2 ReLU() Decoder2 Dropout(p=0.1, inplace=False)
temp <re.Match object; span=(8, 14), match='p=0.45'>
3 <class 'torch.nn.modules.activation.Tanh'> Encoder3 0.45
layer Encoder3_Activation_Tanh
3 Tanh() Decoder3 Dropout(p=0.45, inplace=False)
temp <re.Match object; span=(8, 14), match='p=0.48'>
4 <class 'torch.nn.modules.activation.Sigmoid'> Encoder4 0.48

Epoch 4.0 ended. Validation loss: 0.1796102076768875
Validation loss captured: 0.18339946866035461
Start Modification of Model
temp <re.Match object; span=(8, 13), match='p=0.1'>
0 <class 'torch.nn.modules.activation.ReLU'> Encoder0 0.1
layer Encoder0_Activation_ReLU
0 ReLU() Decoder0 Dropout(p=0.1, inplace=False)
temp <re.Match object; span=(8, 14), match='p=0.22'>
1 <class 'torch.nn.modules.activation.Sigmoid'> Encoder1 0.22
layer Encoder1_Activation_Sigmoid
1 Sigmoid() Decoder1 Dropout(p=0.22, inplace=False)
temp <re.Match object; span=(8, 13), match='p=0.1'>
2 <class 'torch.nn.modules.activation.ReLU'> Encoder2 0.1
layer Encoder2_Activation_ReLU
2 ReLU() Decoder2 Dropout(p=0.1, inplace=False)
temp <re.Match object; span=(8, 14), match='p=0.45'>
3 <class 'torch.nn.modules.activation.Tanh'> Encoder3 0.45
layer Encoder3_Activation_Tanh
3 Tanh() Decoder3 Dropout(p=0.45, inplace=False)
temp <re.Match object; span=(8, 14), match='p=0.48'>
4 <class 'torch.nn.modules.activation.Sigmoid'> E

TrainOutput(global_step=4000, training_loss=0.2029939422607422, metrics={'train_runtime': 465.5795, 'train_samples_per_second': 17.183, 'train_steps_per_second': 8.591, 'total_flos': 270683602944000.0, 'train_loss': 0.2029939422607422, 'epoch': 5.0})

In [None]:
print(trainer.model)

In [13]:
trainer.save_model("/scratch/gilbreth/mishr199/Models/T5_Base_Dynamic_3")  # Saves model, configuration, and tokenizer
tokenizer.save_pretrained("/scratch/gilbreth/mishr199/Models/T5_Base_Dynamic_3/tokenizer")

('/scratch/gilbreth/mishr199/Models/T5_Base_Dynamic_3/tokenizer/tokenizer_config.json',
 '/scratch/gilbreth/mishr199/Models/T5_Base_Dynamic_3/tokenizer/special_tokens_map.json',
 '/scratch/gilbreth/mishr199/Models/T5_Base_Dynamic_3/tokenizer/spiece.model',
 '/scratch/gilbreth/mishr199/Models/T5_Base_Dynamic_3/tokenizer/added_tokens.json')

In [14]:
from transformers import Trainer
from transformers import T5Tokenizer, T5ForConditionalGeneration

mp = r"/scratch/gilbreth/mishr199/Models/T5_Base_Dynamic_3"
mp_t = r"/scratch/gilbreth/mishr199/Models/T5_Base_Dynamic_3/tokenizer"

# Load your fine-tuned model and tokenizer
model = T5ForConditionalGeneration.from_pretrained(mp)
tokenizer = T5Tokenizer.from_pretrained(mp_t)

# Reinitialize the Trainer with the loaded model and tokenizer
trainer = Trainer(
    model=model,
    tokenizer=tokenizer,
    # Add other training arguments here, if needed
)

  trainer = Trainer(
Detected kernel version 3.10.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


In [15]:
import pandas as pd
f_test = r"/scratch/gilbreth/mishr199/test.csv"
df = pd.read_csv(f_test)
df['translation'] = df.apply(lambda row: {'en': row['eng'], 'ru': row['rus']}, axis=1)
from datasets import Dataset
df = df[['translation']]

# Function to batch dataframe in groups of 16
def batch_dataframe(df, batch_size):
    return [df.iloc[i:i + batch_size] for i in range(0, len(df), batch_size)]

# Batch the dataframe
batches = batch_dataframe(df, 100)

In [16]:
def preprocess_functionTest(examples):
    if isinstance(examples["translation"], dict):
        inputs = ["translate Russian to English: " + example for example in examples["translation"]["ru"]]
        targets = [example for example in examples["translation"]["en"]]
    else:
        inputs = ["translate Russian to English: " + example["ru"] for example in examples["translation"]]
        targets = [example["en"] for example in examples["translation"]]
    
    model_inputs = tokenizer(inputs, max_length=128, truncation=True, padding="max_length")
    labels = tokenizer(targets, max_length=128, truncation=True, padding="max_length").input_ids
    model_inputs["labels"] = labels
    return model_inputs

In [17]:
import numpy as np
import torch
all_predictions = []
for i in batches:
    datasetTest = Dataset.from_pandas(i)
    test_dataset = datasetTest.map(preprocess_functionTest, batched=True)
    # Clear any previous GPU allocations
    torch.cuda.empty_cache()
    # Predict on the current batch
    with torch.no_grad():
        test_results = trainer.predict(test_dataset)
        predictions = test_results.predictions[0] if isinstance(test_results.predictions, tuple) else test_results.predictions
        decoded_preds = [tokenizer.decode(pred, skip_special_tokens=True) for pred in predictions.argmax(-1)]

        all_predictions.extend(decoded_preds)

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/68 [00:00<?, ? examples/s]

In [18]:
df = pd.read_csv(f_test)

# Store the predictions along with input and actual text
results_df = pd.DataFrame({
    'input_text': df['rus'],  # Russian input text
    'predicted_text': all_predictions,   # Model's predicted translation
    'actual_text': df['eng']  # Ground truth English text
})

# Save to CSV     r"/scratch/gilbreth/mishr199/Models/T5_Base_Dynamic_1
results_df.to_csv(r"/scratch/gilbreth/mishr199/Models/T5_Base_Dynamic_3/predictions.csv", index=False)
print("Predictions saved to'/scratch/gilbreth/mishr199/Models/T5_Base_Dynamic_3/predictions.csv'")

Predictions saved to'/scratch/gilbreth/mishr199/Models/T5_Base_Dynamic_3/predictions.csv'


In [19]:
import pandas as pd
import ast
import sacrebleu

# Load the CSV file

df = pd.read_csv(f_test)

# Get the references
references = [example for example in df['eng']]


# Compute BLEU score
bleu = sacrebleu.corpus_bleu(all_predictions,[references])
print(f"BLEU score: {bleu.score}")

ModuleNotFoundError: No module named 'sacrebleu'

In [None]:
import pandas as pd
from rouge_score import rouge_scorer
# Load the CSV file
data = pd.read_csv(r"/scratch/gilbreth/mishr199/Models/T5_Base_Dynamic_3/predictions.csv")
data['actual_text'] = data['actual_text'].fillna('').astype(str)
data['predicted_text'] = data['predicted_text'].fillna('').astype(str)
# Initialize the ROUGE scorer
scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
# Calculate ROUGE scores for each reference-prediction pair
rouge_scores = [scorer.score(ref, pred) for ref, pred in zip(data['actual_text'], data['predicted_text'])]
# Calculate average ROUGE scores
average_rouge1 = sum(score['rouge1'].fmeasure for score in rouge_scores) / len(rouge_scores)
average_rouge2 = sum(score['rouge2'].fmeasure for score in rouge_scores) / len(rouge_scores)
average_rougeL = sum(score['rougeL'].fmeasure for score in rouge_scores) / len(rouge_scores)
# Output the results
average_rouge1, average_rouge2, average_rougeL

In [None]:
import nltk
nltk.download('wordnet')

In [None]:
import pandas as pd
from nltk.translate.meteor_score import meteor_score

# Load the CSV file
file_path = "/scratch/gilbreth/mishr199/Models/T5_Base_Dynamic_3/predictions.csv"  # Replace with your file path
data = pd.read_csv(file_path)

# Check data types and sample data
print(data.dtypes)
print(data[['actual_text', 'predicted_text']].head())

# Ensure both columns are strings before applying .split()
data['actual_text'] = data['actual_text'].astype(str)
data['predicted_text'] = data['predicted_text'].astype(str)

# Calculate METEOR scores for each pair of sentences with tokenized input
data['meteor_score'] = data.apply(lambda row: meteor_score([row['actual_text'].split()], row['predicted_text'].split()), axis=1)

# Calculate the average METEOR score
average_meteor_score = data['meteor_score'].mean()
print("Average METEOR score:", average_meteor_score)

# Test

In [40]:
model.encoder.block[0].layer[1].DenseReluDense.act = getattr(nn, "Tanh")()
#["ReLU", "GELU", "LeakyReLU", "Sigmoid", "Tanh"]

In [41]:
act = model.encoder.block[0].layer[1].DenseReluDense.act 

if type(act) == torch.nn.modules.activation.Tanh:
    print("y")
    


<class 'torch.nn.modules.activation.Tanh'>
y


In [70]:
df

Unnamed: 0,translation
0,"{'en': 'we do what we want', 'ru': 'мы делаем ..."
1,{'en': 'don't you get bored when you're alone'...
2,"{'en': 'tom loves me', 'ru': 'том любит меня'}"
3,"{'en': 'tom is very lazy', 'ru': 'том очень ле..."
4,"{'en': 'this audiobook is worth listening to',..."
...,...
187048,"{'en': 'tom already knows about this', 'ru': '..."
187049,"{'en': 'it was nice to see you again', 'ru': '..."
187050,"{'en': 'can i pick you up', 'ru': 'можно за то..."
187051,"{'en': 'i won the contest', 'ru': 'я выиграл с..."


In [107]:
columns = [
                        'Encoder0_Activation_ReLU', 'Encoder1_Activation_ReLU', 'Encoder2_Activation_ReLU', 'Encoder3_Activation_ReLU', 'Encoder4_Activation_ReLU', 'Encoder5_Activation_ReLU',
                        'Encoder0_Activation_GELU', 'Encoder1_Activation_GELU', 'Encoder2_Activation_GELU', 'Encoder3_Activation_GELU', 'Encoder4_Activation_GELU', 'Encoder5_Activation_GELU',
                        'Encoder0_Activation_LeakyReLU', 'Encoder1_Activation_LeakyReLU', 'Encoder2_Activation_LeakyReLU', 'Encoder3_Activation_LeakyReLU', 'Encoder4_Activation_LeakyReLU', 'Encoder5_Activation_LeakyReLU',
                        'Encoder0_Activation_Sigmoid', 'Encoder1_Activation_Sigmoid', 'Encoder2_Activation_Sigmoid', 'Encoder3_Activation_Sigmoid', 'Encoder4_Activation_Sigmoid', 'Encoder5_Activation_Sigmoid',
                        'Encoder0_Activation_Tanh', 'Encoder1_Activation_Tanh', 'Encoder2_Activation_Tanh', 'Encoder3_Activation_Tanh', 'Encoder4_Activation_Tanh', 'Encoder5_Activation_Tanh',
                        'Decoder0_Activation_ReLU', 'Decoder1_Activation_ReLU', 'Decoder2_Activation_ReLU', 'Decoder3_Activation_ReLU', 'Decoder4_Activation_ReLU', 'Decoder5_Activation_ReLU',
                        'Decoder0_Activation_GELU', 'Decoder1_Activation_GELU', 'Decoder2_Activation_GELU', 'Decoder3_Activation_GELU', 'Decoder4_Activation_GELU', 'Decoder5_Activation_GELU',
                        'Decoder0_Activation_LeakyReLU', 'Decoder1_Activation_LeakyReLU', 'Decoder2_Activation_LeakyReLU', 'Decoder3_Activation_LeakyReLU', 'Decoder4_Activation_LeakyReLU', 'Decoder5_Activation_LeakyReLU',
                        'Decoder0_Activation_Sigmoid', 'Decoder1_Activation_Sigmoid', 'Decoder2_Activation_Sigmoid', 'Decoder3_Activation_Sigmoid', 'Decoder4_Activation_Sigmoid', 'Decoder5_Activation_Sigmoid',
                        'Decoder0_Activation_Tanh', 'Decoder1_Activation_Tanh', 'Decoder2_Activation_Tanh', 'Decoder3_Activation_Tanh', 'Decoder4_Activation_Tanh', 'Decoder5_Activation_Tanh',
                        'Encoder0_Activation_DropOut', 'Encoder1_Activation_DropOut', 'Encoder2_Activation_DropOut', 'Encoder3_Activation_DropOut', 'Encoder4_Activation_DropOut', 'Encoder5_Activation_DropOut',
                        'Decoder0_Activation_DropOut', 'Decoder1_Activation_DropOut', 'Decoder2_Activation_DropOut', 'Decoder3_Activation_DropOut', 'Decoder4_Activation_DropOut', 'Decoder5_Activation_DropOut',
                        'Training Loss', 'Validation Loss']

In [108]:
df2 = pd.DataFrame(columns=columns)
df2['Encoder0_Activation_Tanh'] = [1]

In [110]:
df2['Encoder0_Activation_Tanh']

0    1
Name: Encoder0_Activation_Tanh, dtype: int64

In [14]:
class FeedForwardNN(nn.Module):
    def __init__(self, input_size):
        super(FeedForwardNN, self).__init__()
        self.fc1 = nn.Linear(input_size, 64)  # Input to first hidden layer
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(64, 32)  # First hidden layer to second hidden layer
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(32, 1)  # Output layer

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu1(x)
        x = self.fc2(x)
        x = self.relu2(x)
        x = self.fc3(x)
        return x
import pickle
with open("model.pkl", "rb") as f:
    loaded_model = pickle.load(f)