In [1]:
def preprocess_dataset(path):
    """ Remove unnecessary characters and encode the sentiment labels.

    The type of preprocessing required changes based on the dataset. For the
    IMDb dataset, the review texts contains HTML break tags (<br/>) leftover
    from the scraping process, and some unnecessary whitespace, which are
    removed. Finally, encode the sentiment labels as 0 for "negative" and 1 for
    "positive". This method assumes the dataset file contains the headers
    "review" and "sentiment".

    Parameters:
        path (str): A path to a dataset file containing the sentiment analysis
            dataset. The structure of the file should be as follows: one column
            called "review" containing the review text, and one column called
            "sentiment" containing the ground truth label. The label options
            should be "negative" and "positive".

    Returns:
        df_dataset (pd.DataFrame): A DataFrame containing the raw data
            loaded from the self.dataset path. In addition to the expected
            "review" and "sentiment" columns, are:

            > review_cleaned - a copy of the "review" column with the HTML
                break tags and unnecessary whitespace removed

            > sentiment_encoded - a copy of the "sentiment" column with the
                "negative" values mapped to 0 and "positive" values mapped
                to 1
    """
    df_dataset = pd.read_csv(path)

    df_dataset['review_cleaned'] = df_dataset['review'].\
        apply(lambda x: x.replace('<br />', ''))

    df_dataset['review_cleaned'] = df_dataset['review_cleaned'].\
        replace('\s+', ' ', regex=True)

    df_dataset['sentiment_encoded'] = df_dataset['sentiment'].\
        apply(lambda x: 0 if x == 'negative' else 1)

    return df_dataset

  replace('\s+', ' ', regex=True)


In [2]:
import numpy as np
import pandas as pd

dataset = preprocess_dataset("C:/Users/Lenovo/Desktop/NLP/Final_project/IMDB Dataset.csv")

print(dataset.head(10))  # ดูตัวอย่าง 5 แถวแรก


                                              review sentiment  \
0  One of the other reviewers has mentioned that ...  positive   
1  A wonderful little production. <br /><br />The...  positive   
2  I thought this was a wonderful way to spend ti...  positive   
3  Basically there's a family where a little boy ...  negative   
4  Petter Mattei's "Love in the Time of Money" is...  positive   
5  Probably my all-time favorite movie, a story o...  positive   
6  I sure would like to see a resurrection of a u...  positive   
7  This show was an amazing, fresh & innovative i...  negative   
8  Encouraged by the positive comments about this...  negative   
9  If you like original gut wrenching laughter yo...  positive   

                                      review_cleaned  sentiment_encoded  
0  One of the other reviewers has mentioned that ...                  1  
1  A wonderful little production. The filming tec...                  1  
2  I thought this was a wonderful way to spend ti..

In [None]:
'''
from datetime import datetime
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import AdamW
from torch.utils.data import TensorDataset, DataLoader
from transformers import (
    BertForSequenceClassification,
    BertTokenizer,
    get_linear_schedule_with_warmup)


class FineTuningPipeline:

    def __init__(
            self,
            dataset,
            tokenizer,
            model,
            optimizer,
            loss_function = nn.CrossEntropyLoss(),
            val_size = 0.1,
            epochs = 4,
            seed = 42):

        self.df_dataset = dataset
        self.tokenizer = tokenizer
        self.model = model
        self.optimizer = optimizer
        self.loss_function = loss_function
        self.val_size = val_size
        self.epochs = epochs
        self.seed = seed

        # Check if GPU is available for faster training time
        if torch.cuda.is_available():
            self.device = torch.device('cuda:0')
        else:
            self.device = torch.device('cpu')

        # Perform fine-tuning
        self.model.to(self.device)
        self.set_seeds()
        self.token_ids, self.attention_masks = self.tokenize_dataset()
        self.train_dataloader, self.val_dataloader = self.create_dataloaders()
        self.scheduler = self.create_scheduler()
        self.fine_tune()

    def tokenize(self, text):
        """ Tokenize input text and return the token IDs and attention mask.

        Tokenize an input string, setting a maximum length of 512 tokens.
        Sequences with more than 512 tokens will be truncated to this limit,
        and sequences with less than 512 tokens will be supplemented with [PAD]
        tokens to bring them up to this limit. The datatype of the returned
        tensors will be the PyTorch tensor format. These return values are
        tensors of size 1 x max_length where max_length is the maximum number
        of tokens per input sequence (512 for BERT).

        Parameters:
            text (str): The text to be tokenized.

        Returns:
            token_ids (torch.Tensor): A tensor of token IDs for each token in
                the input sequence.

            attention_mask (torch.Tensor): A tensor of 1s and 0s where a 1
                indicates a token can be attended to during the attention
                process, and a 0 indicates a token should be ignored. This is
                used to prevent BERT from attending to [PAD] tokens during its
                training/inference.
        """
        batch_encoder = self.tokenizer.encode_plus(
            text,
            max_length = 128,
            #max_length = 512,
            padding = 'max_length',
            truncation = True,
            return_tensors = 'pt')

        token_ids = batch_encoder['input_ids']
        attention_mask = batch_encoder['attention_mask']

        return token_ids, attention_mask

    def tokenize_dataset(self):
        """ Apply the self.tokenize method to the fine-tuning dataset.

        Tokenize and return the input sequence for each row in the fine-tuning
        dataset given by self.dataset. The return values are tensors of size
        len_dataset x max_length where len_dataset is the number of rows in the
        fine-tuning dataset and max_length is the maximum number of tokens per
        input sequence (512 for BERT).

        Parameters:
            None.

        Returns:
            token_ids (torch.Tensor): A tensor of tensors containing token IDs
            for each token in the input sequence.

            attention_masks (torch.Tensor): A tensor of tensors containing the
                attention masks for each sequence in the fine-tuning dataset.
        """
        token_ids = []
        attention_masks = []

        for review in self.df_dataset['review_cleaned']:
            tokens, masks = self.tokenize(review)
            token_ids.append(tokens)
            attention_masks.append(masks)

        token_ids = torch.cat(token_ids, dim=0)
        attention_masks = torch.cat(attention_masks, dim=0)

        return token_ids, attention_masks

    def create_dataloaders(self):
        """ Create dataloaders for the train and validation set.

        Split the tokenized dataset into train and validation sets according to
        the self.val_size value. For example, if self.val_size is set to 0.1,
        90% of the data will be used to form the train set, and 10% for the
        validation set. Convert the "sentiment_encoded" column (labels for each
        row) to PyTorch tensors to be used in the dataloaders.

        Parameters:
            None.

        Returns:
            train_dataloader (torch.utils.data.dataloader.DataLoader): A
                dataloader of the train data, including the token IDs,
                attention masks, and sentiment labels.

            val_dataloader (torch.utils.data.dataloader.DataLoader): A
                dataloader of the validation data, including the token IDs,
                attention masks, and sentiment labels.

        """
        train_ids, val_ids = train_test_split(
                        self.token_ids,
                        test_size=self.val_size,
                        shuffle=False)

        train_masks, val_masks = train_test_split(
                                    self.attention_masks,
                                    test_size=self.val_size,
                                    shuffle=False)

        labels = torch.tensor(self.df_dataset['sentiment_encoded'].values)
        train_labels, val_labels = train_test_split(
                                        labels,
                                        test_size=self.val_size,
                                        shuffle=False)

        train_data = TensorDataset(train_ids, train_masks, train_labels)
        train_dataloader = DataLoader(train_data, shuffle=True, batch_size=32) # batch_size = 16 before
        val_data = TensorDataset(val_ids, val_masks, val_labels)
        val_dataloader = DataLoader(val_data, batch_size=32) # batch_size = 16 before

        return train_dataloader, val_dataloader

    def create_scheduler(self):
        """ Create a linear scheduler for the learning rate.

        Create a scheduler with a learning rate that increases linearly from 0
        to a maximum value (called the warmup period), then decreases linearly
        to 0 again. num_warmup_steps is set to 0 here based on an example from
        Hugging Face:

        https://github.com/huggingface/transformers/blob/5bfcd0485ece086ebcbed2
        d008813037968a9e58/examples/run_glue.py#L308

        Read more about schedulers here:

        https://huggingface.co/docs/transformers/main_classes/optimizer_
        schedules#transformers.get_linear_schedule_with_warmup
        """
        num_training_steps = self.epochs * len(self.train_dataloader)
        scheduler = get_linear_schedule_with_warmup(
            self.optimizer,
            num_warmup_steps=0,
            num_training_steps=num_training_steps)

        return scheduler

    def set_seeds(self):
        """ Set the random seeds so that results are reproduceable.

        Parameters:
            None.

        Returns:
            None.
        """
        np.random.seed(self.seed)
        torch.manual_seed(self.seed)
        torch.cuda.manual_seed_all(self.seed)

    def fine_tune(self):
        """Train the classification head on the BERT model.

        Fine-tune the model by training the classification head (linear layer)
        sitting on top of the BERT model. The model trained on the data in the
        self.train_dataloader, and validated at the end of each epoch on the
        data in the self.val_dataloader. The series of steps are described
        below:

        Training:

        > Create a dictionary to store the average training loss and average
          validation loss for each epoch.
        > Store the time at the start of training, this is used to calculate
          the time taken for the entire training process.
        > Begin a loop to train the model for each epoch in self.epochs.

        For each epoch:

        > Switch the model to train mode. This will cause the model to behave
          differently than when in evaluation mode (e.g. the batchnorm and
          dropout layers are activated in train mode, but disabled in
          evaluation mode).
        > Set the training loss to 0 for the start of the epoch. This is used
          to track the loss of the model on the training data over subsequent
          epochs. The loss should decrease with each epoch if training is
          successful.
        > Store the time at the start of the epoch, this is used to calculate
          the time taken for the epoch to be completed.
        > As per the BERT authors' recommendations, the training data for each
          epoch is split into batches. Loop through the training process for
          each batch.

        For each batch:

        > Move the token IDs, attention masks, and labels to the GPU if
          available for faster processing, otherwise these will be kept on the
          CPU.
        > Invoke the zero_grad method to reset the calculated gradients from
          the previous iteration of this loop.
        > Pass the batch to the model to calculate the logits (predictions
          based on the current classifier weights and biases) as well as the
          loss.
        > Increment the total loss for the epoch. The loss is returned from the
          model as a PyTorch tensor so extract the float value using the item
          method.
        > Perform a backward pass of the model and propagate the loss through
          the classifier head. This will allow the model to determine what
          adjustments to make to the weights and biases to improve its
          performance on the batch.
        > Clip the gradients to be no larger than 1.0 so the model does not
          suffer from the exploding gradients problem.
        > Call the optimizer to take a step in the direction of the error
          surface as determined by the backward pass.

        After training on each batch:

        > Calculate the average loss and time taken for training on the epoch.

        Validation step for the epoch:

        > Switch the model to evaluation mode.
        > Set the validation loss to 0. This is used to track the loss of the
          model on the validation data over subsequent epochs. The loss should
          decrease with each epoch if training was successful.
        > Store the time at the start of the validation, this is used to
          calculate the time taken for the validation for this epoch to be
          completed.
        > Split the validation data into batches.

        For each batch:

        > Move the token IDs, attention masks, and labels to the GPU if
          available for faster processing, otherwise these will be kept on the
          CPU.
        > Invoke the no_grad method to instruct the model not to calculate the
          gradients since we wil not be performing any optimization steps here,
          only inference.
        > Pass the batch to the model to calculate the logits (predictions
          based on the current classifier weights and biases) as well as the
          loss.
        > Extract the logits and labels from the model and move them to the CPU
          (if they are not already there).
        > Increment the loss and calculate the accuracy based on the true
          labels in the validation dataloader.
        > Calculate the average loss and accuracy, and add these to the loss
          dictionary.
        """

        loss_dict = {
            'epoch': [i+1 for i in range(self.epochs)],
            'average training loss': [],
            'average validation loss': []
        }

        t0_train = datetime.now()

        for epoch in range(0, self.epochs):

            # Train step
            self.model.train()
            training_loss = 0
            t0_epoch = datetime.now()

            print(f'{"-"*20} Epoch {epoch+1} {"-"*20}')
            print('\nTraining:\n---------')
            print(f'Start Time:       {t0_epoch}')

            for batch in self.train_dataloader:

                batch_token_ids = batch[0].to(self.device)
                batch_attention_mask = batch[1].to(self.device)
                batch_labels = batch[2].to(self.device)

                self.model.zero_grad()

                loss, logits = self.model(
                    batch_token_ids,
                    token_type_ids = None,
                    attention_mask=batch_attention_mask,
                    labels=batch_labels,
                    return_dict=False)

                training_loss += loss.item()
                loss.backward()
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)
                self.optimizer.step()
                self.scheduler.step()

            average_train_loss = training_loss / len(self.train_dataloader)
            time_epoch = datetime.now() - t0_epoch

            print(f'Average Loss:     {average_train_loss}')
            print(f'Time Taken:       {time_epoch}')

            # Validation step
            self.model.eval()
            val_loss = 0
            val_accuracy = 0
            t0_val = datetime.now()

            print('\nValidation:\n---------')
            print(f'Start Time:       {t0_val}')

            for batch in self.val_dataloader:

                batch_token_ids = batch[0].to(self.device)
                batch_attention_mask = batch[1].to(self.device)
                batch_labels = batch[2].to(self.device)

                with torch.no_grad():
                    (loss, logits) = self.model(
                        batch_token_ids,
                        attention_mask = batch_attention_mask,
                        labels = batch_labels,
                        token_type_ids = None,
                        return_dict=False)

                logits = logits.detach().cpu().numpy()
                label_ids = batch_labels.to('cpu').numpy()
                val_loss += loss.item()
                val_accuracy += self.calculate_accuracy(logits, label_ids)


            average_val_accuracy = val_accuracy / len(self.val_dataloader)
            average_val_loss = val_loss / len(self.val_dataloader)
            time_val = datetime.now() - t0_val

            print(f'Average Loss:     {average_val_loss}')
            print(f'Average Accuracy: {average_val_accuracy}')
            print(f'Time Taken:       {time_val}\n')

            loss_dict['average training loss'].append(average_train_loss)
            loss_dict['average validation loss'].append(average_val_loss)

        print(f'Total training time: {datetime.now()-t0_train}')

    def calculate_accuracy(self, preds, labels):
        """ Calculate the accuracy of model predictions against true labels.

        Parameters:
            preds (np.array): The predicted label from the model
            labels (np.array): The true label

        Returns:
            accuracy (float): The accuracy as a percentage of the correct
                predictions.
        """
        pred_flat = np.argmax(preds, axis=1).flatten()
        labels_flat = labels.flatten()
        accuracy = np.sum(pred_flat == labels_flat) / len(labels_flat)

        return accuracy

    def predict(self, dataloader):
        """Return the predicted probabilities of each class for input text.
        
        Parameters:
            dataloader (torch.utils.data.DataLoader): A DataLoader containing
                the token IDs and attention masks for the text to perform
                inference on.
        
        Returns:
            probs (PyTorch.Tensor): A tensor containing the probability values
                for each class as predicted by the model.

        """

        self.model.eval()
        all_logits = []

        for batch in dataloader:

            batch_token_ids, batch_attention_mask = tuple(t.to(self.device) \
                for t in batch)[:2]

            with torch.no_grad():
                outputs = self.model(batch_token_ids, attention_mask=batch_attention_mask)
                logits = outputs.logits

                #logits = self.model(batch_token_ids, batch_attention_mask)

            all_logits.append(logits)

        all_logits = torch.cat(all_logits, dim=0)

        probs = F.softmax(all_logits, dim=1).cpu().numpy()
        return probs


'''        


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
import transformers
print(transformers.__version__)


4.51.3


In [8]:
#pip install sentencepiece

# Fine tune + ElaLora model

In [None]:
#pip install peft

Collecting peftNote: you may need to restart the kernel to use updated packages.

  Downloading peft-0.16.0-py3-none-any.whl.metadata (14 kB)
Downloading peft-0.16.0-py3-none-any.whl (472 kB)
Installing collected packages: peft
Successfully installed peft-0.16.0


In [2]:
# 🚀 FineTuningPipeline (Modify ElaLoRA)

from loralib.elalora import SVDLinear
from datetime import datetime
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import AdamW
from torch.utils.data import TensorDataset, DataLoader
from transformers import (
    BertForSequenceClassification,
    BertTokenizer,
    get_linear_schedule_with_warmup)


class FineTuningPipeline:
    def __init__(self, dataset, tokenizer, model, optimizer,
                 loss_function=nn.CrossEntropyLoss(), val_size=0.1,
                 epochs=4, seed=42 , allocator=None):
        
        self.allocator = allocator
        self.df_dataset = dataset
        self.tokenizer = tokenizer
        self.model = model
        self.optimizer = optimizer
        self.loss_function = loss_function
        self.val_size = val_size
        self.epochs = epochs
        self.seed = seed

        self.device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
        self.model.to(self.device)
        self.set_seeds()
        self.freeze_base_weights()  # add freeze gradient
        
        # tokenization + dataloaders
        self.token_ids, self.attention_masks = self.tokenize_dataset()
        self.train_dataloader, self.val_dataloader = self.create_dataloaders()

        # 🔴 เพิ่มตรงนี้
        self.configure_allocator()

        self.scheduler = self.create_scheduler()
        self.fine_tune()

    # freeze fradient update weight
    def freeze_base_weights(self):
        print("🔒 Freezing base model weights (non-SVDLinear layers)...")

        from loralib.elalora import SVDLinear
        for name, module in self.model.named_modules():
            if isinstance(module, SVDLinear):
                for param_name, param in module.named_parameters():
                    param.requires_grad = True
            else:
                for param in module.parameters(recurse=False):
                    param.requires_grad = False

        print("🔎 Checking which parameters are trainable...")
        for name, param in self.model.named_parameters():
            if param.requires_grad:
                print(f"✅ TRAINING: {name}")
            else:
                print(f"❌ FROZEN:   {name}")

    '''
    def tokenize(self, text):
        encoded = self.tokenizer.encode_plus(
            text,
            max_length=256, # before 128 , 512 , 256
            padding='max_length',
            truncation=True,
            return_tensors='pt'
        )
        return encoded['input_ids'], encoded['attention_mask']
    '''

    def tokenize(self, text, max_len=256, ratio=0.72):
        budget = max_len - 2
        head = int(budget * ratio)
        tail = budget - head

        toks = self.tokenizer(text, add_special_tokens=False).input_ids
        if len(toks) > budget:
            toks = toks[:head] + toks[-tail:]

        # ใช้ return_tensors=None แล้วแปลงเอง เพื่อคุม shape ชัวร์
        enc = self.tokenizer.prepare_for_model(
            toks, max_length=max_len, truncation=True, padding='max_length', return_tensors=None
        )
        ids  = torch.tensor(enc["input_ids"], dtype=torch.long).unsqueeze(0)      # (1, L)
        mask = torch.tensor(enc["attention_mask"], dtype=torch.long).unsqueeze(0) # (1, L)
        return ids, mask

    '''
    def tokenize_dataset(self):
        token_ids, attention_masks = [], []
        for review in self.df_dataset['review_cleaned']:
            ids, mask = self.tokenize(review)
            token_ids.append(ids)
            attention_masks.append(mask)
        return torch.cat(token_ids, dim=0), torch.cat(attention_masks, dim=0)
    '''

    def tokenize_dataset(self):
        token_ids, attention_masks = [], []
        for review in self.df_dataset['review_cleaned']:
            ids, mask = self.tokenize(review)        # ids, mask shape = (1, L)
            token_ids.append(ids)
            attention_masks.append(mask)
        token_ids = torch.cat(token_ids, dim=0)          # (N, L)
        attention_masks = torch.cat(attention_masks, 0)  # (N, L)

        # debug safety
        print("shapes:", token_ids.shape, attention_masks.shape)
        return token_ids, attention_masks



    def create_dataloaders(self):
        from sklearn.model_selection import train_test_split
        labels = torch.tensor(self.df_dataset['sentiment_encoded'].values)
        train_ids, val_ids, train_masks, val_masks, train_labels, val_labels = train_test_split(
            self.token_ids, 
            self.attention_masks, 
            labels, 
            test_size=self.val_size, 
            shuffle=True,
            stratify=labels,
            random_state=self.seed
            )

        train_data = TensorDataset(train_ids, train_masks, train_labels)
        val_data = TensorDataset(val_ids, val_masks, val_labels)

        #return DataLoader(train_data, shuffle=True, batch_size=16), DataLoader(val_data, batch_size=16)  # before bacth_size = 32

        train_loader = DataLoader(train_data, shuffle=True,  batch_size=16, num_workers=2, pin_memory=True, drop_last=False)
        val_loader   = DataLoader(val_data,   shuffle=False, batch_size=16, num_workers=2, pin_memory=True)
        return train_loader, val_loader
    
    def configure_allocator(self):
        if self.allocator is None:
            return
        steps_per_epoch = len(self.train_dataloader)
        total_steps = steps_per_epoch * self.epochs
        # 10% / 60% / 10%
        self.allocator.total_step    = total_steps
        self.allocator.init_warmup   = int(0.10 * total_steps)
        self.allocator.final_warmup  = int(0.10 * total_steps)
        self.allocator.mask_interval = max(50, int(0.10 * total_steps))
        print("Allocator:", self.allocator.init_warmup, self.allocator.final_warmup,
          self.allocator.mask_interval, self.allocator.total_step)

    #def create_scheduler(self):
    #    total_steps = self.epochs * len(self.train_dataloader)
    #    return get_linear_schedule_with_warmup(self.optimizer, 0, total_steps)
    
    def create_scheduler(self):
        total_steps = self.epochs * len(self.train_dataloader)
        warmup_steps = int(0.10 * total_steps)  # 6% warmup
        return get_linear_schedule_with_warmup(
            self.optimizer,
            num_warmup_steps=warmup_steps,
            num_training_steps=total_steps
        )

    def set_seeds(self):
        np.random.seed(self.seed)
        torch.manual_seed(self.seed)
        torch.cuda.manual_seed_all(self.seed)

    def fine_tune(self):
        from datetime import datetime
        print(f"🔍 Model type: {type(self.model)}")
        t0_train = datetime.now()
        global_step = 0  # 🔁 Step counter for ElaLoRA
        
        for epoch in range(self.epochs):
            print(f"\n===== Epoch {epoch+1}/{self.epochs} =====")

            # Training
            self.model.train()
            train_loss = 0
            for batch in self.train_dataloader:
                ids, mask, labels = [x.to(self.device) for x in batch]
                self.model.zero_grad()
                outputs = self.model(input_ids=ids, attention_mask=mask, labels=labels)
                loss = outputs.loss
                logits = outputs.logits
                loss.backward()
                train_loss += loss.item()
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)
                self.optimizer.step()
                self.scheduler.step()



                global_step += 1

                # ✅ NEW: Adapt rank if model supports it
                if hasattr(self.model, "maybe_adapt_rank"):
                    self.model.maybe_adapt_rank(global_step=global_step)



            print(f"✅ Avg Train Loss: {train_loss / len(self.train_dataloader):.4f}")

            # Validation
            self.model.eval()
            val_loss, val_accuracy = 0, 0
            t0_val = datetime.now()
            for batch in self.val_dataloader:
                ids, mask, labels = [x.to(self.device) for x in batch]
                with torch.no_grad():
                    outputs = self.model(input_ids=ids, attention_mask=mask, labels=labels)
                loss = outputs.loss
                logits = outputs.logits
                val_loss += loss.item()
                val_accuracy += self.calculate_accuracy(logits.cpu().numpy(), labels.cpu().numpy())

            val_time = datetime.now() - t0_val
            print(f"🧪 Avg Val Loss:  {val_loss / len(self.val_dataloader):.4f}")
            print(f"🎯 Val Accuracy: {val_accuracy / len(self.val_dataloader):.4f}")
            print(f"🕒 Val Time:      {val_time}")

        print(f"\n✅ Total training time: {datetime.now() - t0_train}")


    def calculate_accuracy(self, preds, labels):
        preds_flat = np.argmax(preds, axis=1).flatten()
        return np.sum(preds_flat == labels.flatten()) / len(labels)
    
    def predict(self, dataloader):
        """Return the predicted probabilities of each class for input text.
        
        Parameters:
            dataloader (torch.utils.data.DataLoader): A DataLoader containing
                the token IDs and attention masks for the text to perform
                inference on.
        
        Returns:
            probs (PyTorch.Tensor): A tensor containing the probability values
                for each class as predicted by the model.

        """

        self.model.eval()
        all_logits = []

        for batch in dataloader:

            batch_token_ids, batch_attention_mask = tuple(t.to(self.device) \
                for t in batch)[:2]

            with torch.no_grad():
                outputs = self.model(batch_token_ids, attention_mask=batch_attention_mask)
                logits = outputs.logits

                #logits = self.model(batch_token_ids, batch_attention_mask)

            all_logits.append(logits)

        all_logits = torch.cat(all_logits, dim=0)

        probs = F.softmax(all_logits, dim=1).cpu().numpy()
        return probs
    

In [None]:
#pip install bitsandbytes

Collecting bitsandbytes
  Downloading bitsandbytes-0.46.1-py3-none-win_amd64.whl.metadata (10 kB)
Downloading bitsandbytes-0.46.1-py3-none-win_amd64.whl (72.2 MB)
   ---------------------------------------- 0.0/72.2 MB ? eta -:--:--
    --------------------------------------- 1.3/72.2 MB 9.5 MB/s eta 0:00:08
   - -------------------------------------- 3.4/72.2 MB 9.6 MB/s eta 0:00:08
   -- ------------------------------------- 5.0/72.2 MB 8.6 MB/s eta 0:00:08
   --- ------------------------------------ 6.8/72.2 MB 8.9 MB/s eta 0:00:08
   ---- ----------------------------------- 8.9/72.2 MB 9.1 MB/s eta 0:00:07
   ------ --------------------------------- 11.0/72.2 MB 9.2 MB/s eta 0:00:07
   ------ --------------------------------- 12.3/72.2 MB 8.9 MB/s eta 0:00:07
   ------- -------------------------------- 14.4/72.2 MB 9.1 MB/s eta 0:00:07
   --------- ------------------------------ 16.8/72.2 MB 9.2 MB/s eta 0:00:07
   ---------- ----------------------------- 18.4/72.2 MB 9.0 MB/s eta 

# ElaLoRA

In [None]:
'''
! git clone https://github.com/HuandongChang/ElaLoRA.git
! pip install -e ./ElaLoRA/loralib

'''

fatal: destination path 'ElaLoRA' already exists and is not an empty directory.


Obtaining file:///C:/Users/Lenovo/Desktop/NLP/Final_project/test_run_model/ElaLoRA/loralib
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Installing collected packages: loralib
  Running setup.py develop for loralib
Successfully installed loralib-0.1.0


  DEPRECATION: Legacy editable install of loralib==0.1.0 from file:///C:/Users/Lenovo/Desktop/NLP/Final_project/test_run_model/ElaLoRA/loralib (setup.py develop) is deprecated. pip 25.0 will enforce this behaviour change. A possible replacement is to add a pyproject.toml or enable --use-pep517, and use setuptools >= 64. If the resulting installation is not behaving as expected, try using --config-settings editable_mode=compat. Please consult the setuptools documentation for more information. Discussion can be found at https://github.com/pypa/pip/issues/11457


In [2]:
import loralib.elalora
print(dir(loralib.elalora))

['F', 'List', 'LoRALayer', 'Optional', 'RankAllocator', 'SVDLinear', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__spec__', 'compute_orth_regu', 'json', 'math', 'nn', 'np', 'os', 'plot_ipt_graph', 'plot_rank', 'torch']


In [1]:
from loralib.elalora import RankAllocator


In [1]:
from loralib.elalora import SVDLinear, RankAllocator


# ElaLoRA

In [5]:
for name, param in base_model.named_parameters():
    print(name)

model.embeddings.tok_embeddings.weight
model.embeddings.norm.weight
model.layers.0.attn.Wqkv.weight
model.layers.0.attn.Wo.weight
model.layers.0.mlp_norm.weight
model.layers.0.mlp.Wi.weight
model.layers.0.mlp.Wo.weight
model.layers.1.attn_norm.weight
model.layers.1.attn.Wqkv.weight
model.layers.1.attn.Wo.weight
model.layers.1.mlp_norm.weight
model.layers.1.mlp.Wi.weight
model.layers.1.mlp.Wo.weight
model.layers.2.attn_norm.weight
model.layers.2.attn.Wqkv.weight
model.layers.2.attn.Wo.weight
model.layers.2.mlp_norm.weight
model.layers.2.mlp.Wi.weight
model.layers.2.mlp.Wo.weight
model.layers.3.attn_norm.weight
model.layers.3.attn.Wqkv.weight
model.layers.3.attn.Wo.weight
model.layers.3.mlp_norm.weight
model.layers.3.mlp.Wi.weight
model.layers.3.mlp.Wo.weight
model.layers.4.attn_norm.weight
model.layers.4.attn.Wqkv.weight
model.layers.4.attn.Wo.weight
model.layers.4.mlp_norm.weight
model.layers.4.mlp.Wi.weight
model.layers.4.mlp.Wo.weight
model.layers.5.attn_norm.weight
model.layers.5.at

In [None]:
from loralib.elalora import RankAllocator
help(RankAllocator)

Help on class RankAllocator in module loralib.elalora:

class RankAllocator(builtins.object)
 |  RankAllocator(model, lora_r: int, target_rank: int, init_warmup: int, final_warmup: int, mask_interval: int, beta1: float, beta2: float, total_step: Optional[int] = None, target_total_rank: Optional[int] = None, tb_writter=None, tb_writter_loginterval: int = 500, k: int = 2, b: int = 4, output_dir: str = None, enable_scheduler: bool = False)
 |
 |  The RankAllocator for AdaLoRA Model that will be called every training step.
 |  Paper: https://openreview.net/pdf?id=lq62uWRJjiY
 |
 |  Args:
 |      model: the model that we apply AdaLoRA to.
 |      lora_r (`int`): The initial rank for each incremental matrix.
 |      target_rank (`int`): The target average rank of incremental matrix.
 |      init_warmup (`int`): The steps of initial fine-tuning warmup.
 |      final_warmup (`int`): The step of final fine-tuning.
 |      mask_interval (`int`): The time internval between two budget allocations.

In [None]:
'''
from loralib.elalora import SVDLinear, RankAllocator
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from torch.optim import AdamW
import torch.nn as nn
import math

dataset = preprocess_dataset('C:/Users/Lenovo/Desktop/NLP/Final_project/IMDB Dataset.csv')

model_name = "answerdotai/ModernBERT-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
base_model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

# ✅ แปะ SVDLinear เฉพาะเลเยอร์สำคัญ (ชื่อยอดฮิตในสาย BERT/ModernBERT)
#   - ถ้า print(base_model) แล้วพบชื่อ q_proj/k_proj/... ให้เปลี่ยนเซ็ตนี้เป็น {"q_proj","k_proj","v_proj","o_proj","dense","classifier"}
TARGET_LINEAR_TOKENS = {"query","key","value","dense","intermediate","output","classifier"}

def replace_linear_with_svdlinear(module, prefix=""):
    for name, child in module.named_children():
        full = f"{prefix}.{name}" if prefix else name
        if isinstance(child, nn.Linear) and any(tok in full for tok in TARGET_LINEAR_TOKENS):
            in_f, out_f, has_bias = child.in_features, child.out_features, child.bias is not None
            setattr(module, name, SVDLinear(in_f, out_f, bias=has_bias))
        else:
            replace_linear_with_svdlinear(child, full)

replace_linear_with_svdlinear(base_model)

# ✅ Freeze base / train เฉพาะ SVDLinear (+ head ถ้ามี)
for p in base_model.parameters():
    p.requires_grad = False

for m in base_model.modules():
    if isinstance(m, SVDLinear):
        for p in m.parameters():
            p.requires_grad = True

if hasattr(base_model, "classifier"):
    for p in base_model.classifier.parameters():
        p.requires_grad = True

# ✅ Optimizer: กันพารามิเตอร์ซ้ำระหว่าง adapters กับ head
seen = set()

adap_params = []
for m in base_model.modules():
    if isinstance(m, SVDLinear):
        for p in m.parameters():
            if p.requires_grad and id(p) not in seen:
                adap_params.append(p)
                seen.add(id(p))

head_params = []
if hasattr(base_model, "classifier"):
    for p in base_model.classifier.parameters():
        if p.requires_grad and id(p) not in seen:
            head_params.append(p)
            seen.add(id(p))

param_groups = []
if adap_params:
    param_groups.append({"params": adap_params, "lr": 1.5e-3, "weight_decay": 0.01})
if head_params:
    param_groups.append({"params": head_params, "lr": 2e-3, "weight_decay": 0.0})

optimizer = AdamW(param_groups)

# ✅ Steps (ตั้งค่าเบื้องต้นได้ ถ้า Pipeline ภายในสร้าง dataloader เอง)
epochs = 3
estimated_total_steps = 2000  # ปรับเป็น len(train_loader)*epochs ถ้ามี

allocator = RankAllocator(
    model=base_model,
    lora_r=16,
    target_rank=24,
    init_warmup=int(0.10 * estimated_total_steps),
    final_warmup=int(0.60 * estimated_total_steps),
    mask_interval=max(50, int(0.10 * estimated_total_steps)),
    total_step=estimated_total_steps,
    beta1=0.85,
    beta2=0.85
)

# ✅ Fine-tuning pipeline (แนะนำใน preprocess: max_length≈320) use 256
fine_tuned_model = FineTuningPipeline(
    dataset=dataset,
    tokenizer=tokenizer,
    model=base_model,
    optimizer=optimizer,
    val_size=0.1,
    epochs=epochs,
    seed=42,
    allocator=allocator
)
'''

Some weights of ModernBertForSequenceClassification were not initialized from the model checkpoint at answerdotai/ModernBERT-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


🔒 Freezing base model weights (non-SVDLinear layers)...
🔎 Checking which parameters are trainable...
❌ FROZEN:   model.embeddings.tok_embeddings.weight
❌ FROZEN:   model.embeddings.norm.weight
❌ FROZEN:   model.layers.0.attn.Wqkv.weight
❌ FROZEN:   model.layers.0.attn.Wo.weight
❌ FROZEN:   model.layers.0.mlp_norm.weight
❌ FROZEN:   model.layers.0.mlp.Wi.weight
❌ FROZEN:   model.layers.0.mlp.Wo.weight
❌ FROZEN:   model.layers.1.attn_norm.weight
❌ FROZEN:   model.layers.1.attn.Wqkv.weight
❌ FROZEN:   model.layers.1.attn.Wo.weight
❌ FROZEN:   model.layers.1.mlp_norm.weight
❌ FROZEN:   model.layers.1.mlp.Wi.weight
❌ FROZEN:   model.layers.1.mlp.Wo.weight
❌ FROZEN:   model.layers.2.attn_norm.weight
❌ FROZEN:   model.layers.2.attn.Wqkv.weight
❌ FROZEN:   model.layers.2.attn.Wo.weight
❌ FROZEN:   model.layers.2.mlp_norm.weight
❌ FROZEN:   model.layers.2.mlp.Wi.weight
❌ FROZEN:   model.layers.2.mlp.Wo.weight
❌ FROZEN:   model.layers.3.attn_norm.weight
❌ FROZEN:   model.layers.3.attn.Wqkv.weigh

In [None]:
from loralib.elalora import SVDLinear, RankAllocator
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from torch.optim import AdamW
import torch.nn as nn
import math


dataset = preprocess_dataset('C:/Users/Lenovo/Desktop/NLP/Final_project/IMDB Dataset.csv')

model_name = "answerdotai/ModernBERT-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
base_model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

# SVDLinear  BERT/ModernBERT
#   - ถ้า print(base_model) แล้วพบชื่อ q_proj/k_proj/... ให้เปลี่ยนเซ็ตนี้เป็น {"q_proj","k_proj","v_proj","o_proj","dense","classifier"}
#TARGET_LINEAR_TOKENS = {"q_proj","k_proj","v_proj","o_proj","dense","classifier"}
#TARGET_LINEAR_TOKENS = {"query","key","value","dense","intermediate","output","classifier"}
TARGET_LINEAR_TOKENS = {"Wqkv", "Wo", "Wi","dense","classifier"}

def replace_linear_with_svdlinear(module, prefix=""):
    for name, child in module.named_children():
        full = f"{prefix}.{name}" if prefix else name
        if isinstance(child, nn.Linear) and any(tok in full for tok in TARGET_LINEAR_TOKENS):
            in_f, out_f, has_bias = child.in_features, child.out_features, child.bias is not None
            setattr(module, name, SVDLinear(in_f, out_f, bias=has_bias))
        else:
            replace_linear_with_svdlinear(child, full)

replace_linear_with_svdlinear(base_model)

from loralib.elalora import SVDLinear
svd_names = [n for n, m in base_model.named_modules() if isinstance(m, SVDLinear)]
print("SVDLinear count =", len(svd_names))
print(svd_names[:20])

#  Freeze base / train เฉพาะ SVDLinear 
for p in base_model.parameters():
    p.requires_grad = False

for m in base_model.modules():
    if isinstance(m, SVDLinear):
        for p in m.parameters():
            p.requires_grad = True

if hasattr(base_model, "classifier"):
    for p in base_model.classifier.parameters():
        p.requires_grad = True

# เปิด classifier เ
if hasattr(base_model, "classifier"):
    for p in base_model.classifier.parameters():
        p.requires_grad = True

# ✅ เพิ่ม: เปิดทุกชั้นที่ลงท้ายด้วย "norm"
for name, module in base_model.named_modules():
    if name.endswith("norm"):
        for p in module.parameters():
            p.requires_grad = True

# ✅ Optimizer: adapters กับ head
seen = set()

adap_params = []
for m in base_model.modules():
    if isinstance(m, SVDLinear):
        for p in m.parameters():
            if p.requires_grad and id(p) not in seen:
                adap_params.append(p)
                seen.add(id(p))

head_params = []
if hasattr(base_model, "classifier"):
    for p in base_model.classifier.parameters():
        if p.requires_grad and id(p) not in seen:
            head_params.append(p)
            seen.add(id(p))

param_groups = []
if adap_params:
    param_groups.append({"params": adap_params, "lr": 1.5e-3, "weight_decay": 0.01})
if head_params:
    param_groups.append({"params": head_params, "lr": 2.0e-3, "weight_decay": 0.0})

optimizer = AdamW(param_groups)

# ✅ Steps 
epochs = 5
'''
tmp_pipeline = FineTuningPipeline(
    dataset=dataset,
    tokenizer=tokenizer,
    model=base_model,
    optimizer=optimizer,
    val_size=0.1,
    epochs=epochs,
    seed=42,
    allocator=None  # ยังไม่ส่ง allocator เข้าไป
)
'''

#steps_per_epoch = len(tmp_pipeline.train_dataloader)
#estimated_total_steps = steps_per_epoch * epochs

allocator = RankAllocator(
    model=base_model,
    lora_r=24,
    target_rank=24,
    init_warmup=1,         # dummy, จะถูก override ภายใน pipeline
    final_warmup=1,        # dummy
    mask_interval=1,       # dummy
    total_step=1,
    #init_warmup=int(0.10 * estimated_total_steps),
    #final_warmup=int(0.60 * estimated_total_steps),
    #mask_interval=max(50, int(0.10 * estimated_total_steps)),
    #total_step=estimated_total_steps,
    beta1=0.85,
    beta2=0.85
)


loss_fn = nn.CrossEntropyLoss(label_smoothing=0.05)
# ✅ Fine-tuning pipeline  use 320
fine_tuned_model = FineTuningPipeline(
    dataset=dataset,
    tokenizer=tokenizer,
    model=base_model,
    optimizer=optimizer,
    loss_function=loss_fn,   
    val_size=0.1,
    epochs=epochs,
    seed=42,
    allocator=allocator
)

### adjust code ver2

Some weights of ModernBertForSequenceClassification were not initialized from the model checkpoint at answerdotai/ModernBERT-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


SVDLinear count = 90
['model.layers.0.attn.Wqkv', 'model.layers.0.attn.Wo', 'model.layers.0.mlp.Wi', 'model.layers.0.mlp.Wo', 'model.layers.1.attn.Wqkv', 'model.layers.1.attn.Wo', 'model.layers.1.mlp.Wi', 'model.layers.1.mlp.Wo', 'model.layers.2.attn.Wqkv', 'model.layers.2.attn.Wo', 'model.layers.2.mlp.Wi', 'model.layers.2.mlp.Wo', 'model.layers.3.attn.Wqkv', 'model.layers.3.attn.Wo', 'model.layers.3.mlp.Wi', 'model.layers.3.mlp.Wo', 'model.layers.4.attn.Wqkv', 'model.layers.4.attn.Wo', 'model.layers.4.mlp.Wi', 'model.layers.4.mlp.Wo']


You're using a PreTrainedTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


🔒 Freezing base model weights (non-SVDLinear layers)...
🔎 Checking which parameters are trainable...
❌ FROZEN:   model.embeddings.tok_embeddings.weight
❌ FROZEN:   model.embeddings.norm.weight
✅ TRAINING: model.layers.0.attn.Wqkv.weight
✅ TRAINING: model.layers.0.attn.Wo.weight
❌ FROZEN:   model.layers.0.mlp_norm.weight
✅ TRAINING: model.layers.0.mlp.Wi.weight
✅ TRAINING: model.layers.0.mlp.Wo.weight
❌ FROZEN:   model.layers.1.attn_norm.weight
✅ TRAINING: model.layers.1.attn.Wqkv.weight
✅ TRAINING: model.layers.1.attn.Wo.weight
❌ FROZEN:   model.layers.1.mlp_norm.weight
✅ TRAINING: model.layers.1.mlp.Wi.weight
✅ TRAINING: model.layers.1.mlp.Wo.weight
❌ FROZEN:   model.layers.2.attn_norm.weight
✅ TRAINING: model.layers.2.attn.Wqkv.weight
✅ TRAINING: model.layers.2.attn.Wo.weight
❌ FROZEN:   model.layers.2.mlp_norm.weight
✅ TRAINING: model.layers.2.mlp.Wi.weight
✅ TRAINING: model.layers.2.mlp.Wo.weight
❌ FROZEN:   model.layers.3.attn_norm.weight
✅ TRAINING: model.layers.3.attn.Wqkv.weigh

In [None]:


from loralib.elalora import SVDLinear, RankAllocator
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from torch.optim import AdamW
import torch.nn as nn
import math



dataset = preprocess_dataset('C:/Users/Lenovo/Desktop/NLP/Final_project/IMDB Dataset.csv')

model_name = "answerdotai/ModernBERT-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
base_model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

# ✅ แปะ SVDLinear เฉพาะเลเยอร์สำคัญ (ชื่อยอดฮิตในสาย BERT/ModernBERT)
#   - ถ้า print(base_model) แล้วพบชื่อ q_proj/k_proj/... ให้เปลี่ยนเซ็ตนี้เป็น {"q_proj","k_proj","v_proj","o_proj","dense","classifier"}
#TARGET_LINEAR_TOKENS = {"q_proj","k_proj","v_proj","o_proj","dense","classifier"}
#TARGET_LINEAR_TOKENS = {"query","key","value","dense","intermediate","output","classifier"}
TARGET_LINEAR_TOKENS = {"Wqkv", "Wo", "Wi", "dense", "classifier"}

def replace_linear_with_svdlinear(module, prefix=""):
    for name, child in module.named_children():
        full = f"{prefix}.{name}" if prefix else name
        if isinstance(child, nn.Linear) and any(tok in full for tok in TARGET_LINEAR_TOKENS):
            in_f, out_f, has_bias = child.in_features, child.out_features, child.bias is not None
            setattr(module, name, SVDLinear(in_f, out_f, bias=has_bias))
        else:
            replace_linear_with_svdlinear(child, full)

replace_linear_with_svdlinear(base_model)

from loralib.elalora import SVDLinear
svd_names = [n for n, m in base_model.named_modules() if isinstance(m, SVDLinear)]
print("SVDLinear count =", len(svd_names))
print(svd_names[:20])

# ✅ Freeze base / train เฉพาะ SVDLinear (+ head ถ้ามี)
for p in base_model.parameters():
    p.requires_grad = False

for m in base_model.modules():
    if isinstance(m, SVDLinear):
        for p in m.parameters():
            p.requires_grad = True

if hasattr(base_model, "classifier"):
    for p in base_model.classifier.parameters():
        p.requires_grad = True

# ✅ Optimizer: กันพารามิเตอร์ซ้ำระหว่าง adapters กับ head
seen = set()

adap_params = []
for m in base_model.modules():
    if isinstance(m, SVDLinear):
        for p in m.parameters():
            if p.requires_grad and id(p) not in seen:
                adap_params.append(p)
                seen.add(id(p))

head_params = []
if hasattr(base_model, "classifier"):
    for p in base_model.classifier.parameters():
        if p.requires_grad and id(p) not in seen:
            head_params.append(p)
            seen.add(id(p))

param_groups = []
if adap_params:
    param_groups.append({"params": adap_params, "lr": 5e-4, "weight_decay": 0.01})
if head_params:
    param_groups.append({"params": head_params, "lr": 1e-3, "weight_decay": 0.0})

optimizer = AdamW(param_groups)

# ✅ Steps (ตั้งค่าเบื้องต้นได้ ถ้า Pipeline ภายในสร้าง dataloader เอง)
epochs = 3
'''
tmp_pipeline = FineTuningPipeline(
    dataset=dataset,
    tokenizer=tokenizer,
    model=base_model,
    optimizer=optimizer,
    val_size=0.1,
    epochs=epochs,
    seed=42,
    allocator=None  # ยังไม่ส่ง allocator เข้าไป
)
'''

#steps_per_epoch = len(tmp_pipeline.train_dataloader)
#estimated_total_steps = steps_per_epoch * epochs

allocator = RankAllocator(
    model=base_model,
    lora_r=32,
    target_rank=32,
    init_warmup=1,         # dummy, จะถูก override ภายใน pipeline
    final_warmup=1,        # dummy
    mask_interval=1,       # dummy
    total_step=1,
    #init_warmup=int(0.10 * estimated_total_steps),
    #final_warmup=int(0.60 * estimated_total_steps),
    #mask_interval=max(50, int(0.10 * estimated_total_steps)),
    #total_step=estimated_total_steps,
    beta1=0.85,
    beta2=0.85
)


loss_fn = nn.CrossEntropyLoss(label_smoothing=0.0)
# ✅ Fine-tuning pipeline (แนะนำใน preprocess: max_length≈320) use 320
fine_tuned_model = FineTuningPipeline(
    dataset=dataset,
    tokenizer=tokenizer,
    model=base_model,
    optimizer=optimizer,
    loss_function=loss_fn,   
    val_size=0.1,
    epochs=epochs,
    seed=42,
    allocator=allocator
)


Some weights of ModernBertForSequenceClassification were not initialized from the model checkpoint at answerdotai/ModernBERT-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


SVDLinear count = 90
['model.layers.0.attn.Wqkv', 'model.layers.0.attn.Wo', 'model.layers.0.mlp.Wi', 'model.layers.0.mlp.Wo', 'model.layers.1.attn.Wqkv', 'model.layers.1.attn.Wo', 'model.layers.1.mlp.Wi', 'model.layers.1.mlp.Wo', 'model.layers.2.attn.Wqkv', 'model.layers.2.attn.Wo', 'model.layers.2.mlp.Wi', 'model.layers.2.mlp.Wo', 'model.layers.3.attn.Wqkv', 'model.layers.3.attn.Wo', 'model.layers.3.mlp.Wi', 'model.layers.3.mlp.Wo', 'model.layers.4.attn.Wqkv', 'model.layers.4.attn.Wo', 'model.layers.4.mlp.Wi', 'model.layers.4.mlp.Wo']
🔒 Freezing base model weights (non-SVDLinear layers)...
🔎 Checking which parameters are trainable...
❌ FROZEN:   model.embeddings.tok_embeddings.weight
❌ FROZEN:   model.embeddings.norm.weight
✅ TRAINING: model.layers.0.attn.Wqkv.weight
✅ TRAINING: model.layers.0.attn.Wo.weight
❌ FROZEN:   model.layers.0.mlp_norm.weight
✅ TRAINING: model.layers.0.mlp.Wi.weight
✅ TRAINING: model.layers.0.mlp.Wo.weight
❌ FROZEN:   model.layers.1.attn_norm.weight
✅ TRAINING

KeyboardInterrupt: 

In [None]:
'''
from loralib.elalora import SVDLinear, RankAllocator
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from torch.optim import AdamW
import torch.nn as nn


dataset = preprocess_dataset('C:/Users/Lenovo/Desktop/NLP/Final_project/IMDB Dataset.csv')
# ✅ 1) โหลด tokenizer และ base model
model_name = "answerdotai/ModernBERT-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
base_model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

# ✅ 2) แปะ SVDLinear แทน Linear ทุกตัวในโมเดล
def replace_linear_with_svdlinear(model):
    for name, module in model.named_children():
        if isinstance(module, nn.Linear):
            setattr(model, name, SVDLinear(module.in_features, module.out_features, bias=module.bias is not None))
        else:
            replace_linear_with_svdlinear(module)

replace_linear_with_svdlinear(base_model)

# ✅ 3) สร้าง RankAllocator สำหรับ dynamic rank control
from loralib.elalora import RankAllocator
import math

epochs = 3
# ... สร้าง train_dataloader / val_dataloader แล้ว ...
steps = math.ceil(len(train_dataloader)) * epochs

allocator = RankAllocator(
    model=base_model,
    lora_r=16,
    target_rank=24,
    init_warmup=int(0.10*steps),
    final_warmup=int(0.60*steps),
    mask_interval=max(50, int(0.10*steps)),
    total_step=steps,
    beta1=0.85, beta2=0.85
)

adap_params = [p for m in base_model.modules() if isinstance(m, SVDLinear) for p in m.parameters()]
head_params = list(base_model.classifier.parameters()) if hasattr(base_model, "classifier") else []
optimizer = AdamW([
    {"params": adap_params, "lr": 1.5e-3, "weight_decay": 0.01},
    {"params": head_params, "lr": 2e-3, "weight_decay": 0.0},
])

fine_tuned_model = FineTuningPipeline(
    dataset=dataset,
    tokenizer=tokenizer,          # ใน preprocess ใส่ max_length=320 แล้ว
    model=base_model,
    optimizer=optimizer,
    val_size=0.1,
    epochs=epochs,
    seed=42,
    allocator=allocator
)

'''

Some weights of ModernBertForSequenceClassification were not initialized from the model checkpoint at answerdotai/ModernBERT-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


NameError: name 'train_dataloader' is not defined

In [7]:
from loralib.elalora import SVDLinear, RankAllocator
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from torch.optim import AdamW
import torch.nn as nn
import math

dataset = preprocess_dataset('C:/Users/Lenovo/Desktop/NLP/Final_project/IMDB Dataset.csv')

model_name = "answerdotai/ModernBERT-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
base_model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

# ✅ แปะ SVDLinear เฉพาะเลเยอร์สำคัญ (ไม่แทนทุก Linear)
TARGET_LINEAR_NAMES = {"Wqkv", "Wo", "Wi", "dense", "classifier"}
def replace_linear_with_svdlinear(module, prefix=""):
    for name, child in module.named_children():
        full = f"{prefix}.{name}" if prefix else name
        if isinstance(child, nn.Linear) and any(n in full.split('.') for n in TARGET_LINEAR_NAMES):
            in_f, out_f, has_bias = child.in_features, child.out_features, child.bias is not None
            setattr(module, name, SVDLinear(in_f, out_f, bias=has_bias))
        else:
            replace_linear_with_svdlinear(child, full)

replace_linear_with_svdlinear(base_model)

# ✅ Freeze base / train เฉพาะ SVDLinear + classifier
for p in base_model.parameters():
    p.requires_grad = False
for m in base_model.modules():
    if isinstance(m, SVDLinear):
        for p in m.parameters():
            p.requires_grad = True
# เผื่อบางรุ่น head ชื่อไม่ใช่ classifier ให้เพิ่มตรงนี้ถ้าอยากฝึก head ด้วย
if hasattr(base_model, "classifier"):
    for p in base_model.classifier.parameters():
        p.requires_grad = True

# ✅ Optimizer: LR สูงขึ้นสำหรับ adapters/head
adap_params = [p for m in base_model.modules() if isinstance(m, SVDLinear) for p in m.parameters()]
head_params = list(base_model.classifier.parameters()) if hasattr(base_model, "classifier") else []
optimizer = AdamW([
    {"params": adap_params, "lr": 1.5e-3, "weight_decay": 0.01},
    {"params": head_params, "lr": 2e-3, "weight_decay": 0.00},
])

# ✅ จำนวนสเต็ป (ถ้ารู้ batch_size ใน Pipeline ของคุณ ให้ใส่ตามจริง)
epochs = 3
# ถ้าคุณไม่รู้ batch_size/steps ของ Pipeline ให้ตั้งคร่าว ๆ ไปก่อน เช่น 2000
estimated_total_steps = 2000

allocator = RankAllocator(
    model=base_model,
    lora_r=16,                 # เดิม 8 → 16
    target_rank=24,            # เดิม 4 → 24 (ถ้าหนักค่อยลดเป็น 16)
    init_warmup=int(0.10*estimated_total_steps),
    final_warmup=int(0.60*estimated_total_steps),
    mask_interval=max(50, int(0.10*estimated_total_steps)),
    total_step=estimated_total_steps,
    beta1=0.85,
    beta2=0.85
)

# ✅ Fine-tuning pipeline (เพิ่ม epochs)
fine_tuned_model = FineTuningPipeline(
    dataset=dataset,
    tokenizer=tokenizer,       # (แนะนำให้ไปแก้ใน preprocess เป็น max_length≈320)
    model=base_model,
    optimizer=optimizer,
    val_size=0.1,
    epochs=epochs,
    seed=42,
    allocator=allocator
)


Some weights of ModernBertForSequenceClassification were not initialized from the model checkpoint at answerdotai/ModernBERT-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


ValueError: some parameters appear in more than one parameter group

In [8]:
from loralib.elalora import SVDLinear, RankAllocator
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from torch.optim import AdamW
import torch.nn as nn


dataset = preprocess_dataset('C:/Users/Lenovo/Desktop/NLP/Final_project/IMDB Dataset.csv')
# ✅ 1) โหลด tokenizer และ base model
model_name = "answerdotai/ModernBERT-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
base_model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

# ✅ 2) แปะ SVDLinear แทน Linear ทุกตัวในโมเดล
def replace_linear_with_svdlinear(model):
    for name, module in model.named_children():
        if isinstance(module, nn.Linear):
            setattr(model, name, SVDLinear(module.in_features, module.out_features, bias=module.bias is not None))
        else:
            replace_linear_with_svdlinear(module)

replace_linear_with_svdlinear(base_model)

# ✅ 3) สร้าง RankAllocator สำหรับ dynamic rank control
from loralib.elalora import RankAllocator

#epochs = 3 # หรือจำนวน epoch ที่คุณต้องการ

allocator = RankAllocator(
    model=base_model,
    lora_r=8,
    target_rank=4,
    init_warmup=100,
    final_warmup=300,
    mask_interval=50,
    total_step=1000,         # คุณคำนวณจาก len(dataloader) x epochs ได้เลย before 1000
    beta1=0.9,
    beta2=0.99
)

# ✅ 4) ปิด gradients ของ base weights (freeze base model)
for param in base_model.parameters():
    param.requires_grad = False

for module in base_model.modules():
    if isinstance(module, SVDLinear):
        for param in module.parameters():
            param.requires_grad = True

# ✅ 5) เช็กว่า train เฉพาะ adapter จริง
for name, param in base_model.named_parameters():
    print("✅ TRAINING:" if param.requires_grad else "❌ FROZEN:", name)

# ✅ 6) Fine-tuning pipeline
optimizer = AdamW(filter(lambda p: p.requires_grad, base_model.parameters()), lr=2e-5)

fine_tuned_model = FineTuningPipeline(
    dataset=dataset,
    tokenizer=tokenizer,
    model=base_model,
    optimizer=optimizer,
    val_size=0.1,
    epochs=1,
    seed=42,
    allocator=allocator   # << เพิ่มเข้า pipeline ถ้า custom pipeline รองรับ
)


Some weights of ModernBertForSequenceClassification were not initialized from the model checkpoint at answerdotai/ModernBERT-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


❌ FROZEN: model.embeddings.tok_embeddings.weight
❌ FROZEN: model.embeddings.norm.weight
✅ TRAINING: model.layers.0.attn.Wqkv.weight
✅ TRAINING: model.layers.0.attn.Wo.weight
❌ FROZEN: model.layers.0.mlp_norm.weight
✅ TRAINING: model.layers.0.mlp.Wi.weight
✅ TRAINING: model.layers.0.mlp.Wo.weight
❌ FROZEN: model.layers.1.attn_norm.weight
✅ TRAINING: model.layers.1.attn.Wqkv.weight
✅ TRAINING: model.layers.1.attn.Wo.weight
❌ FROZEN: model.layers.1.mlp_norm.weight
✅ TRAINING: model.layers.1.mlp.Wi.weight
✅ TRAINING: model.layers.1.mlp.Wo.weight
❌ FROZEN: model.layers.2.attn_norm.weight
✅ TRAINING: model.layers.2.attn.Wqkv.weight
✅ TRAINING: model.layers.2.attn.Wo.weight
❌ FROZEN: model.layers.2.mlp_norm.weight
✅ TRAINING: model.layers.2.mlp.Wi.weight
✅ TRAINING: model.layers.2.mlp.Wo.weight
❌ FROZEN: model.layers.3.attn_norm.weight
✅ TRAINING: model.layers.3.attn.Wqkv.weight
✅ TRAINING: model.layers.3.attn.Wo.weight
❌ FROZEN: model.layers.3.mlp_norm.weight
✅ TRAINING: model.layers.3.mlp.W

KeyboardInterrupt: 

In [None]:
'''
from loralib.elalora import ElaLoRAConfig
from peft import get_peft_model  # PeftModel hooking
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from torch.optim import AdamW

# 1) โหลด tokenizer และโมเดล base
model_name = "answerdotai/ModernBERT-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
base_model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

# 2) กำหนด config สำหรับ ElaLoRA (dynamic rank)
elalora_config = ElaLoRAConfig(
    init_oracle_rank=8,
    mask_interval=100,       # จำนวน steps ระหว่างการ prune/expand
    prune_threshold=0.1,     # prune rank ถ้า importance ต่ำกว่า 10%
    expand_rate=1.25,        # เพิ่ม rank 25% เมื่อ layer สำคัญ
    apply_modules=["query", "key", "value", "intermediate", "output"],
    lora_alpha=16,
    lora_dropout=0.1,
)

# 3) ผูก ElaLoRA เข้า model
model = elalora_config.apply_to(base_model)

optimizer = AdamW(model.parameters(), lr=5e-5)

fine_tuned_model = FineTuningPipeline(
    dataset=dataset,
    tokenizer=tokenizer,
    model=model,
    optimizer=optimizer,
    val_size=0.1,
    epochs=5,
    seed=42
)

fine_tuned_model.train()

'''


In [None]:
#from peft import LoraConfig, get_peft_model

In [None]:
'''
from peft import LoraConfig, get_peft_model
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from torch.optim import AdamW
import bitsandbytes as bnb


# ✅ 1) ใช้โมเดลแม่นกว่า ModernBERT (DeBERTa-v3)
model_name = "answerdotai/ModernBERT-base"


# ✅ 2) ใช้ dataset ครึ่งเดียว train เร็วขึ้น
dataset = preprocess_dataset('C:/Users/Lenovo/Desktop/NLP/Final_project/IMDB Dataset.csv')
# Use all 50,000 row data
#dataset = dataset.sample(25000, random_state=42)

tokenizer = AutoTokenizer.from_pretrained(model_name)

# โหลด base model
base_model = AutoModelForSequenceClassification.from_pretrained("answerdotai/ModernBERT-base", num_labels=2)


# กำหนด LoRA config
peft_config = LoraConfig(
    r=8,
    lora_alpha=16,
    #target_modules=["query", "value"],
    target_modules = ['dense', 'Wqkv', 'Wo', 'Wi'], 
    #target_modules = linear_names,
    lora_dropout=0.1,
    bias="none",
    task_type="SEQ_CLS"
)

# สร้างโมเดล LoRA
model = get_peft_model(base_model, peft_config)


optimizer = AdamW(model.parameters(), lr=5e-5)

fine_tuned_model = FineTuningPipeline(
    dataset=dataset,
    tokenizer=tokenizer,
    model=model,
    optimizer=optimizer,
    val_size=0.1,
    epochs=5,
    seed=42
)

'''

Some weights of ModernBertForSequenceClassification were not initialized from the model checkpoint at answerdotai/ModernBERT-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


🔍 Model type: <class 'peft.peft_model.PeftModelForSequenceClassification'>

===== Epoch 1/5 =====
✅ Avg Train Loss: 0.2423
🧪 Avg Val Loss:  0.2054
🎯 Val Accuracy: 0.9311
🕒 Val Time:      0:01:06.969529

===== Epoch 2/5 =====
✅ Avg Train Loss: 0.1615
🧪 Avg Val Loss:  0.1600
🎯 Val Accuracy: 0.9409
🕒 Val Time:      0:01:07.422809

===== Epoch 3/5 =====
✅ Avg Train Loss: 0.1414
🧪 Avg Val Loss:  0.1717
🎯 Val Accuracy: 0.9433
🕒 Val Time:      0:01:08.518395

===== Epoch 4/5 =====
✅ Avg Train Loss: 0.1252
🧪 Avg Val Loss:  0.1786
🎯 Val Accuracy: 0.9437
🕒 Val Time:      0:01:07.966728

===== Epoch 5/5 =====
✅ Avg Train Loss: 0.1117
🧪 Avg Val Loss:  0.1851
🎯 Val Accuracy: 0.9441
🕒 Val Time:      0:01:32.393248

✅ Total training time: 2:18:17.723577


In [None]:
'''
import pandas as pd
import torch
from sklearn.metrics import accuracy_score, classification_report
from tqdm import tqdm  # progress bar

BATCH_SIZE = 8  # ✅ ปรับตาม VRAM

# ✅ เลือกอุปกรณ์ (GPU ถ้ามี, ถ้าไม่มีใช้ CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"✅ Using device: {device}")

# ✅ ย้ายโมเดลไป GPU
model = model.to(device)

# ✅ โหลด CSV
df = pd.read_csv("C:/Users/Lenovo/Desktop/NLP/Final_project/unseen_review/review.csv")


all_pred_labels = []

# ✅ predict แบบ batch
for i in tqdm(range(0, len(df), BATCH_SIZE)):
    batch_texts = df["review"].iloc[i:i+BATCH_SIZE].tolist()
    
    # ✅ tokenize + ส่งไป GPU
    inputs = tokenizer(batch_texts, padding=True, truncation=True, return_tensors="pt").to(device)
    
    # ✅ predict
    with torch.no_grad():  
        outputs = model(**inputs)
        probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
        batch_preds = probs.argmax(axis=1).cpu().numpy()  # ✅ กลับ CPU เพื่อเก็บ
    
    all_pred_labels.extend(batch_preds)

# ✅ เพิ่มผลลัพธ์เข้า DataFrame
df["predicted_label"] = all_pred_labels
df["predicted_sentiment"] = ["positive" if p == 1 else "negative" for p in all_pred_labels]

# ✅ แปลง label จริงเป็น 0/1
if "sentiment" in df.columns:  # ถ้ามี label จริง
    if df["sentiment"].dtype == object:  
        df["true_label"] = df["sentiment"].apply(lambda x: 1 if x.lower()=="positive" else 0)
    else:
        df["true_label"] = df["sentiment"]

    # ✅ คำนวณ Accuracy
    acc = accuracy_score(df["true_label"], df["predicted_label"])
    print(f"\n Accuracy: {acc*100:.2f}%")

    # ✅ Precision / Recall / F1
    print("\n=== Classification Report ===")
    print(classification_report(df["true_label"], df["predicted_label"], target_names=["negative", "positive"]))
else:
    print("\n No ground-truth labels found, skipping accuracy calculation")

# ✅ แสดงตัวอย่าง 10 row แรก
print("\n=== Sample Predictions (first 10) ===")
for _, row in df.head(10).iterrows():
    print(f"Review: {row['review'][:80]}...")
    if "sentiment" in df.columns:
        print(f"  ✅ True: {row['sentiment']} | 🔮 Predicted: {row['predicted_sentiment']}\n")
    else:
        print(f"  🔮 Predicted: {row['predicted_sentiment']}\n")

# ✅ บันทึกผลลัพธ์
#df.to_csv("C:/Users/Lenovo/Desktop/NLP/Final_project/prediction_result/reviews_with_predictions_Deberta_Lora_mode_full.csv", index=False)
#print("\n✅ Saved predictions to reviews_with_predictions_Deberta_Lora_mode_full.csv")
'''

# Save model

In [None]:
'''
merged_model = model.merge_and_unload()
merged_model.save_pretrained("merged_deberta_Lora_model")
tokenizer.save_pretrained("merged_deberta_Lora_model")


'''

('merged_deberta_Lora_model\\tokenizer_config.json',
 'merged_deberta_Lora_model\\special_tokens_map.json',
 'merged_deberta_Lora_model\\spm.model',
 'merged_deberta_Lora_model\\added_tokens.json')

In [6]:
base_model.save_pretrained("./ModernBERT_ElaLora_full_svd_1")
tokenizer.save_pretrained("./ModernBERT_ElaLora_full_svd_1")


('./ModernBERT_ElaLora_full_svd_1\\tokenizer_config.json',
 './ModernBERT_ElaLora_full_svd_1\\special_tokens_map.json',
 './ModernBERT_ElaLora_full_svd_1\\tokenizer.json')

# Import model fine-tune

In [9]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification

path = "./ModernBERT_ElaLora_full_svd_1"
tokenizer = AutoTokenizer.from_pretrained(path)
model = AutoModelForSequenceClassification.from_pretrained(path)

In [12]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from loralib.elalora import SVDLinear
import torch.nn as nn

# ✅ ฟังก์ชันแปะ SVDLinear เหมือนตอน train
def replace_linear_with_svdlinear(model):
    for name, module in model.named_children():
        if isinstance(module, nn.Linear):
            setattr(model, name, SVDLinear(module.in_features, module.out_features, bias=module.bias is not None))
        else:
            replace_linear_with_svdlinear(module)

# ✅ โหลด tokenizer
tokenizer = AutoTokenizer.from_pretrained("./ModernBERT_ElaLora_full")

# ✅ โหลด base model (ยังไม่มี SVDLinear)
base_model = AutoModelForSequenceClassification.from_pretrained(
    "./ModernBERT_ElaLora_full", num_labels=2)

# ✅ แปะ SVDLinear กลับเข้าไปก่อนโหลด weights
replace_linear_with_svdlinear(base_model)

# 💥 ตอนนี้ base_model พร้อมใช้งานแล้ว
base_model.eval()




ModernBertForSequenceClassification(
  (model): ModernBertModel(
    (embeddings): ModernBertEmbeddings(
      (tok_embeddings): Embedding(50368, 768, padding_idx=50283)
      (norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (drop): Dropout(p=0.0, inplace=False)
    )
    (layers): ModuleList(
      (0): ModernBertEncoderLayer(
        (attn_norm): Identity()
        (attn): ModernBertAttention(
          (Wqkv): SVDLinear(in_features=768, out_features=2304, bias=False)
          (rotary_emb): ModernBertRotaryEmbedding()
          (Wo): SVDLinear(in_features=768, out_features=768, bias=False)
          (out_drop): Identity()
        )
        (mlp_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): ModernBertMLP(
          (Wi): SVDLinear(in_features=768, out_features=2304, bias=False)
          (act): GELUActivation()
          (drop): Dropout(p=0.0, inplace=False)
          (Wo): SVDLinear(in_features=1152, out_features=768, bias=False)
    

In [7]:
# ทำนายผลบน validation set
predictions = fine_tuned_model.predict(fine_tuned_model.val_dataloader)

# แปลง probability → label 0/1
predicted_labels = np.argmax(predictions, axis=1)
print(predicted_labels[:10])

[1 1 0 0 0 0 0 0 0 1]


In [8]:
# 1) ให้โมเดล predict บน validation set
predictions = fine_tuned_model.predict(fine_tuned_model.val_dataloader)

# 2) แปลงเป็น label 0/1
predicted_labels = np.argmax(predictions, axis=1)

# 3) ดึง label จริงของ validation set
true_labels = fine_tuned_model.df_dataset['sentiment_encoded'][-len(predicted_labels):].values

# 4) คำนวณ accuracy
val_accuracy = np.mean(predicted_labels == true_labels)
print(f"Validation Accuracy: {val_accuracy:.4f}")

Validation Accuracy: 0.9440


# Save model

In [None]:
'''
save_path = "./fine_tuned_deberta_imdb"

# ✅ save model weights + config
fine_tuned_model.model.save_pretrained(save_path)

# ✅ save tokenizer ที่ใช้ตอน fine-tune
fine_tuned_model.tokenizer.save_pretrained(save_path)
'''

('./fine_tuned_deberta_imdb\\tokenizer_config.json',
 './fine_tuned_deberta_imdb\\special_tokens_map.json',
 './fine_tuned_deberta_imdb\\spm.model',
 './fine_tuned_deberta_imdb\\added_tokens.json')

# Text prediction

In [9]:
from torch.utils.data import TensorDataset, DataLoader

text = "Encouraged by the positive comments about this film on here I was looking forward to watching this film. Bad mistake"

# tokenize text
tokens, masks = fine_tuned_model.tokenize(text)

# สร้าง DataLoader สำหรับข้อความเดียว
dataloader = DataLoader(TensorDataset(tokens, masks), batch_size=1)

# ใช้ fine_tuned_model.predict()
probs = fine_tuned_model.predict(dataloader)
print("Review Movie:", text)
print("Probabilities:", probs)
print("Predicted label:", np.argmax(probs))  # 1 = positive, 0 = negative

Review Movie: Encouraged by the positive comments about this film on here I was looking forward to watching this film. Bad mistake
Probabilities: [[0.99819607 0.00180395]]
Predicted label: 0


In [10]:
from torch.utils.data import TensorDataset, DataLoader

text = "Taut and organically gripping, Edward Dmytryk's Crossfire is a distinctive suspense thriller, an unlikely movie using the look and devices of the noir cycle."

# tokenize text
tokens, masks = fine_tuned_model.tokenize(text)

# สร้าง DataLoader สำหรับข้อความเดียว
dataloader = DataLoader(TensorDataset(tokens, masks), batch_size=1)

# ใช้ fine_tuned_model.predict()
probs = fine_tuned_model.predict(dataloader)
print("Review Movie:", text)
print("Probabilities:", probs)
print("Predicted label:", np.argmax(probs))  # 1 = positive, 0 = negative



Review Movie: Taut and organically gripping, Edward Dmytryk's Crossfire is a distinctive suspense thriller, an unlikely movie using the look and devices of the noir cycle.
Probabilities: [[0.00392544 0.99607456]]
Predicted label: 1


In [15]:
from torch.utils.data import TensorDataset, DataLoader

text = "Protocol is an implausible movie whose only saving grace is that it stars Goldie Hawn along with a good cast of supporting actors. The story revolves around a ditzy cocktail waitress who becomes famous after inadvertently saving the life of an Arab dignitary. The story goes downhill halfway through the movie and Goldie's charm just doesn't save this movie. Unless you are a Goldie Hawn fan don't go out of your way to see this film."


# tokenize text
tokens, masks = fine_tuned_model.tokenize(text)

# สร้าง DataLoader สำหรับข้อความเดียว
dataloader = DataLoader(TensorDataset(tokens, masks), batch_size=1)

# ใช้ fine_tuned_model.predict()
probs = fine_tuned_model.predict(dataloader)
print("Review Movie:", text)
print("Probabilities:", probs)
print("Predicted label:", np.argmax(probs))  # 1 = positive, 0 = negative


Review Movie: Protocol is an implausible movie whose only saving grace is that it stars Goldie Hawn along with a good cast of supporting actors. The story revolves around a ditzy cocktail waitress who becomes famous after inadvertently saving the life of an Arab dignitary. The story goes downhill halfway through the movie and Goldie's charm just doesn't save this movie. Unless you are a Goldie Hawn fan don't go out of your way to see this film.
Probabilities: [[0.9945844  0.00541563]]
Predicted label: 0


In [18]:
from torch.utils.data import TensorDataset, DataLoader

#text = "Protocol is an implausible movie whose only saving grace is that it stars Goldie Hawn along with a good cast of supporting actors. The story revolves around a ditzy cocktail waitress who becomes famous after inadvertently saving the life of an Arab dignitary. The story goes downhill halfway through the movie and Goldie's charm just doesn't save this movie. Unless you are a Goldie Hawn fan don't go out of your way to see this film."

text = "Helen (Kate Capshaw) owns a bookstore in the sleepy, coastal town of Loblolly by the Sea. Divorced, Helen has a young daughter who is going to camp for the summer, giving mother a bit more freedom"

# tokenize text
tokens, masks = fine_tuned_model.tokenize(text)

# สร้าง DataLoader สำหรับข้อความเดียว
dataloader = DataLoader(TensorDataset(tokens, masks), batch_size=1)

# ใช้ fine_tuned_model.predict()
probs = fine_tuned_model.predict(dataloader)
print("Review Movie:", text)
print("Probabilities:", probs)
print("Predicted label:", np.argmax(probs))  # 1 = positive, 0 = negative


Review Movie: Helen (Kate Capshaw) owns a bookstore in the sleepy, coastal town of Loblolly by the Sea. Divorced, Helen has a young daughter who is going to camp for the summer, giving mother a bit more freedom
Probabilities: [[0.24761915 0.75238085]]
Predicted label: 1


# Import model local

In [1]:
model_path = "./fine_tuned_deberta_Lora_imdb"

from transformers import AutoTokenizer, AutoModelForSequenceClassification

tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSequenceClassification.from_pretrained(model_path)



# Predict Classification

In [18]:
text = "Protocol is an implausible movie whose only saving grace is that it stars Goldie Hawn along with a good cast of supporting actors"
inputs = tokenizer(text, return_tensors="pt")
outputs = model(**inputs)

import torch
probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
print("Review:" , text)
print("Predicted:", "Positive ✅" if probs.argmax() == 1 else "Negative ❌")

Review: Protocol is an implausible movie whose only saving grace is that it stars Goldie Hawn along with a good cast of supporting actors
Predicted: Negative ❌


In [None]:
import pandas as pd

# ✅ โหลด CSV
df = pd.read_csv("reviews.csv")

# ✅ tokenize ทั้งหมด
inputs = tokenizer(df["review"].tolist(), padding=True, truncation=True, return_tensors="pt")

# ✅ predict
outputs = model(**inputs)
probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
pred_labels = probs.argmax(axis=1)

# ✅ เพิ่มคอลัมน์ผลลัพธ์
df["predicted_sentiment"] = ["positive" if p == 1 else "negative" for p in pred_labels]

# ✅ ดูตัวอย่าง
print(df.head())

# ✅ ถ้าอยากบันทึกผล
df.to_csv("reviews_with_predictions.csv", index=False)