In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import pandas as pd
import ast
import torch
from torch import nn

from torch.utils.data import Dataset, DataLoader, Subset
from transformers import BertTokenizer, BertModel
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from transformers import BertForTokenClassification


In [36]:
df_all = pd.read_csv('/content/drive/MyDrive/MSC - research/df_regression_20240714.csv');

df = pd.DataFrame(columns=['texts', 'aspects', 'sentiments'])

df['texts'] = df_all['tokens'].apply(ast.literal_eval)
df['aspects'] = df_all['bio_tags'].apply(ast.literal_eval)
df['sentiments'] = df_all['sentiment_tags'].apply(ast.literal_eval)

In [37]:
# Define the mapping for aspects
aspect_mapping = {
    'O': 'O',
    'B-overall': 'overall',
    'B-length': 'length',
    'B-hip': 'hip',
    'B-buttoned': 'buttoned',
    'B-neck': 'neck',
    'B-sleeve': 'sleeve',
    'B-chest': 'chest',
    'B-waist': 'waist',
    'B-shoulder': 'shoulder',
    'B-length_indirect': 'length',
    'B-buttoned_indirect': 'buttoned',
    'B-sleeve_indirect': 'sleeve',
    'B-overall_indirect': 'overall',
    'I-overall': 'overall',
    'I-length': 'length',
    'I-hip': 'hip',
    'I-buttoned': 'buttoned',
    'I-neck': 'neck',
    'I-sleeve': 'sleeve',
    'I-chest': 'chest',
    'I-waist': 'waist',
    'I-shoulder': 'shoulder',
    'I-length_indirect': 'length',
    'I-buttoned_indirect': 'buttoned',
    'I-sleeve_indirect': 'sleeve',
    'I-overall_indirect': 'overall',
}

# Define the mapping for sentiments
sentiment_mapping = {
    'O': 'O',
    'B-negative': 'negative',
    'I-negative': 'negative',
    'B-neutral': 'neutral',
    'I-neutral': 'neutral',
    'B-positive': 'positive',
    'I-positive': 'positive',
}

# Function to apply mappings
def apply_mapping(value, mapping):
    return [mapping.get(item, 'out') for item in value]

# Update the DataFrame
df['aspects'] = df['aspects'].apply(apply_mapping, args=(aspect_mapping,))
df['sentiments'] = df['sentiments'].apply(apply_mapping, args=(sentiment_mapping,))


In [38]:
def get_labels(column):
  # Extract distinct values from the lists
  distinct_values = set()
  df[column].apply(lambda x: distinct_values.update(x))

  # Convert the set to a list if you need a list format
  distinct_values_list = list(distinct_values)
  return distinct_values_list


In [62]:
aspect_count = len(get_labels('aspects'))
sentiment_count = len(get_labels('sentiments'))
max_length = df['texts'].apply(len).max()
out_of_aspect_id = -100

In [86]:
def get_length_frequencies(column):
    lengths = df[column].apply(len)
    return lengths.value_counts()

# Get frequencies for each column
tokens_length_freq = get_length_frequencies('texts')
aspect_length_freq = get_length_frequencies('aspects')
sentiment_length_freq = get_length_frequencies('sentiments')

# Display the frequencies
print("Tokens Length Frequencies:\n", tokens_length_freq)
print("\nAspect Length Frequencies:\n", aspect_length_freq)
print("\nSentiment Length Frequencies:\n", sentiment_length_freq)



# Filter rows where the length of lists in 'tokens' column is 15
df = df[df['texts'].apply(len) == 15]

# View the filtered DataFrame
print(len(df))

Tokens Length Frequencies:
 texts
15    11
19     9
14     9
21     9
10     8
22     6
18     6
20     6
12     6
26     5
23     5
17     5
13     5
16     4
30     4
27     4
9      4
28     4
8      4
24     4
5      3
36     3
7      3
11     3
38     3
6      3
41     3
25     3
31     2
32     2
46     2
33     2
40     2
29     2
52     1
4      1
55     1
48     1
56     1
34     1
61     1
37     1
47     1
53     1
57     1
51     1
Name: count, dtype: int64

Aspect Length Frequencies:
 aspects
15    11
19     9
14     9
21     9
10     8
22     6
18     6
20     6
12     6
26     5
23     5
17     5
13     5
16     4
30     4
27     4
9      4
28     4
8      4
24     4
5      3
36     3
7      3
11     3
38     3
6      3
41     3
25     3
31     2
32     2
46     2
33     2
40     2
29     2
52     1
4      1
55     1
48     1
56     1
34     1
61     1
37     1
47     1
53     1
57     1
51     1
Name: count, dtype: int64

Sentiment Length Frequencies:
 sentiments
15    

In [87]:
class ABSADataset(Dataset):
    def __init__(self, df, max_len=max_length):
        self.df = df
        self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        self.texts = df['texts']
        self.aspects = df['aspects']
        self.sentiments = df['sentiments']
        self.max_len = max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = self.texts[idx]
        aspect_labels = self.aspects[idx]
        sentiment_labels = self.sentiments[idx]

        encoding = self.tokenizer(text, return_tensors='pt', padding='max_length', truncation=True, max_length=self.max_len)

        # Convert labels to tensor and handle 'O' as ignore index for loss calculation
        aspect_labels = torch.tensor([label_to_id(aspect) for aspect in aspect_labels], dtype=torch.long)
        sentiment_labels = torch.tensor([sentiment_to_id(sentiment) for sentiment in sentiment_labels], dtype=torch.long)

        return encoding, aspect_labels, sentiment_labels

def label_to_id(label):
    # label_dict = {'O': -100, 'B-overall': 0, 'B-length_indirect': 1, 'B-hip': 2, 'B-buttoned_indirect': 3, 'B-neck': 4,
    #               'B-sleeve': 5, 'B-sleeve_indirect': 6, 'B-buttoned': 7, 'B-shoulder': 8, 'B-overall_indirect': 9,
    #               'B-length': 10, 'B-chest': 11, 'B-waist': 12}
    label_dict = {'O': out_of_aspect_id, 'overall': 0, 'length': 1, 'hip': 2, 'buttoned': 3, 'neck': 4,
                  'sleeve': 5, 'shoulder': 6,'chest': 6, 'waist': 8}
    return label_dict.get(label, out_of_aspect_id)

def sentiment_to_id(sentiment):
    # sentiment_dict = {'O': -100, 'B-negative': 0, 'B-neutral': 1, 'I-negative': 2, 'B-positive': 3, 'I-positive': 4}
    sentiment_dict = {'O': out_of_aspect_id, 'negative': 0, 'neutral': 1, 'positive': 2}
    return sentiment_dict.get(sentiment, out_of_aspect_id)


In [88]:
def collate_fn(batch):
    tokens = [item[0] for item in batch]
    aspect_labels = [item[1] for item in batch]
    sentiment_labels = [item[2] for item in batch]

    # Pad tokens
    tokens_padded = torch.nn.utils.rnn.pad_sequence([item['input_ids'].squeeze() for item in tokens], batch_first=True, padding_value=0)
    attention_mask_padded = torch.nn.utils.rnn.pad_sequence([item['attention_mask'].squeeze() for item in tokens], batch_first=True, padding_value=0)

    # Pad labels
    aspect_labels_padded = torch.nn.utils.rnn.pad_sequence(aspect_labels, batch_first=True, padding_value=out_of_aspect_id)
    sentiment_labels_padded = torch.nn.utils.rnn.pad_sequence(sentiment_labels, batch_first=True, padding_value=out_of_aspect_id)

    # Reconstruct the tokens dictionary with padded sequences
    tokens_padded_dict = {
        'input_ids': tokens_padded,
        'attention_mask': attention_mask_padded
    }

    return tokens_padded_dict, aspect_labels_padded, sentiment_labels_padded


In [89]:
class BaseModel:
    def __init__(self, model_name='bert-base-uncased', num_labels=1):
        self.tokenizer = BertTokenizer.from_pretrained(model_name)
        self.model = BertForTokenClassification.from_pretrained(model_name, num_labels=num_labels)
        self.num_labels = num_labels

    def train(self, train_loader, val_loader, epochs=3, lr=1e-5):
      return

    def evaluate(self, val_loader):
      return 1,1

    def compute_loss_and_predictions(self, tokens, labels):
        outputs = self.model(**tokens, labels=labels)
        loss = outputs.loss
        logits = outputs.logits
        preds = torch.argmax(logits, dim=-1)
        return loss, preds

    def compute_accuracy(self, preds, labels):
        correct = (preds == labels).sum().item()
        total = labels.numel()
        accuracy = correct / total
        return accuracy

    def plot_training_and_validation(self, train_losses, train_accuracies, val_losses, val_accuracies):
        plt.figure(figsize=(12, 6))
        plt.subplot(1, 2, 1)
        plt.plot(train_losses, label='Training Loss')
        plt.plot(val_losses, label='Validation Loss')
        plt.title('Training & Validation Loss')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.legend()

        plt.subplot(1, 2, 2)
        plt.plot(train_accuracies, label='Training Accuracy')
        plt.plot(val_accuracies, label='Validation Accuracy')
        plt.title('Training & Validation Accuracy')
        plt.xlabel('Epoch')
        plt.ylabel('Accuracy')
        plt.legend()
        plt.show()

    def test(self, test_loader):
        _, accuracy = self.evaluate(test_loader)
        print(f"Test Accuracy: {accuracy:.4f}")



In [90]:
class AspectModel(BaseModel):
    def __init__(self, model_name='bert-base-uncased', num_labels=aspect_count):
        super().__init__(model_name, num_labels)

    def train(self, train_loader, val_loader, epochs=3, lr=1e-5):
        self.model.train()
        optimizer = torch.optim.Adam(self.model.parameters(), lr=lr)
        train_losses, train_accuracies = [], []
        val_losses, val_accuracies = [], []

        for epoch in range(epochs):
            for tokens, aspect_labels, _ in train_loader:
                optimizer.zero_grad()
                loss, preds = self.compute_loss_and_predictions(tokens, aspect_labels)
                loss.backward()
                optimizer.step()

                accuracy = self.compute_accuracy(preds, aspect_labels)
                train_losses.append(loss.item())
                train_accuracies.append(accuracy)

            print(f"Epoch {epoch+1}: Train Loss = {train_losses[-1]:.4f}, Train Accuracy = {train_accuracies[-1]:.4f}")
            val_loss, val_accuracy = self.evaluate(val_loader)
            val_losses.append(val_loss)
            val_accuracies.append(val_accuracy)

        self.plot_training_and_validation(train_losses, train_accuracies, val_losses, val_accuracies)

    def evaluate(self, val_loader):
        self.model.eval()
        val_loss, val_correct, val_total = 0, 0, 0
        all_preds, all_labels = [], []
        with torch.no_grad():
            for tokens, aspect_labels, _ in val_loader:
                loss, preds = self.compute_loss_and_predictions(tokens, aspect_labels)
                val_loss += loss.item()
                accuracy = self.compute_accuracy(preds, aspect_labels)
                val_total += aspect_labels.numel()
                all_preds.extend(preds.view(-1).tolist())
                all_labels.extend(aspect_labels.view(-1).tolist())

        val_avg_loss = val_loss / len(val_loader)
        print("Evaluation Classification Report:")
        print(classification_report(all_labels, all_preds, labels=list(range(self.num_labels)), zero_division=0))
        return val_avg_loss, accuracy

In [91]:
class SentimentModel(BaseModel):
    def __init__(self, model_name='bert-base-uncased', num_labels=sentiment_count):  # Typically, sentiment analysis has fewer labels
        super().__init__(model_name, num_labels)

    def train(self, train_loader, val_loader, epochs=3, lr=1e-5):
        self.model.train()
        optimizer = torch.optim.Adam(self.model.parameters(), lr=lr)
        train_losses, train_accuracies = [], []
        val_losses, val_accuracies = [], []

        for epoch in range(epochs):
            for tokens, _, sentiment_labels in train_loader:
                optimizer.zero_grad()
                loss, preds = self.compute_loss_and_predictions(tokens, sentiment_labels)
                loss.backward()
                optimizer.step()

                accuracy = self.compute_accuracy(preds, sentiment_labels)
                train_losses.append(loss.item())
                train_accuracies.append(accuracy)

            print(f"Epoch {epoch+1}: Train Loss = {train_losses[-1]:.4f}, Train Accuracy = {train_accuracies[-1]:.4f}")
            val_loss, val_accuracy = self.evaluate(val_loader)
            val_losses.append(val_loss)
            val_accuracies.append(val_accuracy)

        self.plot_training_and_validation(train_losses, train_accuracies, val_losses, val_accuracies)

    def evaluate(self, val_loader):
        self.model.eval()
        val_loss, val_correct, val_total = 0, 0, 0
        all_preds, all_labels = [], []
        with torch.no_grad():
            for tokens, _, sentiment_labels in val_loader:
                loss, preds = self.compute_loss_and_predictions(tokens, sentiment_labels)
                val_loss += loss.item()
                accuracy = self.compute_accuracy(preds, sentiment_labels)
                val_total += sentiment_labels.numel()
                all_preds.extend(preds.view(-1).tolist())
                all_labels.extend(sentiment_labels.view(-1).tolist())

        val_avg_loss = val_loss / len(val_loader)
        print("Evaluation Classification Report:")
        print(classification_report(all_labels, all_preds, labels=list(range(self.num_labels)), zero_division=0))
        return val_avg_loss, accuracy


In [92]:
def split_dataset(dataset, val_split=0.4, test_split=0.4):
    indices = list(range(len(dataset)))

    # Split into train+validation and test sets
    train_val_indices, test_indices = train_test_split(indices, test_size=test_split, random_state=42)

    # Split train+validation set into train and validation sets
    train_indices, val_indices = train_test_split(train_val_indices, test_size=val_split, random_state=42)

    train_set = Subset(dataset, train_indices)
    val_set = Subset(dataset, val_indices)
    test_set = Subset(dataset, test_indices)

    return train_set, val_set, test_set


In [93]:

# Assuming ABSADataset is defined as in the previous messages
dataset = ABSADataset(df)
# loader = DataLoader(dataset, batch_size=8, shuffle=True) # check what batch_size is. 32 seems to be defuat

train_set, val_set, test_set = split_dataset(dataset)

# Create DataLoaders
train_loader = DataLoader(train_set, batch_size=16, shuffle=True, collate_fn=collate_fn)
val_loader = DataLoader(val_set, batch_size=16, shuffle=False, collate_fn=collate_fn)
test_loader = DataLoader(test_set, batch_size=16, shuffle=False, collate_fn=collate_fn)

In [95]:
# Initialize models
aspect_model = AspectModel()
sentiment_model = SentimentModel()

# Example training setup, remember to define appropriate train, validate, and test datasets
# Using the same loader for demonstration; in practice, you should separate these
aspect_model.train(train_loader, val_loader, epochs=100, lr=1e-5)
# sentiment_model.train(train_loader, val_loader, epochs=100, lr=1e-5)


aspect_model.test(test_loader)

Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


KeyError: 4

In [None]:
# class AspectModel:
#     def __init__(self, model_name='bert-base-uncased', num_labels=aspect_count):
#         self.tokenizer = BertTokenizer.from_pretrained(model_name)
#         self.model = BertForTokenClassification.from_pretrained(model_name, num_labels=num_labels)

#     def train(self, train_loader, val_loader, epochs=3, lr=1e-5):
#         self.model.train()
#         optimizer = torch.optim.Adam(self.model.parameters(), lr=lr)
#         train_losses, train_accuracies = [], []
#         val_losses, val_accuracies = [], []

#         for epoch in range(epochs):
#             for tokens, aspect_labels, _ in train_loader:
#                 optimizer.zero_grad()
#                 loss, preds = self.compute_loss_and_predictions(tokens, aspect_labels)
#                 loss.backward()
#                 optimizer.step()

#                 accuracy = self.compute_accuracy(preds, aspect_labels)
#                 train_losses.append(loss.item())
#                 train_accuracies.append(accuracy)

#             print(f"Epoch {epoch+1}: Train Loss = {train_losses[-1]:.4f}, Train Accuracy = {train_accuracies[-1]:.4f}")
#             val_loss, val_accuracy = self.evaluate(val_loader)
#             val_losses.append(val_loss)
#             val_accuracies.append(val_accuracy)

#         self.plot_training_and_validation(train_losses, train_accuracies, val_losses, val_accuracies)

#     def evaluate(self, val_loader):
#         self.model.eval()
#         val_loss, val_correct, val_total = 0, 0, 0
#         all_preds, all_labels = [], []
#         with torch.no_grad():
#             for tokens, aspect_labels, _ in val_loader:
#                 loss, preds = self.compute_loss_and_predictions(tokens, aspect_labels)
#                 val_loss += loss.item()
#                 accuracy = self.compute_accuracy(preds, aspect_labels)
#                 val_total += aspect_labels.numel()
#                 all_preds.extend(preds.view(-1).tolist())
#                 all_labels.extend(aspect_labels.view(-1).tolist())

#         val_avg_loss = val_loss / len(val_loader)
#         print("Evaluation Classification Report:")
#         print(classification_report(all_labels, all_preds, labels=list(range(self.num_labels)), zero_division=0))
#         return val_avg_loss, accuracy

#     def compute_loss_and_predictions(self, tokens, labels):
#         outputs = self.model(**tokens, labels=labels)
#         loss = outputs.loss
#         logits = outputs.logits
#         preds = torch.argmax(logits, dim=-1)
#         return loss, preds

#     def compute_accuracy(self, preds, labels):
#         correct = (preds == labels).sum().item()
#         total = labels.numel()
#         accuracy = correct / total
#         return accuracy

#     def plot_training_and_validation(self, train_losses, train_accuracies, val_losses, val_accuracies):
#         plt.figure(figsize=(12, 6))
#         plt.subplot(1, 2, 1)
#         plt.plot(train_losses, label='Training Loss')
#         plt.plot(val_losses, label='Validation Loss')
#         plt.title('Training & Validation Loss')
#         plt.xlabel('Epoch')
#         plt.ylabel('Loss')
#         plt.legend()

#         plt.subplot(1, 2, 2)
#         plt.plot(train_accuracies, label='Training Accuracy')
#         plt.plot(val_accuracies, label='Validation Accuracy')
#         plt.title('Training & Validation Accuracy')
#         plt.xlabel('Epoch')
#         plt.ylabel('Accuracy')
#         plt.legend()
#         plt.show()

#     def test(self, test_loader):
#         _, accuracy = self.evaluate(test_loader)
#         print(f"Test Accuracy: {accuracy:.4f}")



In [None]:
# class AspectSentimentDataset(Dataset):
#     def __init__(self, df):
#         self.df = df
#         self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

#     def __len__(self):
#         return len(self.df)

#     def __getitem__(self, idx):
#         item = self.df.iloc[idx]
#         encoding = self.tokenizer(
#             item['tokens'],
#             max_length=max_length,
#             truncation=True,
#             padding='max_length',
#             return_tensors="pt"
#         )
#         return {
#             'input_ids': encoding['input_ids'].flatten(),
#             'attention_mask': encoding['attention_mask'].flatten(),
#             'aspect_labels': torch.tensor(item['aspect_labels']),
#             'sentiment_labels': torch.tensor(item['sentiment_labels'])
#         }

In [None]:
# class AspectExtractor:
#     def __init__(self, num_labels):
#         self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
#         self.model = BertForTokenClassification.from_pretrained('bert-base-uncased', num_labels=num_labels)
#         self.model.train()

#     def train(self, dataloader, device, epochs=3, lr=2e-5):
#         optimizer = torch.optim.Adam(self.model.parameters(), lr=lr)
#         self.model.to(device)
#         predictions, true_labels = [], []
#         for epoch in range(epochs):
#             total_loss = 0
#             for batch in tqdm(dataloader):
#                 self.model.train()
#                 input_ids = batch['input_ids'].to(device)
#                 attention_mask = batch['attention_mask'].to(device)
#                 labels = batch['aspect_labels'].to(device)

#                 outputs = self.model(input_ids, attention_mask=attention_mask, labels=labels)
#                 loss = outputs.loss
#                 optimizer.zero_grad()
#                 loss.backward()
#                 optimizer.step()
#                 total_loss += loss.item()
#                 logits = outputs.logits.argmax(dim=-1)
#                 predictions.extend(logits.cpu().numpy())
#                 true_labels.extend(labels.cpu().numpy())
#             print(f"Epoch {epoch+1}, Loss: {total_loss/len(dataloader)}")

#         print("Train - Classification Report:")
#         print(classification_report(true_labels, predictions))
#         return total_loss / len(dataloader)

#     def evaluate(self, dataloader, device):
#         self.model.eval()
#         total_loss = 0
#         predictions, true_labels = [], []
#         with torch.no_grad():
#             for batch in tqdm(dataloader):
#                 input_ids = batch['input_ids'].to(device)
#                 attention_mask = batch['attention_mask'].to(device)
#                 labels = batch['aspect_labels'].to(device)

#                 outputs = self.model(input_ids, attention_mask=attention_mask, labels=labels)
#                 loss = outputs.loss
#                 total_loss += loss.item()
#                 logits = outputs.logits.argmax(dim=-1)
#                 predictions.extend(logits.cpu().numpy())
#                 true_labels.extend(labels.cpu().numpy())

#         print("Evaluate - Classification Report:")
#         print(classification_report(true_labels, predictions))
#         return total_loss / len(dataloader)


In [None]:
# class SentimentClassifier:
#     def __init__(self, num_labels):
#         self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
#         self.model = BertForTokenClassification.from_pretrained('bert-base-uncased', num_labels=num_labels)
#         self.model.train()

#     def train(self, dataloader, device, epochs=3, lr=2e-5):
#         optimizer = torch.optim.Adam(self.model.parameters(), lr=lr)
#         self.model.to(device)
#         predictions, true_labels = [], []
#         for epoch in range(epochs):
#             total_loss = 0
#             self.model.train()
#             for batch in tqdm(dataloader):
#                 input_ids = batch['input_ids'].to(device)
#                 attention_mask = batch['attention_mask'].to(device)
#                 sentiment_labels = batch['sentiment_labels'].to(device)

#                 optimizer.zero_grad()
#                 outputs = self.model(input_ids, attention_mask=attention_mask, labels=sentiment_labels)
#                 loss = outputs.loss
#                 loss.backward()
#                 optimizer.step()
#                 total_loss += loss.item()
#                 logits = outputs.logits.argmax(dim=-1)
#                 predictions.extend(logits.cpu().numpy())
#                 true_labels.extend(sentiment_labels.cpu().numpy())

#             print(f"Epoch {epoch+1}, Loss: {total_loss / len(dataloader)}")

#         print("Train - Token-Level Sentiment Classification Report:")
#         print(classification_report(true_labels.flatten(), predictions.flatten(), labels=np.unique(predictions)))
#         return total_loss / len(dataloader)

#     def evaluate(self, dataloader, device):
#         self.model.eval()
#         total_loss = 0
#         predictions, true_labels = [], []
#         with torch.no_grad():
#             for batch in tqdm(dataloader):
#                 input_ids = batch['input_ids'].to(device)
#                 attention_mask = batch['attention_mask'].to(device)
#                 sentiment_labels = batch['sentiment_labels'].to(device)

#                 outputs = self.model(input_ids, attention_mask=attention_mask, labels=sentiment_labels)
#                 loss = outputs.loss
#                 total_loss += loss.item()
#                 logits = outputs.logits.argmax(dim=-1)
#                 predictions.extend(logits.cpu().numpy())
#                 true_labels.extend(sentiment_labels.cpu().numpy())

#         print("Evaluate - Token-Level Sentiment Classification Report:")
#         print(classification_report(true_labels.flatten(), predictions.flatten(), labels=np.unique(predictions)))
#         return total_loss / len(dataloader)


In [None]:
# # train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)
# # train_dataset = AspectSentimentDataset(train_df)
# # test_dataset = AspectSentimentDataset(test_df)

# # Instantiate models
# aspect_model = AspectExtractor(num_labels=get_label_num('aspect_labels'))  # Adapt num_labels_aspect as per your dataset
# sentiment_model = SentimentClassifier(num_labels=get_label_num('sentiment_labels'))  # Adapt num_labels_sentiment as per your dataset

# # Device configuration
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# # Train and evaluate sentiment model
# train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
# test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)
# sentiment_model.train(train_loader, device)
# sentiment_model.evaluate(test_loader, device)
