In [1]:
import torch
import torch.optim as optim
from torch.utils.data import DataLoader
from tqdm import tqdm  
from dataloader import *
from model import * 
from nltk.translate.bleu_score import sentence_bleu
import nltk

def collate_fn(batch):
    inputs, labels = zip(*batch)
    max_length = max(len(seq) for seq in inputs)
    
    # Convert each sequence to a list, pad with 0, and convert to tensor
    padded_inputs = [torch.cat([seq, torch.zeros(max_length - len(seq), dtype=torch.long)]) for seq in inputs]
    lengths = [len(seq) for seq in inputs]
    
    return torch.stack(padded_inputs), torch.tensor(labels, dtype=torch.float), lengths

# Hyperparameters
num_epochs = 10
learning_rate = 0.001
target_confidence = 0.8 

In [2]:
 
data_dir = "./data/sentiment_style_transfer/yelp"
vocab = build_vocab(data_dir)
dataset = TextDataset(data_dir, vocab)
data_loader = DataLoader(dataset, batch_size=64, collate_fn=collate_fn, shuffle=True)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = StyleTransferModel(len(vocab), 300, 256, 16, 128).to(device)  
optimizer = optim.Adam(model.parameters(), lr=learning_rate)


In [2]:

for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0
    progress_bar = tqdm(data_loader, desc=f"Epoch {epoch + 1}/{num_epochs}", unit="batch")
    
    for input_tokens, labels, lengths in progress_bar:
        input_tokens = input_tokens.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        x_reconstructed, style_mean, content_mean, s_prime = model(input_tokens, target_confidence)
        style_logvar = torch.zeros_like(style_mean)
        content_logvar = torch.zeros_like(content_mean)
        loss = vae_loss(x_reconstructed, input_tokens, style_mean, style_logvar, content_mean, content_logvar)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
        
        # Update the progress bar with the current loss
        progress_bar.set_postfix(loss=epoch_loss / (progress_bar.n + 1))
    
    print(f"Epoch {epoch + 1}/{num_epochs} completed. Average Loss: {epoch_loss / len(data_loader)}")


Epoch 1/10: 100%|██████████| 6926/6926 [05:34<00:00, 20.72batch/s, loss=0.618]


Epoch 1/10 completed. Average Loss: 0.6183889831284934


Epoch 2/10: 100%|██████████| 6926/6926 [05:37<00:00, 20.50batch/s, loss=0.229] 


Epoch 2/10 completed. Average Loss: 0.22921297018383416


Epoch 3/10: 100%|██████████| 6926/6926 [05:34<00:00, 20.71batch/s, loss=0.299]


Epoch 3/10 completed. Average Loss: 0.2993834251228307


Epoch 4/10: 100%|██████████| 6926/6926 [05:33<00:00, 20.77batch/s, loss=0.224] 


Epoch 4/10 completed. Average Loss: 0.22419913678819037


Epoch 5/10: 100%|██████████| 6926/6926 [05:29<00:00, 21.01batch/s, loss=0.616] 


Epoch 5/10 completed. Average Loss: 0.6160696460356646


Epoch 6/10: 100%|██████████| 6926/6926 [05:30<00:00, 20.96batch/s, loss=0.33]  


Epoch 6/10 completed. Average Loss: 0.329525139434294


Epoch 7/10: 100%|██████████| 6926/6926 [05:35<00:00, 20.62batch/s, loss=0.279]


Epoch 7/10 completed. Average Loss: 0.2789108553194553


Epoch 8/10: 100%|██████████| 6926/6926 [05:34<00:00, 20.73batch/s, loss=0.391]


Epoch 8/10 completed. Average Loss: 0.3910796397221144


Epoch 9/10: 100%|██████████| 6926/6926 [05:30<00:00, 20.93batch/s, loss=0.348]


Epoch 9/10 completed. Average Loss: 0.34840153873824936


Epoch 10/10: 100%|██████████| 6926/6926 [05:25<00:00, 21.27batch/s, loss=0.251]

Epoch 10/10 completed. Average Loss: 0.2513471563543881





In [11]:
torch.save(model, 'model_complete.pth')

In [8]:
model_1 = torch.load('model_complete.pth') 

  model_1 = torch.load('model_complete.pth')


In [9]:
import torch
from torch.utils.data import Dataset, DataLoader
import os
from collections import Counter

class TextDatasetTest(Dataset):
    def __init__(self, data_dir, vocab):
        super(TextDatasetTest, self).__init__()
        self.data = []
        self.vocab = vocab

        # Load data from the files
        files = ["sentiment.test.0", "sentiment.test.1"]
        for filename in files:
            file_path = os.path.join(data_dir, filename)
            with open(file_path, 'r', encoding='utf-8') as f:
                lines = f.readlines()
                for line in lines:
                    tokens = line.strip().split()
                    label = 1 if filename.endswith('.1') else 0  # Binary label
                    self.data.append((tokens, label))

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        tokens, label = self.data[idx]
        token_ids = [self.vocab.get(token, self.vocab['<UNK>']) for token in tokens]
        return torch.tensor(token_ids, dtype=torch.long), torch.tensor(label, dtype=torch.long)

In [10]:
vocab = build_vocab(data_dir)
dataset = TextDatasetTest(data_dir, vocab)
data_loader_test = DataLoader(dataset, batch_size=64, collate_fn=collate_fn, shuffle=True)

In [11]:
# Download necessary NLTK resources
nltk.download('punkt')

# Function to convert token IDs back to words using the vocabulary
def tokens_to_words(token_ids, vocab):
    inv_vocab = {v: k for k, v in vocab.items()}
    return [inv_vocab.get(token_id, '<UNK>') for token_id in token_ids if token_id != 0]  # Exclude padding

# Function to calculate BLEU score for a batch
def calculate_bleu_score(data_loader, model, vocab, device):
    model.eval()  # Set the model to evaluation mode
    total_bleu_score = 0
    num_sentences = 0
    
    counter = 0
    with torch.no_grad():    # Train Style Classifier
    classifier = train_style_classifier(data_loader_train, vocab_size, device)

    print("\n--- BLEU-S Score (Content Preservation) ---")
    bleu_score = calculate_bleu_score(data_loader_test, model, vocab, device)

    print("\n--- Style Transfer Accuracy ---")
    style_transfer_accuracy = evaluate_style_transfer(data_loader_test, model, classifier, vocab, device)

    print("\n--- Final Results ---")
    print(f"BLEU-S Score: {bleu_score:.4f}")
    print(f"Style Transfer Accuracy: {style_transfer_accuracy:.4f}")

    return bleu_score, style_transfer_accuracy
        for input_tokens, _, lengths in data_loader:
            input_tokens = input_tokens.to(device)
            x_reconstructed, _, _, _ = model(input_tokens)
            x_reconstructed = x_reconstructed.argmax(dim=-1)  # Get the predicted token IDs

            # Calculate BLEU score for each sentence
            for i in range(len(input_tokens)):
                original_sentence = tokens_to_words(input_tokens[i].tolist(), vocab)
                reconstructed_sentence = tokens_to_words(x_reconstructed[i].tolist(), vocab)

                counter += 1
                if counter % 100 == 0:
                    print(original_sentence, reconstructed_sentence)
                # Calculate BLEU score
                bleu_score = sentence_bleu([original_sentence], reconstructed_sentence)
                total_bleu_score += bleu_score
                num_sentences += 1

    # Return the average BLEU score
    return total_bleu_score / num_sentences if num_sentences > 0 else 0

# Calculate the BLEU score
bleu_score = calculate_bleu_score(data_loader_test, model_1, vocab, device)
print(f"Average BLEU Score: {bleu_score:.4f}")

IndentationError: expected an indented block (2748080889.py, line 17)

In [12]:
def tokens_to_words(token_ids, vocab):
    inv_vocab = {v: k for k, v in vocab.items()}
    return [inv_vocab.get(token_id, '<UNK>') for token_id in token_ids if token_id != 0]  # Exclude padding

# Inspect some sentences from the data loader
model.eval()  # Set the model to evaluation mode
model_1.eval()
with torch.no_grad():
    for input_tokens, _, lengths in data_loader_test:
        input_tokens = input_tokens.to(device)
        x_reconstructed, _, _, _ = model_1(input_tokens)
        x_reconstructed = x_reconstructed.argmax(dim=-1)  # Get the predicted token IDs

        # Print a few input and output sentences
        for i in range(5):  # Print 5 examples
            original_sentence = tokens_to_words(input_tokens[i].tolist(), vocab)
            reconstructed_sentence = tokens_to_words(x_reconstructed[i].tolist(), vocab)

            print("Original Sentence: \t\t\t", " ".join(original_sentence))
            print("Reconstructed Sentence: \t\t", " ".join(reconstructed_sentence))
            print()

        break  # Only inspect the first batch

Original Sentence: 			 they only received one star because you have to provide a rating .
Reconstructed Sentence: 		 they only received one star because you have to provide a rating .

Original Sentence: 			 always takes way too long even if you 're the only one there .
Reconstructed Sentence: 		 always takes way too long even if you 're the only one there .

Original Sentence: 			 she could not and would not explain herself .
Reconstructed Sentence: 		 she could not and would not explain herself .

Original Sentence: 			 all she did was give me the run around and lied and bs everything .
Reconstructed Sentence: 		 all she did was give me the run around and lied and bs everything .

Original Sentence: 			 it does not take that long to cook sliders !
Reconstructed Sentence: 		 it does not take that long to cook sliders !



In [13]:
# Simple Style Classifier
class StyleClassifier(nn.Module):
    def __init__(self, vocab_size, embed_dim, hidden_dim):
        super(StyleClassifier, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        self.rnn = nn.GRU(embed_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.embedding(x)
        _, h = self.rnn(x)
        h = h[-1]  # Take the last hidden state
        output = self.fc(h)
        return self.sigmoid(output).squeeze()

In [14]:
vocab

{'<PAD>': 0,
 '<UNK>': 1,
 '.': 2,
 'the': 3,
 'and': 4,
 'i': 5,
 '!': 6,
 ',': 7,
 'is': 8,
 'was': 9,
 'a': 10,
 'to': 11,
 'it': 12,
 'this': 13,
 'great': 14,
 'food': 15,
 'for': 16,
 'place': 17,
 'service': 18,
 'good': 19,
 'of': 20,
 'my': 21,
 'in': 22,
 'very': 23,
 'they': 24,
 'are': 25,
 'not': 26,
 '_num_': 27,
 'with': 28,
 'you': 29,
 "n't": 30,
 'have': 31,
 "'s": 32,
 'we': 33,
 'that': 34,
 'so': 35,
 'here': 36,
 'love': 37,
 'but': 38,
 'had': 39,
 'friendly': 40,
 'best': 41,
 'were': 42,
 'always': 43,
 'staff': 44,
 'on': 45,
 'be': 46,
 'at': 47,
 'all': 48,
 'will': 49,
 'really': 50,
 'back': 51,
 'there': 52,
 'nice': 53,
 'just': 54,
 'no': 55,
 'as': 56,
 'me': 57,
 'do': 58,
 'their': 59,
 'amazing': 60,
 'recommend': 61,
 'time': 62,
 'would': 63,
 'our': 64,
 'one': 65,
 'delicious': 66,
 'definitely': 67,
 'experience': 68,
 'like': 69,
 'out': 70,
 'well': 71,
 'also': 72,
 'did': 73,
 'she': 74,
 'go': 75,
 'ever': 76,
 'excellent': 77,
 'too': 78,

In [15]:
vocab

{'<PAD>': 0,
 '<UNK>': 1,
 '.': 2,
 'the': 3,
 'and': 4,
 'i': 5,
 '!': 6,
 ',': 7,
 'is': 8,
 'was': 9,
 'a': 10,
 'to': 11,
 'it': 12,
 'this': 13,
 'great': 14,
 'food': 15,
 'for': 16,
 'place': 17,
 'service': 18,
 'good': 19,
 'of': 20,
 'my': 21,
 'in': 22,
 'very': 23,
 'they': 24,
 'are': 25,
 'not': 26,
 '_num_': 27,
 'with': 28,
 'you': 29,
 "n't": 30,
 'have': 31,
 "'s": 32,
 'we': 33,
 'that': 34,
 'so': 35,
 'here': 36,
 'love': 37,
 'but': 38,
 'had': 39,
 'friendly': 40,
 'best': 41,
 'were': 42,
 'always': 43,
 'staff': 44,
 'on': 45,
 'be': 46,
 'at': 47,
 'all': 48,
 'will': 49,
 'really': 50,
 'back': 51,
 'there': 52,
 'nice': 53,
 'just': 54,
 'no': 55,
 'as': 56,
 'me': 57,
 'do': 58,
 'their': 59,
 'amazing': 60,
 'recommend': 61,
 'time': 62,
 'would': 63,
 'our': 64,
 'one': 65,
 'delicious': 66,
 'definitely': 67,
 'experience': 68,
 'like': 69,
 'out': 70,
 'well': 71,
 'also': 72,
 'did': 73,
 'she': 74,
 'go': 75,
 'ever': 76,
 'excellent': 77,
 'too': 78,

In [16]:
def train_style_classifier(data_loader, vocab_size, device):
    classifier = StyleClassifier(vocab_size, 300, 128).to(device)
    criterion = nn.BCELoss()
    optimizer = torch.optim.Adam(classifier.parameters(), lr=0.001)

    classifier.train()
    for epoch in range(20):  # Train for a few epochs
        total_loss = 0
        for input_tokens, labels, _ in data_loader:  # Adjusted to unpack three values
            input_tokens = input_tokens.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            predictions = classifier(input_tokens)
            loss = criterion(predictions, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f"Epoch {epoch + 1}/5, Loss: {total_loss / len(data_loader)}")
    
    return classifier

def evaluate_style_transfer(data_loader, model, classifier, device):
    model.eval()
    classifier.eval()
    correct_predictions = 0
    total_predictions = 0

    with torch.no_grad():
        for input_tokens, labels, _ in data_loader:
            input_tokens = input_tokens.to(device)
            labels = labels.to(device)

            # Get the reconstructed sentences
            x_reconstructed, _, _, _ = model(input_tokens)
            x_reconstructed = x_reconstructed.argmax(dim=-1)

            # Predict the style of the reconstructed sentences
            style_predictions = classifier(x_reconstructed)
            style_labels = (style_predictions > 0.5).float()
            
            correct_predictions += (style_labels == labels).sum().item()
            total_predictions += labels.size(0)
    
    accuracy = correct_predictions / total_predictions
    print(f"Style Transfer Accuracy: {accuracy:.4f}")

In [17]:
classifier = train_style_classifier(data_loader, len(vocab), device)
evaluate_style_transfer(data_loader_test, model_1, classifier, device)

KeyboardInterrupt: 

In [18]:
#### updated code 
import torch
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
import nltk
nltk.download('punkt')

def tokens_to_words(token_ids, vocab):
    inv_vocab = {v: k for k, v in vocab.items()}
    return [inv_vocab.get(token_id, '<UNK>') for token_id in token_ids if token_id != 0]

def calculate_bleu_score(data_loader, model, vocab, device):
    model.eval()
    total_bleu_score = 0
    num_sentences = 0
    smoothing_fn = SmoothingFunction().method1

    print("\nBLEU-S: Evaluating content preservation...\n")
    with torch.no_grad():
        for input_tokens, _, lengths in data_loader:
            input_tokens = input_tokens.to(device)
            x_reconstructed, _, _, _ = model(input_tokens)
            x_reconstructed = x_reconstructed.argmax(dim=-1)

            for i in range(min(5, len(input_tokens))):  
                original_sentence = tokens_to_words(input_tokens[i].tolist(), vocab)
                reconstructed_sentence = tokens_to_words(x_reconstructed[i].tolist(), vocab)
                print(f"Original: {' '.join(original_sentence)}")
                print(f"Reconstructed: {' '.join(reconstructed_sentence)}\n")

                bleu_score = sentence_bleu([original_sentence], reconstructed_sentence, smoothing_function=smoothing_fn)
                total_bleu_score += bleu_score
                num_sentences += 1

            break  # Evaluate only on the first batch for now

    avg_bleu_score = total_bleu_score / num_sentences if num_sentences > 0 else 0
    print(f"Average BLEU-S Score: {avg_bleu_score:.4f}")
    return avg_bleu_score

def train_style_classifier(data_loader, vocab_size, device):
    classifier = StyleClassifier(vocab_size, 300, 128).to(device)
    criterion = nn.BCELoss()
    optimizer = torch.optim.Adam(classifier.parameters(), lr=0.001)

    classifier.train()
    print("\nTraining Style Classifier...\n")
    for epoch in range(20):
        total_loss = 0
        for input_tokens, labels, _ in data_loader:
            input_tokens = input_tokens.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            predictions = classifier(input_tokens)
            loss = criterion(predictions, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f"Epoch {epoch + 1}/5, Loss: {total_loss / len(data_loader):.4f}")
    
    return classifier

def evaluate_style_transfer(data_loader, model, classifier, vocab, device):
    model.eval()
    classifier.eval()
    correct_predictions = 0
    total_predictions = 0

    print("\nEvaluating Style Transfer Accuracy...\n")
    with torch.no_grad():
        for input_tokens, labels, _ in data_loader:
            input_tokens = input_tokens.to(device)
            labels = labels.to(device)

            x_reconstructed, _, _, _ = model(input_tokens)
            x_reconstructed = x_reconstructed.argmax(dim=-1)

            style_predictions = classifier(x_reconstructed)
            style_labels = (style_predictions > 0.5).float()
            correct_predictions += (style_labels == labels).sum().item()
            total_predictions += labels.size(0)

            for i in range(min(5, len(input_tokens))):
                original_sentence = tokens_to_words(input_tokens[i].tolist(), vocab)
                reconstructed_sentence = tokens_to_words(x_reconstructed[i].tolist(), vocab)
                print(f"Original: {' '.join(original_sentence)}")
                print(f"Reconstructed: {' '.join(reconstructed_sentence)}")
                print(f"Style Prediction: {style_labels[i].item()}, True Style: {labels[i].item()}\n")

            break  # Evaluate only on the first batch for now

    accuracy = correct_predictions / total_predictions if total_predictions > 0 else 0
    print(f"Style Transfer Accuracy: {accuracy:.4f}")
    return accuracy

def run_evaluation(data_loader_train, data_loader_test, model, vocab, vocab_size, device):
    # Train Style Classifier
    classifier = train_style_classifier(data_loader_train, vocab_size, device)

    print("\n--- BLEU-S Score (Content Preservation) ---")
    bleu_score = calculate_bleu_score(data_loader_test, model, vocab, device)

    print("\n--- Style Transfer Accuracy ---")
    style_transfer_accuracy = evaluate_style_transfer(data_loader_test, model, classifier, vocab, device)

    print("\n--- Final Results ---")
    print(f"BLEU-S Score: {bleu_score:.4f}")
    print(f"Style Transfer Accuracy: {style_transfer_accuracy:.4f}")

    return bleu_score, style_transfer_accuracy

run_evaluation(data_loader, data_loader_test, model, vocab, len(vocab), device)

[nltk_data] Downloading package punkt to /home/qik/nltk_data...
[nltk_data]   Package punkt is already up-to-date!



Training Style Classifier...

Epoch 1/5, Loss: 0.0954
Epoch 2/5, Loss: 0.0522
Epoch 3/5, Loss: 0.0387
Epoch 4/5, Loss: 0.0300
Epoch 5/5, Loss: 0.0241
Epoch 6/5, Loss: 0.0205
Epoch 7/5, Loss: 0.0178
Epoch 8/5, Loss: 0.0158
Epoch 9/5, Loss: 0.0145
Epoch 10/5, Loss: 0.0139
Epoch 11/5, Loss: 0.0132
Epoch 12/5, Loss: 0.0124
Epoch 13/5, Loss: 0.0116
Epoch 14/5, Loss: 0.0117
Epoch 15/5, Loss: 0.0116
Epoch 16/5, Loss: 0.0113
Epoch 17/5, Loss: 0.0113
Epoch 18/5, Loss: 0.0108
Epoch 19/5, Loss: 0.0105
Epoch 20/5, Loss: 0.0104

--- BLEU-S Score (Content Preservation) ---

BLEU-S: Evaluating content preservation...

Original: at this location the service was terrible .
Reconstructed: so brand scale crumbs crappy steady shogun shogun adds adds adds adds adds adds adds

Original: i ordered garlic bread and fettuccine alfredo pasta with vegetables .
Reconstructed: anyways a+ cancer crusted cancer middle towing crusted middle watered belgian camarones inspector inspector inspector

Original: i did n't

(0.0, 0.546875)