# Batch 1: Import Libraries and Dataset Preparation


In [2]:
# Batch 1: Import Libraries and Dataset Preparation

import torch
from torch.utils.data import Dataset, DataLoader
from transformers import GPT2Tokenizer, GPT2Model, GPT2PreTrainedModel
from transformers import AdamW
from tqdm import tqdm
import pandas as pd
from sklearn.model_selection import train_test_split
from torch.nn import CrossEntropyLoss

# Define category, intent, and NER mappings
category_map = {
    "ACCOUNT": 0,
    "CANCELLATION_FEE": 1,
    "CONTACT": 2,
    "DELIVERY": 3,
    "FEEDBACK": 4,
    "INVOICE": 5,
    "ORDER": 6,
    "PAYMENT": 7,
    "REFUND": 8,
    "SHIPPING_ADDRESS": 9,
    "SUBSCRIPTION": 10
}

intent_map = {
    "create_account": 0,
    "delete_account": 1,
    "edit_account": 2,
    "recover_password": 3,
    "registration_problems": 4,
    "switch_account": 5,
    "check_cancellation_fee": 6,
    "contact_customer_service": 7,
    "contact_human_agent": 8,
    "delivery_options": 9,
    "delivery_period": 10,
    "complaint": 11,
    "review": 12,
    "check_invoice": 13,
    "get_invoice": 14,
    "cancel_order": 15,
    "change_order": 16,
    "place_order": 17,
    "track_order": 18,
    "check_payment_methods": 19,
    "payment_issue": 20,
    "check_refund_policy": 21,
    "get_refund": 22,
    "track_refund": 23,
    "change_shipping_address": 24,
    "set_up_shipping_address": 25,
    "newsletter_subscription": 26
}

ner_map = {
    "O": 0,  # Outside of a named entity
    "B-ORDER_NUMBER": 1,  # Beginning of an order number
    "I-ORDER_NUMBER": 2,  # Inside an order number
    "B-INVOICE_NUMBER": 3,  # Beginning of an invoice number
    "I-INVOICE_NUMBER": 4,  # Inside an invoice number
    "B-ONLINE_ORDER_INTERACTION": 5,  # Beginning of an online order interaction
    "I-ONLINE_ORDER_INTERACTION": 6,  # Inside an online order interaction
    "B-ONLINE_PAYMENT_INTERACTION": 7,  # Beginning of an online payment interaction
    "I-ONLINE_PAYMENT_INTERACTION": 8,  # Inside an online payment interaction
    "B-ONLINE_NAVIGATION_STEP": 9,  # Beginning of an online navigation step
    "I-ONLINE_NAVIGATION_STEP": 10,  # Inside an online navigation step
    "B-ONLINE_CUSTOMER_SUPPORT_CHANNEL": 11,  # Beginning of an online customer support channel
    "I-ONLINE_CUSTOMER_SUPPORT_CHANNEL": 12,  # Inside an online customer support channel
    "B-PROFILE": 13,  # Beginning of a profile
    "I-PROFILE": 14,  # Inside a profile
    "B-PROFILE_TYPE": 15,  # Beginning of a profile type
    "I-PROFILE_TYPE": 16,  # Inside a profile type
    "B-SETTINGS": 17,  # Beginning of settings
    "I-SETTINGS": 18,  # Inside settings
    "B-ONLINE_COMPANY_PORTAL_INFO": 19,  # Beginning of online company portal info
    "I-ONLINE_COMPANY_PORTAL_INFO": 20,  # Inside online company portal info
    "B-DATE": 21,  # Beginning of a date
    "I-DATE": 22,  # Inside a date
    "B-DATE_RANGE": 23,  # Beginning of a date range
    "I-DATE_RANGE": 24,  # Inside a date range
    "B-SHIPPING_CUT_OFF_TIME": 25,  # Beginning of a shipping cut-off time
    "I-SHIPPING_CUT_OFF_TIME": 26,  # Inside a shipping cut-off time
    "B-DELIVERY_CITY": 27,  # Beginning of a delivery city
    "I-DELIVERY_CITY": 28,  # Inside a delivery city
    "B-DELIVERY_COUNTRY": 29,  # Beginning of a delivery country
    "I-DELIVERY_COUNTRY": 30,  # Inside a delivery country
    "B-SALUTATION": 31,  # Beginning of a salutation
    "I-SALUTATION": 32,  # Inside a salutation
    "B-CLIENT_FIRST_NAME": 33,  # Beginning of a client's first name
    "I-CLIENT_FIRST_NAME": 34,  # Inside a client's first name
    "B-CLIENT_LAST_NAME": 35,  # Beginning of a client's last name
    "I-CLIENT_LAST_NAME": 36,  # Inside a client's last name
    "B-CUSTOMER_SUPPORT_PHONE_NUMBER": 37,  # Beginning of a customer support phone number
    "I-CUSTOMER_SUPPORT_PHONE_NUMBER": 38,  # Inside a customer support phone number
    "B-CUSTOMER_SUPPORT_EMAIL": 39,  # Beginning of a customer support email
    "I-CUSTOMER_SUPPORT_EMAIL": 40,  # Inside a customer support email
    "B-LIVE_CHAT_SUPPORT": 41,  # Beginning of live chat support
    "I-LIVE_CHAT_SUPPORT": 42,  # Inside live chat support
    "B-WEBSITE_URL": 43,  # Beginning of a website URL
    "I-WEBSITE_URL": 44,  # Inside a website URL
    "B-UPGRADE_ACCOUNT": 45,  # Beginning of an upgrade account
    "I-UPGRADE_ACCOUNT": 46,  # Inside an upgrade account
    "B-ACCOUNT_TYPE": 47,  # Beginning of an account type
    "I-ACCOUNT_TYPE": 48,  # Inside an account type
    "B-ACCOUNT_CATEGORY": 49,  # Beginning of an account category
    "I-ACCOUNT_CATEGORY": 50,  # Inside an account category
    "B-ACCOUNT_CHANGE": 51,  # Beginning of an account change
    "I-ACCOUNT_CHANGE": 52,  # Inside an account change
    "B-PROGRAM": 53,  # Beginning of a program
    "I-PROGRAM": 54,  # Inside a program
    "B-REFUND_AMOUNT": 55,  # Beginning of a refund amount
    "I-REFUND_AMOUNT": 56,  # Inside a refund amount
    "B-MONEY_AMOUNT": 57,  # Beginning of a money amount
    "I-MONEY_AMOUNT": 58,  # Inside a money amount
    "B-STORE_LOCATION": 59,  # Beginning of a store location
    "I-STORE_LOCATION": 60  # Inside a store location
}

class CustomDataset(Dataset):
    def __init__(self, data, tokenizer, max_length, category_map, intent_map, ner_map):
        self.data = data
        self.tokenizer = tokenizer
        self.max_length = max_length
        self.category_map = category_map
        self.intent_map = intent_map
        self.ner_map = ner_map

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        row = self.data.iloc[idx]  # Access by row index
        text = f"{row['instruction']} {row['response']}"  # NER input from instruction + response
        category = row['category']
        intent = row['intent']

        # Tokenize text with padding and truncation
        encoding = self.tokenizer(
            text,
            max_length=self.max_length,
            padding="max_length",  # Padding will use the newly defined pad_token
            truncation=True,
            return_tensors="pt"
        )

        # Extract NER labels from the text (assuming a function `extract_ner_labels` exists)
        ner_labels = extract_ner_labels(text, self.ner_map)
        ner_label_ids = [self.ner_map[label] for label in ner_labels]
        ner_label_ids += [0] * (self.max_length - len(ner_label_ids))  # Pad NER labels to max_length

        # Handle unexpected categories and intents
        category_label = self.category_map.get(category, -1)  # Default to -1 if category not found
        intent_label = self.intent_map.get(intent, -1)        # Default to -1 if intent not found

        # Return inputs and task-specific labels
        return {
            "input_ids": encoding["input_ids"].squeeze(),
            "attention_mask": encoding["attention_mask"].squeeze(),
            "category": torch.tensor(category_label, dtype=torch.long),  # Map categories to numerical labels
            "intent": torch.tensor(intent_label, dtype=torch.long),      # Map intents to numerical labels
            "ner_labels": torch.tensor(ner_label_ids, dtype=torch.long)  # Map NER labels to numerical labels
        }

def extract_ner_labels(text, ner_map):
    # Dummy implementation for extracting NER labels from text
    # Replace this with your actual NER extraction logic
    words = text.split()
    labels = ["O"] * len(words)  # Default to "O" for all words
    return labels

# Load your dataset
df = pd.read_csv('../../dataset/Bitext.csv')


# Batch 2: DataLoader and Train-Validation Split


In [3]:
# Batch 2: DataLoader and Train-Validation Split

# Assuming df is your full dataset, apply train_test_split
train_df, val_df = train_test_split(df, test_size=0.2, random_state=42)

# Initialize tokenizer
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token  # Set padding token to eos_token for GPT-2

# Create dataset instances
train_dataset = CustomDataset(train_df, tokenizer, max_length=128, category_map=category_map, intent_map=intent_map, ner_map=ner_map)
val_dataset = CustomDataset(val_df, tokenizer, max_length=128, category_map=category_map, intent_map=intent_map, ner_map=ner_map)

# Custom collate function to pad sequences to the same length
def collate_fn(batch):
    input_ids = [item['input_ids'] for item in batch]
    attention_mask = [item['attention_mask'] for item in batch]
    category = [item['category'] for item in batch]
    intent = [item['intent'] for item in batch]
    ner_labels = [item['ner_labels'] for item in batch]

    input_ids = torch.nn.utils.rnn.pad_sequence(input_ids, batch_first=True, padding_value=tokenizer.pad_token_id)
    attention_mask = torch.nn.utils.rnn.pad_sequence(attention_mask, batch_first=True, padding_value=0)
    category = torch.stack(category)
    intent = torch.stack(intent)
    ner_labels = torch.nn.utils.rnn.pad_sequence(ner_labels, batch_first=True, padding_value=0)

    return {
        'input_ids': input_ids,
        'attention_mask': attention_mask,
        'category': category,
        'intent': intent,
        'ner_labels': ner_labels
    }

# Create DataLoaders
train_dataloader = DataLoader(train_dataset, batch_size=16, shuffle=True, collate_fn=collate_fn)
val_dataloader = DataLoader(val_dataset, batch_size=16, shuffle=False, collate_fn=collate_fn)


# Batch 3: Model Setup


In [4]:
# Batch 3: Model Setup

class NERModel(GPT2PreTrainedModel):
    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels
        self.gpt2 = GPT2Model(config)
        self.classifier = torch.nn.Linear(config.hidden_size, self.num_labels)
        self.init_weights()

    def forward(self, input_ids, attention_mask, labels=None):
        outputs = self.gpt2(input_ids, attention_mask=attention_mask)
        sequence_output = outputs[0]
        logits = self.classifier(sequence_output)
        loss = None
        if labels is not None:
            loss_fct = CrossEntropyLoss()
            loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
        return (loss, logits) if loss is not None else logits

class CategoryModel(GPT2PreTrainedModel):
    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels
        self.gpt2 = GPT2Model(config)
        self.classifier = torch.nn.Linear(config.hidden_size, self.num_labels)
        self.init_weights()

    def forward(self, input_ids, attention_mask, labels=None):
        outputs = self.gpt2(input_ids, attention_mask=attention_mask)
        pooled_output = outputs[0][:, -1, :]
        logits = self.classifier(pooled_output)
        loss = None
        if labels is not None:
            loss_fct = CrossEntropyLoss()
            loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
        return (loss, logits) if loss is not None else logits

class IntentModel(GPT2PreTrainedModel):
    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels
        self.gpt2 = GPT2Model(config)
        self.classifier = torch.nn.Linear(config.hidden_size, self.num_labels)
        self.init_weights()

    def forward(self, input_ids, attention_mask, labels=None):
        outputs = self.gpt2(input_ids, attention_mask=attention_mask)
        pooled_output = outputs[0][:, -1, :]
        logits = self.classifier(pooled_output)
        loss = None
        if labels is not None:
            loss_fct = CrossEntropyLoss()
            loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
        return (loss, logits) if loss is not None else logits

# Initialize the models
ner_model = NERModel.from_pretrained("gpt2", num_labels=len(ner_map))  # Use the length of ner_map for num_labels
category_model = CategoryModel.from_pretrained("gpt2", num_labels=len(category_map))
intent_model = IntentModel.from_pretrained("gpt2", num_labels=len(intent_map))

# Move models to device
device = torch.device('cpu')
ner_model.to(device)
category_model.to(device)
intent_model.to(device)


Some weights of NERModel were not initialized from the model checkpoint at gpt2 and are newly initialized: ['classifier.bias', 'classifier.weight', 'gpt2.h.0.attn.c_attn.bias', 'gpt2.h.0.attn.c_attn.weight', 'gpt2.h.0.attn.c_proj.bias', 'gpt2.h.0.attn.c_proj.weight', 'gpt2.h.0.ln_1.bias', 'gpt2.h.0.ln_1.weight', 'gpt2.h.0.ln_2.bias', 'gpt2.h.0.ln_2.weight', 'gpt2.h.0.mlp.c_fc.bias', 'gpt2.h.0.mlp.c_fc.weight', 'gpt2.h.0.mlp.c_proj.bias', 'gpt2.h.0.mlp.c_proj.weight', 'gpt2.h.1.attn.c_attn.bias', 'gpt2.h.1.attn.c_attn.weight', 'gpt2.h.1.attn.c_proj.bias', 'gpt2.h.1.attn.c_proj.weight', 'gpt2.h.1.ln_1.bias', 'gpt2.h.1.ln_1.weight', 'gpt2.h.1.ln_2.bias', 'gpt2.h.1.ln_2.weight', 'gpt2.h.1.mlp.c_fc.bias', 'gpt2.h.1.mlp.c_fc.weight', 'gpt2.h.1.mlp.c_proj.bias', 'gpt2.h.1.mlp.c_proj.weight', 'gpt2.h.10.attn.c_attn.bias', 'gpt2.h.10.attn.c_attn.weight', 'gpt2.h.10.attn.c_proj.bias', 'gpt2.h.10.attn.c_proj.weight', 'gpt2.h.10.ln_1.bias', 'gpt2.h.10.ln_1.weight', 'gpt2.h.10.ln_2.bias', 'gpt2.h.1

IntentModel(
  (gpt2): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2SdpaAttention(
          (c_attn): Conv1D(nf=2304, nx=768)
          (c_proj): Conv1D(nf=768, nx=768)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D(nf=3072, nx=768)
          (c_proj): Conv1D(nf=768, nx=3072)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (classifier): Linear(in_features=768, out_features=27, bias=True)
)

# Batch 4: Loss Functions and Optimizer


In [6]:
# Batch 4: Loss Functions and Optimizer

def compute_loss(category_logits, intent_logits, ner_logits, category_labels, intent_labels, ner_labels):
    loss_fct = CrossEntropyLoss()
    category_loss = loss_fct(category_logits.view(-1, category_logits.size(-1)), category_labels.view(-1))
    intent_loss = loss_fct(intent_logits.view(-1, intent_logits.size(-1)), intent_labels.view(-1))
    ner_loss = loss_fct(ner_logits.view(-1, ner_logits.size(-1)), ner_labels.view(-1))
    return category_loss + intent_loss + ner_loss

# Initialize optimizer
optimizer = AdamW(list(ner_model.parameters()) + list(category_model.parameters()) + list(intent_model.parameters()), lr=2e-5)


# Batch 5: Training Loop


In [11]:
# Batch 5: Training Loop

def train_epoch(ner_model, category_model, intent_model, dataloader, optimizer, device):
    ner_model.train()
    category_model.train()
    intent_model.train()
    total_loss = 0
    batch_count = 0

    for batch in tqdm(dataloader, desc="Training Batches"):
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        category_labels = batch['category'].to(device)
        intent_labels = batch['intent'].to(device)
        ner_labels = batch['ner_labels'].to(device)

        # Step 1: Get NER logits (token-level task)
        ner_logits = ner_model(input_ids=input_ids, attention_mask=attention_mask)[1]

        # Step 2: Get category logits (class-level task)
        category_logits = category_model(input_ids=input_ids, attention_mask=attention_mask)[1]

        # Step 3: Get intent logits (class-level task)
        intent_logits = intent_model(input_ids=input_ids, attention_mask=attention_mask)[1]

        # Step 4: Compute loss for this batch
        loss = compute_loss(
            category_logits=category_logits,
            intent_logits=intent_logits,
            ner_logits=ner_logits,
            category_labels=category_labels,
            intent_labels=intent_labels,
            ner_labels=ner_labels,
        )

        # Step 5: Backward pass
        optimizer.zero_grad()  # Reset gradients
        loss.backward()        # Compute gradients
        optimizer.step()       # Update model weights

        # Accumulate total loss
        total_loss += loss.item()
        batch_count += 1

    # Return the average loss for the epoch
    avg_loss = total_loss / batch_count
    return avg_loss

# Main Training Loop
for epoch in range(3):  # Total number of epochs
    print(f"Epoch {epoch + 1}/3")
    
    # Train for one epoch
    avg_loss = train_epoch(ner_model, category_model, intent_model, train_dataloader, optimizer, device)
    
    # Print the average loss for the epoch
    print(f"Epoch {epoch + 1}, Average Loss: {avg_loss}")


Epoch 1/3


Training Batches:   0%|          | 0/1344 [00:00<?, ?it/s]

usage: ipykernel_launcher.py [-h] --train_file TRAIN_FILE --eval_file
                             EVAL_FILE --model MODEL [--bert_model BERT_MODEL]
                             [--xlnet_model XLNET_MODEL]
                             [--gpt2_model GPT2_MODEL]
                             [--gpt2_classification_type GPT2_CLASSIFICATION_TYPE]
                             [--train_batch_size TRAIN_BATCH_SIZE] [--gpu GPU]
                             [--eval_batch_size EVAL_BATCH_SIZE]
                             [--learning_rate LEARNING_RATE]
                             [--num_train_epochs NUM_TRAIN_EPOCHS]
                             [--prob_threshold PROB_THRESHOLD]
                             [--max_seq_length MAX_SEQ_LENGTH]
ipykernel_launcher.py: error: the following arguments are required: --train_file, --eval_file, --model


SystemExit: 2

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


# Batch 6: Validation Loop


In [None]:
# Batch 6: Validation Loop

def validate(ner_model, category_model, intent_model, dataloader, device):
    ner_model.eval()
    category_model.eval()
    intent_model.eval()
    total_loss = 0
    batch_count = 0

    with torch.no_grad():
        for batch in tqdm(dataloader, desc="Validating Batches"):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            category_labels = batch['category'].to(device)
            intent_labels = batch['intent'].to(device)
            ner_labels = batch['ner_labels'].to(device)

            # Step 1: Get NER logits (token-level task)
            ner_logits = ner_model(input_ids=input_ids, attention_mask=attention_mask)[1]

            # Step 2: Get category logits (class-level task)
            category_logits = category_model(input_ids=input_ids, attention_mask=attention_mask)[1]

            # Step 3: Get intent logits (class-level task)
            intent_logits = intent_model(input_ids=input_ids, attention_mask=attention_mask)[1]

            # Step 4: Compute loss for this batch
            loss = compute_loss(
                category_logits=category_logits,
                intent_logits=intent_logits,
                ner_logits=ner_logits,
                category_labels=category_labels,
                intent_labels=intent_labels,
                ner_labels=ner_labels,
            )

            # Accumulate total loss
            total_loss += loss.item()
            batch_count += 1

    # Return the average loss for the validation
    avg_loss = total_loss / batch_count
    return avg_loss

# Validate the models
avg_val_loss = validate(ner_model, category_model, intent_model, val_dataloader, device)
print(f"Average Validation Loss: {avg_val_loss}")


# Batch 7: Save the model


In [1]:
# Batch 7: Save Models and Tokenizer

# Save the NER model
ner_model.save_pretrained("path_to_save_ner_model")
# Save the Category model
category_model.save_pretrained("path_to_save_category_model")
# Save the Intent model
intent_model.save_pretrained("path_to_save_intent_model")
# Save the tokenizer
tokenizer.save_pretrained("path_to_save_tokenizer")


NameError: name 'ner_model' is not defined

# Batch 8: Inference


In [None]:
# Batch 8: Inference

def inference(ner_model, category_model, intent_model, tokenizer, text, device):
    # Tokenize text with padding and truncation
    encoding = tokenizer(
        text,
        max_length=128,
        padding="max_length",
        truncation=True,
        return_tensors="pt"
    )

    input_ids = encoding["input_ids"].to(device)
    attention_mask = encoding["attention_mask"].to(device)

    # Step 1: Get NER logits (token-level task)
    ner_logits = ner_model(input_ids=input_ids, attention_mask=attention_mask)[1]
    ner_predictions = torch.argmax(ner_logits, dim=-1).cpu().numpy()

    # Step 2: Get category logits (class-level task)
    category_logits = category_model(input_ids=input_ids, attention_mask=attention_mask)[1]
    category_prediction = torch.argmax(category_logits, dim=-1).cpu().numpy()

    # Step 3: Get intent logits (class-level task)
    intent_logits = intent_model(input_ids=input_ids, attention_mask=attention_mask)[1]
    intent_prediction = torch.argmax(intent_logits, dim=-1).cpu().numpy()

    return ner_predictions, category_prediction, intent_prediction

# Example usage
text = "Your example text here"
ner_predictions, category_prediction, intent_prediction = inference(ner_model, category_model, intent_model, tokenizer, text, device)
print(f"NER Predictions: {ner_predictions}")
print(f"Category Prediction: {category_prediction}")
print(f"Intent Prediction: {intent_prediction}")
