# K shot Model using GPT2 

In [1]:
import pandas as pd
from transformers import GPT2Tokenizer, GPT2ForSequenceClassification
import torch
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the dataset from the Excel file
file_path = 'dataset.xlsx'
df = pd.read_excel(file_path, engine='openpyxl')
sentences = df.iloc[:, 0].tolist()
labels = df.iloc[:, 1].tolist()
labels2 = []
for label in labels:
    if label == 'a1':
        labels2.append(0)
    if label == 'a2':
        labels2.append(1)
    if label == 'a3':
        labels2.append(2)
    if label == 'a4':
        labels2.append(3)
    if label == 'a5':
        labels2.append(4)
labels = labels2

In [2]:
# Split the dataset into train and test sets (in the ratio 4:1)
train_sentences, test_sentences, train_labels, test_labels = train_test_split(sentences, labels, test_size=0.1, random_state=1)

# Define the N-way K-shot dataset
class FewShotDataset(Dataset):
    def __init__(self, sentences, labels, tokenizer, n_shot=5):
        self.sentences = sentences
        self.labels = labels
        self.tokenizer = tokenizer
        # self.n_shot = n_shot
        

    def __len__(self):
        return len(self.sentences)

    def __getitem__(self, idx) -> tuple[torch.tensor, torch.tensor, torch.tensor, torch.tensor]:
        sentence = self.sentences[idx]
        label = self.labels[idx]

        # Check if the sentence is a string; if not, convert it to string
        if not isinstance(sentence, str):
            sentence = str(sentence)

        # Encode the sentence and handle possible exceptions
        encoded_sentence = self.tokenizer.encode(sentence, add_special_tokens=True)
        # print(encoded_sentence)
        return torch.tensor(encoded_sentence), torch.tensor(label)  # Assuming selected_positives and selected_negatives are not used in your current implementation
    

In [3]:
# Load pre-trained GPT-2 model and tokenizer (smaller version) and add a padding token
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
tokenizer.add_special_tokens({'pad_token': '[PAD]'}) # Add a [PAD] token to the vocabulary (for padding) which may be used by the tokenizer to encode sequences with different lengths

model = GPT2ForSequenceClassification.from_pretrained('gpt2', num_labels=5)
# Initialize your model and optimizer
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)


Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


GPT2ForSequenceClassification(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (score): Linear(in_features=768, out_features=5, bias=False)
)

In [6]:
# Prepare the train and test datasets and dataloaders
train_dataset = FewShotDataset(train_sentences, train_labels, tokenizer, n_shot=5)
test_dataset = FewShotDataset(test_sentences, test_labels, tokenizer, n_shot=5)

train_dataloader = DataLoader(train_dataset, shuffle=True)
test_dataloader = DataLoader(test_dataset, shuffle=False)

In [9]:
'''Create a neural network and train it on the dataset'''



# Train the model
num_epochs = 10
for epoch in range(num_epochs):
    total_loss = 0
    for batch in train_dataloader:
        inputs = batch[0].to(device)
        labels = batch[-1].to(device)
        optimizer.zero_grad()
        outputs = model(inputs, labels = labels)
        loss = outputs.loss
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    average_loss = total_loss / len(train_dataloader)
    print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {average_loss:.4f}')



Epoch 1/3, Loss: 1.4644
Epoch 2/3, Loss: 0.8766
Epoch 3/3, Loss: 0.3268


In [10]:
#Test the model
model.eval()  # Set the model to evaluation mode

correct_predictions = 0
total_predictions = 0

predicted_labels = []
true_labels = []

with torch.no_grad():
    for batch in test_dataloader:
        inputs = batch[0].to(device)
        label = batch[1].to(device)
        outputs = model(inputs, labels = label)
        predicted = torch.softmax(input = outputs.logits, dim=1)
        predicted = torch.argmax(predicted)
        predicted_labels.append(int(predicted))
        true_labels.append(int(label))

        total_predictions += label.size(0)
        correct_predictions += (predicted == label).sum().item()

In [11]:
#Checking scores

accuracy = (correct_predictions / total_predictions) * 100
print(f'Testing Accuracy: {accuracy:.2f}%')

from sklearn.metrics import precision_score
precision = precision_score(true_labels, predicted_labels, average='weighted')
print(f'Precision: {precision:.2f}')



Testing Accuracy: 46.15%
Precision: 0.56
