### I. Import Libraries

In [1]:
# !pip install llmlingua
# !pip install openai==0.28
# !pip install spacy
# !python -m spacy download en_core_web_sm

# !pip install datasets
# !pip install nltk

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from transformers import AutoTokenizer, AutoModelForTokenClassification
import torch.nn.functional as F
import openai
import nltk
from nltk.translate.bleu_score import sentence_bleu
from nltk.tokenize import word_tokenize

# Download necessary tokenizer data
nltk.download('punkt_tab')
nltk.download('wordnet')
nltk.download('omw-1.4')

# Initialize OpenAI API key
openai.api_key = "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"

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


### II. Load model

In [3]:
# test
tokenizer = AutoTokenizer.from_pretrained("microsoft/llmlingua-2-xlm-roberta-large-meetingbank")
model = AutoModelForTokenClassification.from_pretrained("microsoft/llmlingua-2-xlm-roberta-large-meetingbank")

# Access the final classification layer
classification_layer = model.classifier

# Extract weights and bias
weights = classification_layer.weight  # Shape: (num_labels, hidden_size)
bias = classification_layer.bias       # Shape: (num_labels,)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


### III. Define functions

In [8]:
# GPT querying function
def query_gpt(prompt):
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": prompt}],
    )
    return response['choices'][0]['message']['content']

# Metric M: BLEU for summarization
def compute_metric(ycomp, yorig):

  # Tokenize the strings
  reference_tokens = word_tokenize(ycomp)
  candidate_tokens = word_tokenize(yorig)

  # Compute BLEU score
  bleu_score = sentence_bleu([reference_tokens], candidate_tokens, weights=(1.0, 0.0, 0, 0))
  return bleu_score

### IV. Load Datasets

In [5]:
from datasets import load_dataset
from torch.utils.data import Dataset, DataLoader
ds = load_dataset("openai/gsm8k", "main", split="train")
#dataloader = DataLoader(ds, batch_size=32, shuffle=True)
training_data = []
for idx, instance in enumerate(ds):
  if idx==20: break
  training_data.append("Question: "+instance['question']+instance['answer'])

### IV. Policy Optimization

In [69]:
# parameters for optimization
compression_rate = 0.6
tolerance = 5
r0 = -10.0
epochs = 5
lr=1e-5
entropy_weight=0.1 #lambda

In [71]:
#optimizer = optim.Adam(classification_layer.parameters(), lr=lr)
optimizer = optim.Adam([weights, bias], lr=lr)

for epoch in range(epochs):

  total_loss = 0

 # for batch_idx, batch_instance in enumerate(dataloader):
  for P in training_data:

    inputs = tokenizer(P, return_tensors="pt", padding=True, truncation=True)
    # Forward pass with output_hidden_states=True
    outputs = model(**inputs, output_hidden_states=True)
    # Hidden states: a tuple with one tensor per layer
    hidden_states = outputs.hidden_states
    logits = torch.matmul(hidden_states[0], weights.T) + bias
    probs = F.softmax(logits, dim=-1)
    predictions = torch.argmax(logits, dim=2)

    # Decode token labels
    tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
    predicted_labels = [pred for pred in predictions[0]]

    # Preserved tokens
    Pc_tokens = []
    Pc_probs = []

    for token, label, prob in zip(tokens, predicted_labels, probs[0]):

     # if label==0:
      if prob[1].item()<0.3:  #temp fix
        continue

      Pc_tokens.append(token)
      Pc_probs.append(prob[1].item())

    Pc = " ".join(Pc_tokens)
    Pc_probs = torch.tensor(Pc_probs, requires_grad=True)

    # Query GPT outputs
    yorig = query_gpt(P)
    ycomp = query_gpt(Pc)

    # Calculate compression constraint
    delta = len(Pc_tokens) - compression_rate * len(P.split())

    # Compute reward
    if abs(delta) <= tolerance:
        reward = compute_metric(ycomp, yorig)
    else:
        reward = r0  # Penalize constraint violation

    # Compute loss L
    entropy = -torch.sum(probs * torch.log(probs))
    loss = -reward * torch.sum(torch.log(Pc_probs) ) - entropy_weight * entropy

    # Backpropagate and update policy
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    total_loss += loss

  print(f"Epoch {epoch + 1}/{epochs}, Loss: {total_loss:.6f}")

Epoch 1/5, Loss: -5508.287598
Epoch 2/5, Loss: -5930.135254
Epoch 3/5, Loss: -6402.196777
Epoch 4/5, Loss: -6394.845215
Epoch 5/5, Loss: -6377.056641


### VI. Inference

In [72]:
ds_test = load_dataset("openai/gsm8k", "main", split="test")

test_data = []
for idx, instance in enumerate(ds_test):
  if idx==3: break
  test_data.append("Question: "+instance['question']+instance['answer'])

In [78]:
# prediction
def get_compressed_tokens(text, compression_rate, weights, bias):
    inputs = tokenizer(text, return_tensors="pt")
    # Forward pass with output_hidden_states=True
    outputs = model(**inputs, output_hidden_states=True)
    # Hidden states: a tuple with one tensor per layer
    hidden_states = outputs.hidden_states
    logits = torch.matmul(hidden_states[0], weights.T) + bias
    probs = F.softmax(logits, dim=-1)
    predictions = torch.argmax(logits, dim=2)

    # Decode token labels
    tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
    predicted_labels = [pred for pred in predictions[0]]

    # Preserved tokens
    Pc_tokens = []

    for token, label, prob in zip(tokens, predicted_labels, probs[0]):
      #if label==0:
      if prob[1].item()<0.3:
        continue

      Pc_tokens.append(token)

    Pc = "".join(Pc_tokens)
    return Pc


In [79]:
# test
tokenizer = AutoTokenizer.from_pretrained("microsoft/llmlingua-2-xlm-roberta-large-meetingbank")
model = AutoModelForTokenClassification.from_pretrained("microsoft/llmlingua-2-xlm-roberta-large-meetingbank")

# Access the final classification layer
classification_layer = model.classifier

# Extract weights and bias
weights_org = classification_layer.weight  # Shape: (num_labels, hidden_size)
bias_org = classification_layer.bias       # Shape: (num_labels,)

In [80]:
weights_org

Parameter containing:
tensor([[-0.0129,  0.0172, -0.0052,  ...,  0.0288, -0.0090,  0.0378],
        [-0.0337, -0.0032,  0.0047,  ..., -0.0004, -0.0192,  0.0056]],
       requires_grad=True)

In [81]:
weights

Parameter containing:
tensor([[-0.0089,  0.0204, -0.0010,  ...,  0.0328, -0.0047,  0.0358],
        [-0.0376, -0.0065,  0.0005,  ..., -0.0043, -0.0236,  0.0076]],
       requires_grad=True)

In [82]:
for i in range(3):

  Pc = get_compressed_tokens(test_data[i], compression_rate, weights_org, bias_org)
  Pc_rl = get_compressed_tokens(test_data[i], compression_rate, weights, bias)
  print("=======================")
  print("Original Promot")
  print(test_data[i])
  print("Compressed Tokens using LLMLingua2")
  print(Pc)
  print("Compressed Tokens after Policy Optimization")
  print(Pc_rl)

Original Promot
Question: Janet’s ducks lay 16 eggs per day. She eats three for breakfast every morning and bakes muffins for her friends every day with four. She sells the remainder at the farmers' market daily for $2 per fresh duck egg. How much in dollars does she make every day at the farmers' market?Janet sells 16 - 3 - 4 = <<16-3-4=9>>9 duck eggs a day.
She makes 9 * 2 = $<<9*2=18>>18 every day at the farmer’s market.
#### 18
Compressed Tokens using LLMLingua2
<s>▁Question:▁Janet’s▁ducks▁lay▁16▁eggs▁per▁day.▁She▁eats▁three▁for▁breakfast▁morning▁bakes▁muffins▁for▁her▁friends▁day▁with▁four.▁She▁sells▁the▁remainder▁at▁the▁farmers'▁market▁daily▁for▁$2▁per▁fresh▁duck▁egg.▁much▁in▁dollars▁does▁she▁day▁at▁the▁farmers'Janet▁sells▁16▁-▁3▁-▁4▁=16-3-499▁duck▁eggs▁a▁day.▁She▁makes▁9▁2▁=▁$<<9*21818▁day▁at▁the▁farmer’s.▁####▁18</s>
Compressed Tokens after Policy Optimization
<s>▁Question▁Janets▁ducks▁lay▁16s▁peratsess▁friendssders'▁duck▁eggsJanet▁sells▁16▁3▁4163-499▁ducks▁9▁2921818s▁18</s>
Ori

* This work is based on the paper https://arxiv.org/pdf/2409.13035v2
* The original paper aims to optimize the transformer encoder from LLMLingua2 via minimizing the difference between GPT3.5-generated feedbacks using original prompt and compressed prompt, while this work aims to optimize the weights&bias in the classification layer.
* Soft constrain on compression rate via a penalized term r0 during reward update
* Policy Optimization tends to keep numbers and operators from original prompt.


