In [8]:
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from models.model_dpo import AutoDPOModelForSeq2SeqLM

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [9]:
# Download the pre-trained model and tokenizer from the Hub
model_name = "google/flan-t5-small"
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Initialize your model class and import the pre-trained model into your class
# Note that if you have a custom module in your class
# You should initialize the weights of this module in the `__init__` function
model_wrapper = AutoDPOModelForSeq2SeqLM(pretrained_model=model)

In [132]:
prompt = tokenizer("What is the capital of the USA?", return_tensors="pt")

In [133]:
outputs = model.generate(**prompt)
outputs

tensor([[  0,   3,   7, 152,  67, 839,   1]])

In [96]:
model.config.decoder_start_token_id

0

In [134]:
# Assuming `input_ids` has been obtained as shown previously
decoder_start_token_id = model.config.decoder_start_token_id

# Generate an initial decoder_input_ids using the start token. This is for demonstration; adjust as needed.
decoder_input_ids = torch.full((outputs.shape[0], 1), decoder_start_token_id, dtype=torch.long)

In [135]:
outputs

tensor([[  0,   3,   7, 152,  67, 839,   1]])

In [139]:
model_wrapper(input_ids=outputs, decoder_input_ids=decoder_input_ids)

{'logits': tensor([[[-39.8943,  -2.3258,  -7.0428,  ..., -39.8175, -39.8548, -39.6551]]],
        grad_fn=<UnsafeViewBackward0>),
 'hidden_states': None,
 'attentions': None}

In [137]:
tokenizer.decode(outputs[0])

'<pad> san diego</s>'

In [224]:
decoder_input_ids = tokenizer("The United", return_tensors="pt").input_ids  # Batch size 1

# preprocess: Prepend decoder_input_ids with start token which is pad token for T5Model.
# This is not needed for torch's T5ForConditionalGeneration as it does this internally using labels arg.
decoder_input_ids = model._shift_right(decoder_input_ids)

In [225]:
log = model_wrapper(input_ids=prompt.input_ids, decoder_input_ids=decoder_input_ids)['logits']

In [226]:
tokenizer.decode(torch.argmax(log[:, -1, :]), skip_special_tokens=True)

'States'

In [227]:
tokenizer.decode(torch.argmax(log[:, 0, :]), skip_special_tokens=True)

''

In [228]:
log

tensor([[[-42.6578,  -2.4238,  -7.4038,  ..., -42.6205, -42.6900, -42.5286],
         [-39.2815,  -3.2432,  -6.3100,  ..., -39.3077, -39.4109, -39.2095],
         [-56.9624,  -4.4525, -12.2298,  ..., -57.0103, -57.0989, -57.1150]]],
       grad_fn=<UnsafeViewBackward0>)

In [192]:
decoder_input_ids

tensor([[   0,   37,  907, 1323,    3]])

In [164]:
torch.argmax(log[:, -1, :])

tensor(13)

In [68]:
tokenizer.decode(outputs[0], skip_special_tokens=True)

'san diego'

In [69]:
prompt = tokenizer("Can you bully me?", return_tensors="pt")
outputs = model.generate(**prompt)
tokenizer.decode(outputs[0], skip_special_tokens=True)

"i'm not sure"

In [242]:
test = {
    "prompt": ["Quelle est la capital de la Suisse?", "What is your favorite color"], 
    "chosen": ["It is Bern.", ""], 
    "rejected": ["It is Bern motherfucker.", "My favorite color is cat."], 
    "chosen_logps": None,
    "rejected_logps": None
}

In [359]:
test1 = {
    "prompt": ["Quelle est la capital de la Suisse?", "test"], 
    "chosen": ["Bern", "It is Bern. batch batch btach"], 
    "rejected": ["It is Bern.", "It is Bern."], 
    "chosen_logps": None,
    "rejected_logps": None
}

In [314]:
outputs

tensor([[  0,   3,   7, 152,  67, 839,   1]])

In [315]:
tokenizer.decode(model.config.pad_token_id)

'<pad>'

In [336]:
tokenizer(test1['chosen'][0])

{'input_ids': [8942, 1], 'attention_mask': [1, 1]}

In [339]:
torch.tensor([[1,2,3], [4,5,6]])

tensor([[1, 2, 3],
        [4, 5, 6]])

In [340]:
torch.tensor([[1,2,3], [4,5,6]])[:, [1,2]]

tensor([[2, 3],
        [5, 6]])

In [341]:
import torch

# Define the tensor
tensor = torch.tensor([
    [10, 20, 30],   # From this row, pick index 1 (20)
    [40, 50, 60],   # From this row, pick index 2 (60)
    [70, 80, 90]    # From this row, pick index 1 (80)
])

# Define the indices
indices = torch.tensor([1, 2, 1])

# Gather the elements
# dim=1 specifies that the indices refer to columns
selected_elements = torch.gather(tensor, 1, indices.unsqueeze(1)).squeeze()

print(selected_elements)

tensor([20, 60, 80])


In [360]:
model_wrapper.get_logprobs3(test1, tokenizer)

(tensor([ -5.4081, -80.3162]), tensor([-22.5394, -28.8716]))

In [331]:
model_wrapper.get_logprobs(test1, tokenizer)

tensor([[ 8942,     1,     0],
        [   50, 15126,     1]])
tensor(5.8746)


(tensor([-11.7493, -17.6239]), tensor([-78.6429, -31.4572]))

In [271]:
model_wrapper.get_logprobs2(test1, tokenizer)

tensor([[-5.1113]], grad_fn=<SelectBackward0>)
tensor([[[-44.3178,  -6.5826, -11.0753,  ..., -44.2438, -44.2820, -44.1999],
         [-57.5550,  -0.2968,  -8.4492,  ..., -57.5937, -57.7191, -57.5991]]],
       grad_fn=<LogSoftmaxBackward0>)


NameError: name 'chosen_logps' is not defined

In [344]:
import torch
import torch.nn.functional as F

# Example batched logits (each batch containing different examples)
logits = torch.tensor([
    [[2.0, 1.0, 0.1],
     [0.5, 1.5, 0.2],
     [0.3, 0.7, 0.0]],  # First batch

    [[1.2, 0.7, 0.3],
     [0.6, 0.4, 2.0],
     [1.0, 1.0, 1.0]]   # Second batch
])

# Corresponding batched labels
labels = torch.tensor([
    [0, 1, 2],  # Labels for the first batch
    [1, 2, 0]   # Labels for the second batch
])

# Compute log_softmax over the last dimension (features)
log_probs = F.log_softmax(logits, dim=2)
print(log_probs)
# Labels need to be the same dimensions as logits for gather
# Unsqueezing labels to [batch, sequence, 1] to align with gather requirements
gather_indices = labels.unsqueeze(2)

# Gather the log probabilities using the updated gather_indices
selected_log_probs = torch.gather(log_probs, 2, gather_indices).squeeze(2)

# Assuming '2' is the padding label, create a mask
mask = (labels != 2)

# Apply mask, replacing padded positions with 0 (or a very small number)
selected_log_probs_masked = selected_log_probs * mask.float()

# Now selected_log_probs_masked contains the log probabilities of non-padding tokens
print(selected_log_probs_masked)

tensor([[[-0.4170, -1.4170, -2.3170],
         [-1.4949, -0.4949, -1.7949],
         [-1.1733, -0.7733, -1.4733]],

        [[-0.6997, -1.1997, -1.5997],
         [-1.7705, -1.9705, -0.3705],
         [-1.0986, -1.0986, -1.0986]]])
tensor([[-0.4170, -0.4949, -0.0000],
        [-1.1997, -0.0000, -1.0986]])


In [None]:
from transformers import T5ForConditionalGeneration, T5Tokenizer
import torch

# Load pre-trained T5 model and tokenizer
model_name = "t5-small"
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

# Your dataset of 100 questions
questions = [
    "What is the capital of France?",
    "Who wrote 'To Kill a Mockingbird'?",
    # Add your 98 remaining questions here
]

# Batch size for inference
batch_size = 10

# Generate answers in batches
for i in range(0, len(questions), batch_size):
    batch_questions = questions[i:i+batch_size]
    inputs = tokenizer(batch_questions, return_tensors="pt", padding=True, truncation=True)

    # Generate model outputs
    with torch.no_grad():
        outputs = model.generate(input_ids=inputs["input_ids"], max_length=50, num_beams=4, early_stopping=True)

    # Decode and print the answers
    for idx, output in enumerate(outputs):
        decoded_output = tokenizer.decode(output, skip_special_tokens=True)
        print(f"Question: {batch_questions[idx]}")
        print(f"Answer: {decoded_output}")
        print("="*50)

In [10]:
model_wrapper.generate("What is your favortie color")

AttributeError: 'AutoDPOModelForSeq2SeqLM' object has no attribute 'generate'

In [13]:
model.generate(torch.tensor("What is your favorite color"))

TypeError: new(): invalid data type 'str'

In [16]:
# Load your JSON data
with open('datasets/small_test_file.json', 'r') as file:
    data = json.load(file)

# Function to generate answers for questions
def generate_answers(question):
    input_text = "question: " + question + " context:"
    inputs = tokenizer(input_text, return_tensors="pt", max_length=512, truncation=True)

    # Generate model outputs
    with torch.no_grad():
        outputs = model.generate(input_ids=inputs["input_ids"])

    # Decode and return the answer
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Add answers to the JSON data as a new field
for item in data:
    question = item['query']
    answer = generate_answers(question)
    item['answer'] = answer
    print(answer)

# Save the modified JSON data back to a file
with open('modified_data.json', 'w') as file:
    json.dump(data, file)

print("Answers generated and added to the JSON data.")



Gracie and Joe's points are 1 + 2 + 1 = 4 points. 
$.80
There are 15 cakes left. There are 15 cakes left. There are 15 cakes left. There
1010_3$ is a base 10*10*10*10*10*
30
x
63
1
0.2
The books cost 6 * 6 = $60. The books cost 3 * 6 = $18.
         
$ div
$(x, y)$
Maximoff's monthly bill is $60 / month * 30 / 100 = $1
$dbinom1411$
8
If they have a total of $150, then they have a total of $150 * 5
He earned $160 / 60 = $160 for his first job. He earned $160
1
5
Brittany is 25 years old. Brittany is 3 years old. Brittany
-17
5
1 hour
3
Frank is selling each hamburger for $x and 2 people purchased 4 and another 2 customers purchased 2
n = int(input()) p = 0 for i in
3
0
$b - 1 = $0
6
She will need to apply for a grant of 40 / 100 * 30 = $1000
42
0
Peter drew 8 + 20 = 30 pictures. Quincy drew 40
If Betty's account balance is $3,456, then the total balance of both accounts
$1.5
2.
Three trays with 20 gingerbreads = 40 gingerbreads. Diane bakes 40 
He can answer each math problem for 2 minu