In [1]:
from transformers import AutoModel, AutoTokenizer, BitsAndBytesConfig
import torch

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=False,
    bnb_4bit_quant_type="fp4",
    bnb_4bit_compute_dtype=torch.float16
)

device = "cuda:0"

model_path = "../model_output/embedder_icl_finetune_qwen14b_ep2_ds2_fix_emb_old_saver/merged_model"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModel.from_pretrained(model_path, quantization_config=bnb_config, device_map=device)

print(model)

  from .autonotebook import tqdm as notebook_tqdm
Loading checkpoint shards: 100%|██████████| 12/12 [00:13<00:00,  1.11s/it]


Qwen2Model(
  (embed_tokens): Embedding(152064, 5120)
  (layers): ModuleList(
    (0-47): 48 x Qwen2DecoderLayer(
      (self_attn): Qwen2SdpaAttention(
        (q_proj): Linear4bit(in_features=5120, out_features=5120, bias=True)
        (k_proj): Linear4bit(in_features=5120, out_features=1024, bias=True)
        (v_proj): Linear4bit(in_features=5120, out_features=1024, bias=True)
        (o_proj): Linear4bit(in_features=5120, out_features=5120, bias=False)
        (rotary_emb): Qwen2RotaryEmbedding()
      )
      (mlp): Qwen2MLP(
        (gate_proj): Linear4bit(in_features=5120, out_features=13824, bias=False)
        (up_proj): Linear4bit(in_features=5120, out_features=13824, bias=False)
        (down_proj): Linear4bit(in_features=13824, out_features=5120, bias=False)
        (act_fn): SiLU()
      )
      (input_layernorm): Qwen2RMSNorm((5120,), eps=1e-06)
      (post_attention_layernorm): Qwen2RMSNorm((5120,), eps=1e-06)
    )
  )
  (norm): Qwen2RMSNorm((5120,), eps=1e-06)
)


In [2]:
from peft import PeftModel
lora_path = "../model_output/embedder_icl_finetune_qwen14b_ep2_ds2_fix_emb_old_saver/lora_epoch_2"
lora_tokenizer = AutoTokenizer.from_pretrained(lora_path)
lora_model = AutoModel.from_pretrained("Qwen/Qwen2.5-14B-Instruct", quantization_config=bnb_config, device_map=device)
lora_model = PeftModel.from_pretrained(model, lora_path, is_trainable=False)
# model = model.merge_and_unload()

print(lora_model)


Loading checkpoint shards: 100%|██████████| 8/8 [00:09<00:00,  1.14s/it]


PeftModelForFeatureExtraction(
  (base_model): LoraModel(
    (model): Qwen2Model(
      (embed_tokens): Embedding(152064, 5120)
      (layers): ModuleList(
        (0-47): 48 x Qwen2DecoderLayer(
          (self_attn): Qwen2SdpaAttention(
            (q_proj): lora.Linear4bit(
              (base_layer): Linear4bit(in_features=5120, out_features=5120, bias=True)
              (lora_dropout): ModuleDict(
                (default): Dropout(p=0.1, inplace=False)
              )
              (lora_A): ModuleDict(
                (default): Linear(in_features=5120, out_features=32, bias=False)
              )
              (lora_B): ModuleDict(
                (default): Linear(in_features=32, out_features=5120, bias=False)
              )
              (lora_embedding_A): ParameterDict()
              (lora_embedding_B): ParameterDict()
              (lora_magnitude_vector): ModuleDict()
            )
            (k_proj): lora.Linear4bit(
              (base_layer): Linear4bit(in_featur

In [3]:
txt = "What is the sum of 1 and 2?"
inputs = tokenizer(txt, return_tensors="pt").to(device)
outputs = model(**inputs)
print(outputs)

lora_inputs = lora_tokenizer(txt, return_tensors="pt").to(device)
lora_outputs = lora_model(**lora_inputs)
print(lora_outputs)


BaseModelOutputWithPast(last_hidden_state=tensor([[[-1.3877, -0.4512, -1.1846,  ..., -0.1032,  0.5142, -0.4790],
         [ 0.5884, -0.0981, -2.5371,  ...,  0.0469, -0.0531, -0.4192],
         [ 0.8369,  1.5498, -1.8779,  ...,  0.5288, -0.1210, -0.9419],
         ...,
         [-0.3696, -2.7891,  1.2588,  ..., -0.2013,  1.8174, -0.6025],
         [ 1.9043, -2.4219,  1.6826,  ..., -0.0124,  0.9883,  1.2617],
         [ 1.4287, -2.4805,  2.4883,  ...,  0.9897,  1.2217,  1.1953]]],
       device='cuda:0', dtype=torch.float16), past_key_values=None, hidden_states=None, attentions=None)
BaseModelOutputWithPast(last_hidden_state=tensor([[[-1.3877, -0.4512, -1.1846,  ..., -0.1032,  0.5142, -0.4790],
         [ 0.5884, -0.0981, -2.5371,  ...,  0.0469, -0.0531, -0.4192],
         [ 0.8369,  1.5498, -1.8779,  ...,  0.5288, -0.1210, -0.9419],
         ...,
         [-0.3696, -2.7891,  1.2588,  ..., -0.2013,  1.8174, -0.6025],
         [ 1.9043, -2.4219,  1.6826,  ..., -0.0124,  0.9883,  1.2617],


In [4]:
lora_model = lora_model.merge_and_unload()

lora_inputs = lora_tokenizer(txt, return_tensors="pt").to(device)
lora_outputs = lora_model(**lora_inputs)
print(lora_outputs)




BaseModelOutputWithPast(last_hidden_state=tensor([[[-1.3418, -0.4719, -1.3691,  ..., -0.0950,  0.4719, -0.8677],
         [-0.2158, -0.5044, -4.1523,  ...,  0.3062, -0.5068, -1.2725],
         [ 0.5977,  2.0566, -2.3086,  ...,  1.3379,  0.0278, -0.9976],
         ...,
         [-2.4219, -0.9570,  0.4705,  ..., -0.5708,  2.7812, -1.5029],
         [ 1.4199, -1.9814,  0.2976,  ..., -0.7939,  2.1152,  0.8828],
         [ 0.5498, -1.2559,  1.1729,  ...,  0.9023,  1.3574,  1.5029]]],
       device='cuda:0', dtype=torch.float16), past_key_values=None, hidden_states=None, attentions=None)


In [1]:
# Given scores and their original indexes
scores = [90, 85, 88, 92]
original_indexes = [2, 0, 3, 1]

# Pair each score with its original index
paired_list = list(zip(scores, original_indexes))

# Sort the paired list based on the original indexes
sorted_paired_list = sorted(paired_list, key=lambda x: x[1])

# Extract the scores in original order
original_order_scores = [score for score, index in sorted_paired_list]

# Display the result
print(original_order_scores)
# Output: [85, 92, 90, 88]

[85, 92, 90, 88]


In [2]:
from transformers import AutoModel, AutoTokenizer, BitsAndBytesConfig
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-14B-Instruct")


  from .autonotebook import tqdm as notebook_tqdm


In [5]:
txt = "What is the sum of 1 and 2?"
len(tokenizer(txt)['input_ids'])


11

In [6]:
pairs = [['What is tasdas1 and 2?', 'What ?'], ['What is the sum of 1 and 2?', 'What d 2?'], ['What is the sum of 1 and 2?', 'What d 2?']]
len_pairs = [len(tokenizer(pair[0])['input_ids']) + len(tokenizer(pair[1])['input_ids']) for pair in pairs]
len_pairs


[11, 16, 16]

In [7]:
sorted_indices = sorted(range(len(len_pairs)), key=lambda k: len_pairs[k], reverse=True)
sorted_indices

[1, 2, 0]

In [1]:
import torch
args_path = '/root/autodl-tmp/github/FlagEmbedding/projects/model_output/reranker_ft_qwen14b_ep4_4gpu/training_args.bin'
args = torch.load(args_path)



  args = torch.load(args_path)
  from .autonotebook import tqdm as notebook_tqdm


In [3]:
args_dict = vars(args)
for key, value in args_dict.items():
    print(f"{key}: {value}")

output_dir: ../model_output/reranker_ft_qwen14b_ep4_4gpu
overwrite_output_dir: True
do_train: False
do_eval: False
do_predict: False
eval_strategy: no
prediction_loss_only: False
per_device_train_batch_size: 1
per_device_eval_batch_size: 1
per_gpu_train_batch_size: None
per_gpu_eval_batch_size: None
gradient_accumulation_steps: 8
eval_accumulation_steps: None
eval_delay: 0
torch_empty_cache_steps: None
learning_rate: 0.0002
weight_decay: 0.01
adam_beta1: 0.9
adam_beta2: 0.999
adam_epsilon: 1e-08
max_grad_norm: 1.0
num_train_epochs: 4.0
max_steps: -1
lr_scheduler_type: linear
lr_scheduler_kwargs: {}
warmup_ratio: 0.05
warmup_steps: 0
log_level: passive
log_on_each_node: True
logging_dir: ../model_output/reranker_ft_qwen14b_ep4_4gpu/runs/Dec10_12-35-29_autodl-container-bf1a4f8116-4d59960b
logging_strategy: steps
logging_first_step: False
logging_steps: 1.0
logging_nan_inf_filter: True
save_strategy: steps
save_steps: 500
save_total_limit: 5
save_safetensors: True
save_on_each_node: False