### Loading model

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer

# 加载支持生成的模型
local_model_path = "llama3.2_3b_lora_merge"  # 替换为适合的模型
tokenizer = AutoTokenizer.from_pretrained(local_model_path)
model = AutoModelForCausalLM.from_pretrained(local_model_path)

### Input question

In [1]:


# input question
question = "Which model has achieved the highest Accuracy score on the Story Cloze Test benchmark dataset?"

# 输入提示
prompt = """
The Open Research Knowledge Graph (ORKG) is a semantic knowledge graph designed to represent, compare, and retrieve scholarly contributions. Given a natural language question, your task is to generate the corresponding SPARQL query that can be used to query the ORKG for the correct answer. Give me only the SPARQL query, no other text.
Input Question: {question}
Output SPARQL Query:

"""
inputs = tokenizer(prompt, return_tensors="pt")

# 生成文本
output_ids = model.generate(inputs["input_ids"], max_length=1024, temperature=0.7)
generated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)

print("生成的文本:", generated_text)

  from .autonotebook import tqdm as notebook_tqdm
Loading checkpoint shards: 100%|██████████| 2/2 [00:11<00:00,  5.99s/it]
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


生成的文本: 
The Open Research Knowledge Graph (ORKG) is a semantic knowledge graph designed to represent, compare, and retrieve scholarly contributions. Given a natural language question, your task is to generate the corresponding SPARQL query that can be used to query the ORKG for the correct answer. Give me only the SPARQL query, no other text.
Input Question: {question}
Output SPARQL Query:

SELECT DISTINCT?paper?paper_lbl (SUM(?paper?paper_rdf['orkgc:contributionTo']?cont) AS?cont_num)
WHERE {
 ?cont?cont_lbl     a       orkgc:Contribution;
              rdfs:label      ?cont_lbl.
 ?cont?cont_rdf      orkgp:HAS_CONTRIBUTION     ?cont.
  OPTIONAL {?cont?cont_rdf      orkgp:P31           ?paper.}
  OPTIONAL {?cont?cont_rdf      orkgp:HAS_PAPER     ?paper.}
  OPTIONAL {?cont?cont_rdf      orkgp:P31           ?paper_lbl.}
  OPTIONAL {?cont?cont_rdf      orkgp:HAS_PAPER_lbl ?paper_lbl.}
 ?paper      orkgp:P31           ?paper_lbl.
 ?paper      orkgp:HAS_PAPER     ?paper.
} GROUP BY?paper?pa

### Save output sparql queries to a csv file

In [None]:
from llamafactory.chat import ChatModel
from llamafactory.extras.misc import torch_gc

#%cd /content/LLaMA-Factory/

args = dict(
  model_name_or_path="llama3.2_3b_lora_merge", # 使用 4 比特量化版 Llama-3-8b-Instruct 模型
#   adapter_name_or_path="llama3_lora",            # 加载之前保存的 LoRA 适配器
  template="llama3",                     # 和训练保持一致
  finetuning_type="lora",                  # 和训练保持一致
  # quantization_bit=4,                    # 加载 4 比特量化模型
)
chat_model = ChatModel(args)

messages = []
print("使用 `clear` 清除对话历史，使用 `exit` 退出程序。")
while True:
  query = input("\nUser: ")
  if query.strip() == "exit":
    break
  if query.strip() == "clear":
    messages = []
    torch_gc()
    print("对话历史已清除")
    continue

  messages.append({"role": "user", "content": query})
  print("Assistant: ", end="", flush=True)

  response = ""
  for new_text in chat_model.stream_chat(messages):
    print(new_text, end="", flush=True)
    response += new_text
  print()
  messages.append({"role": "assistant", "content": response})

torch_gc()

[INFO|configuration_utils.py:677] 2024-12-13 13:46:17,580 >> loading configuration file llama3.2_3b_lora_merge/config.json
[INFO|configuration_utils.py:746] 2024-12-13 13:46:17,581 >> Model config LlamaConfig {
  "_name_or_path": "llama3.2_3b_lora_merge",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 128000,
  "eos_token_id": [
    128001,
    128008,
    128009
  ],
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 3072,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "max_position_embeddings": 131072,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 24,
  "num_hidden_layers": 28,
  "num_key_value_heads": 8,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-05,
  "rope_scaling": {
    "factor": 32.0,
    "high_freq_factor": 4.0,
    "low_freq_factor": 1.0,
    "original_max_position_embeddings": 8192,
    "rope_type": "llama3"
  },
  "rope_theta": 500000.0,
  "tie_word_emb

[INFO|2024-12-13 13:46:18] llamafactory.data.template:157 >> Replace eos token: <|eot_id|>


[INFO|configuration_utils.py:677] 2024-12-13 13:46:18,279 >> loading configuration file llama3.2_3b_lora_merge/config.json
[INFO|configuration_utils.py:746] 2024-12-13 13:46:18,280 >> Model config LlamaConfig {
  "_name_or_path": "llama3.2_3b_lora_merge",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 128000,
  "eos_token_id": [
    128001,
    128008,
    128009
  ],
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 3072,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "max_position_embeddings": 131072,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 24,
  "num_hidden_layers": 28,
  "num_key_value_heads": 8,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-05,
  "rope_scaling": {
    "factor": 32.0,
    "high_freq_factor": 4.0,
    "low_freq_factor": 1.0,
    "original_max_position_embeddings": 8192,
    "rope_type": "llama3"
  },
  "rope_theta": 500000.0,
  "tie_word_emb

[INFO|2024-12-13 13:46:18] llamafactory.model.patcher:157 >> Using KV cache for faster generation.


[INFO|modeling_utils.py:3934] 2024-12-13 13:46:18,282 >> loading weights file llama3.2_3b_lora_merge/model.safetensors.index.json
[INFO|modeling_utils.py:1670] 2024-12-13 13:46:18,282 >> Instantiating LlamaForCausalLM model under default dtype torch.float32.
[INFO|configuration_utils.py:1096] 2024-12-13 13:46:18,283 >> Generate config GenerationConfig {
  "bos_token_id": 128000,
  "eos_token_id": [
    128001,
    128008,
    128009
  ]
}

Loading checkpoint shards: 100%|██████████| 2/2 [00:07<00:00,  3.69s/it]
[INFO|modeling_utils.py:4800] 2024-12-13 13:46:25,699 >> All model checkpoint weights were used when initializing LlamaForCausalLM.

[INFO|modeling_utils.py:4808] 2024-12-13 13:46:25,699 >> All the weights of LlamaForCausalLM were initialized from the model checkpoint at llama3.2_3b_lora_merge.
If your task is similar to the task the model of the checkpoint was trained on, you can already use LlamaForCausalLM for predictions without further training.
[INFO|configuration_utils.py

[INFO|2024-12-13 13:46:26] llamafactory.model.model_utils.attention:157 >> Using torch SDPA for faster training and inference.
[INFO|2024-12-13 13:46:26] llamafactory.model.loader:157 >> all params: 3,212,749,824
使用 `clear` 清除对话历史，使用 `exit` 退出程序。
Assistant: SELECT DISTINCT?model?model_lbl
WHERE {
 ?metric     a       orkgc:Metric;
              rdfs:label ?metric_lbl.
  FILTER (str(?metric_lbl) = "Accuracy")
  {
    SELECT?model?model_lbl
    WHERE {
     ?dataset       a                orkgc:Dataset;
                      rdfs:label      ?dataset_lbl.
      FILTER (str(?dataset_lbl) = "Story Cloze Test")
     