In [1]:
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    GenerationConfig,
    TextStreamer,
    pipeline,
)

MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.1"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, cache_dir="/home/congnguyen/drive/.cache")
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME, device_map="auto", cache_dir="/home/congnguyen/drive/.cache", torch_dtype=torch.float16, load_in_8bit=True
)


Welcome to bitsandbytes. For bug reports, please run

python -m bitsandbytes

 and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues


Either way, this might cause trouble in the future:
If you get `CUDA error: invalid device function` errors, the above might be the cause and the solution is to make sure only one ['libcudart.so', 'libcudart.so.11.0', 'libcudart.so.12.0'] in the paths that we search based on your env.
  warn(msg)


CUDA SETUP: CUDA runtime path found: /home/congnguyen/miniconda3/envs/myenv/lib/libcudart.so
CUDA SETUP: Highest compute capability among GPUs detected: 7.5
CUDA SETUP: Detected CUDA version 116
CUDA SETUP: Loading binary /home/congnguyen/miniconda3/envs/myenv/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda116.so...


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [26]:

from bs4 import BeautifulSoup
import re
import json

def get_article(articles):
    result = {}
    current_statue = "(non-statute)"
    for i in re.split(r"(.*)", articles.strip()):
        if len(i) == 0 or i == "\n":
            continue
        if re.search(r"^\(.*\)$", i):
            current_statue = i.strip()
            if current_statue not in result:
                result.update({current_statue: []})
        else:
            if current_statue not in result:
                result.update({current_statue: []})
            result[current_statue].append(i)
    return result

def build_test(filename):
    result = {}
    with open(filename, 'r') as f:
        data = f.read()

    data = BeautifulSoup(data, "xml").find_all('pair')
    for i in data:
        id = i.get('id')
        result.update({id: {}})
        result[id].update({"label": i.get('label')})
        articles = i.find('t1').text.replace("\n", " ").strip()
        # articles = get_article(articles)
        result[id].update({"statute": articles})
        result[id].update({"query": i.find('t2').text.strip()})
    return result

def write_json(filename, data):
    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=4)

In [27]:
test_file = "../data/COLIEE2024statute_data-English/train/riteval_R04_en.xml"
data = build_test(test_file)
data

{'R04-01-I': {'label': 'N',
  'statute': "(Juridical Acts by Minors) Article 5 (1) A minor must obtain the consent of the minor's legal representative to perform a juridical act;provided, however, that this does not apply to a juridical act for merely acquiring a right or being released from an obligation. (2) A juridical act in contravention of the provisions of the preceding paragraph is voidable. (Permission for Minors to Conduct Business) Article 6 (1) A minor who is permitted to conduct one or multiple types of business has the same capacity to act as an adult as far as that business is concerned.",
  'query': 'A juridical act by a minor who is permitted to conduct business may not be rescinded even if the judicial act does not concern the business.'},
 'R04-01-U': {'label': 'N',
  'statute': "(Juridical Acts by Minors) Article 5 (1) A minor must obtain the consent of the minor's legal representative to perform a juridical act;provided, however, that this does not apply to a jurid

In [28]:
def format_prompt(prompt, system_prompt=""):
    if system_prompt.strip():
        return f"[INST] {system_prompt} {prompt} [/INST]"
    return f"[INST] {prompt} [/INST]"

In [33]:
result = {}
for id in data:
    context = data[id]['statute']
    query = data[id]['query']
    prompt = "EXAM\n1. Answer by yes or no.\n\nDocument: {{premise}}\nQuestion: {{hypothesis}}? "
    prompt = prompt.replace("{{premise}}", context).replace("{{hypothesis}}", query)
    # prompt = format_prompt(prompt)
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(**inputs, max_new_tokens=5)
    out = tokenizer.decode(outputs[0], skip_special_tokens=True).replace(prompt, "")
    print(id, ": ", out)
    result.update({id: out})

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-01-I :  
Answer: No


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-01-U :  

Answer:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-01-E :  
Answer: Yes


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-02-A :  

Answer:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-02-U :  
Answer: Yes


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-02-E :  

Answer:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-03-A :  
Answer: No


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-03-I :  

Answer:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-03-U :  

Answer:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-03-E :  
Answer: Yes


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-03-O :  

Answer:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-04-A :  
Answer: Yes


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-04-I :  

Answer:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-04-U :  
Answer: Yes


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-04-E :  
Answer: Yes


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-04-O :  
Answer: Yes


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-05-U :  

Answer:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-06-A :  
Answer: No


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-07-A :  

Answer:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-07-I :  

Answer:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-07-O :  

Answer:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-08-A :  

Answer:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-08-O :  

Answer:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-09-A :  
Answer: Yes


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-09-I :  

Answer:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-09-U :  
Answer: Yes


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-10-U :  

Answer:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-10-O :  

Answer:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-11-A :  

Answer:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-11-U :  

Answer:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-11-E :  
Answer: No


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-12-A :  
Answer: Yes


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-12-U :  

Answer:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-13-U :  

Answer:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-13-E :  

Answer:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-13-O :  
Answer: Yes


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-14-A :  
Answer: Yes


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-14-U :  
Answer: Yes


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-14-E :  

Answer:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-14-O :  

Answer:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-16-I :  

Answer:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-16-U :  

Answer:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-16-E :  

Answer:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-16-O :  

Answer:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-18-A :  

Answer:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-18-U :  

Answer:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-18-O :  
Answer: Yes


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-19-A :  

Answer:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-19-I :  
Answer: No


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-19-U :  

Answer:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-19-E :  

Answer:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-19-O :  
Answer: Yes


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-20-A :  
Answer: No


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-20-I :  

Answer:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-20-U :  

Answer:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-20-E :  
Answer: Yes


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-20-O :  
Answer: No


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-21-I :  

Answer:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-21-U :  

Answer:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-21-E :  

Answer:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-21-O :  

Answer:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-22-A :  
Answer: Yes


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-22-I :  
Answer: Yes


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-22-O :  

Answer:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-23-A :  
Answer: Yes


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-23-I :  
Answer: Yes


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-23-U :  
Answer: Yes


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-23-E :  

Answer:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-23-O :  
Answer: No


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-24-I :  
Answer: No


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-24-U :  
Answer: No


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-24-E :  
Answer: Yes


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-24-O :  

Answer:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-25-A :  
Answer: No


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-25-I :  
Answer: Yes


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-25-U :  

Answer:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-25-E :  

Answer:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-25-O :  

Answer:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-26-A :  

Answer:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-26-I :  

Answer:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-26-U :  
Answer: Yes


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-26-E :  
Answer: Yes


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-27-A :  

Answer:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-27-I :  
Answer: No


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-27-U :  
Answer: No


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-27-E :  
Answer: Yes


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-27-O :  
Answer: Yes


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-28-A :  
Answer: Yes


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-28-I :  

Answer:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-28-E :  

Answer:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-28-O :  

Answer:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-29-I :  

Answer:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-29-O :  

Answer:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-33-I :  

Answer:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-36-A :  

Answer:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-36-I :  

Answer:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-36-U :  

Answer:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-36-E :  

Answer:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-37-A :  

Answer:


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


R04-37-I :  
Answer: No
R04-37-U :  

Answer:


In [32]:
f = open("../data/infer/R04_mistral.xml", "w")
for k in result:
    if "Yes" in result[k]:
        f.write(k + " Y " + "JNLP"+"\n")
    elif "No" in result[k]:
        f.write(k + " N " + "JNLP"+"\n")