Load model and write inference function

In [1]:
# Load model
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("Fsoft-AIC/XMAiNframe-instruct-7b")
model = AutoModelForCausalLM.from_pretrained("Fsoft-AIC/XMAiNframe-instruct-7b", device_map="auto")

messages=[
    {'from':'system', 'value': "You are a helpful assistant"},
    {'from': 'human', 'value': 'What is the future of Mainframe?'}
]

inputs = tokenizer.apply_chat_template(messages,
                                       add_generation_prompt=True,
                                       return_tensors="pt").to(model.device)
 
outputs = model.generate(inputs,
                         max_new_tokens=512,
                         do_sample=False,
                         top_k=50,
                         top_p=0.95,
                         num_return_sequences=1,
                         eos_token_id=tokenizer.eos_token_id
                        )

print(tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True))

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:100015 for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


The future of Mainframe is uncertain, as many organizations are gradually shifting their focus towards cloud computing and distributed systems. However, Mainframes will continue to play a vital role in critical applications and industries that require high levels of security, reliability, and performance.


In [2]:
def get_completion(sentence:str):
    
    messages=[
    {'from':'system', 'value': "You are a helpful assistant"},
    {'from': 'human', 'value': sentence}
    ]

    inputs = tokenizer.apply_chat_template(messages,
                                       add_generation_prompt=True,
                                       return_tensors="pt").to(model.device)

    outputs = model.generate(inputs,
                         max_new_tokens=512,
                         do_sample=False,
                         top_k=50,
                         top_p=0.95,
                         num_return_sequences=1,
                         eos_token_id=tokenizer.eos_token_id
                        )

    return tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True)

In [3]:
get_completion("What is the future of Mainframe?")

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:100015 for open-end generation.


'The future of Mainframe is uncertain, as many organizations are gradually shifting their focus towards cloud computing and distributed systems. However, Mainframes will continue to play a vital role in critical applications and industries that require high levels of security, reliability, and performance.'

Load dataset

In [4]:
from datasets import load_dataset

# Load and preprocess dataset
QA_set = load_dataset("locchuong/Mainframe-QA-en-ja")
QA_set

DatasetDict({
    train: Dataset({
        features: ['id', 'prompt', 'question', 'answer', 'anwser_ja', 'question_ja'],
        num_rows: 2598
    })
})

In [5]:
import random
from evaluate import load

In [6]:
bleu = load("bleu")

rouge = load('rouge')

In [7]:
total_sample = QA_set['train'].num_rows

random_idx = random.choice(range(total_sample))

question = QA_set['train'][random_idx]['question']

answer = QA_set['train'][random_idx]['answer']

completion = get_completion(question)

print("Question:\n",question)

print("Answer:\n",answer)

print("Completion:\n",completion)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:100015 for open-end generation.


Question:
 What is the future of COBOL in the digital age?
Answer:
 Although COBOL is an older language, it continues to be relevant in certain industries and use cases, such as banking, finance, and government. As long as these industries need to maintain and update their legacy systems, COBOL will remain in demand. However, the pace of COBOL modernization and replacement with newer technologies will ultimately depend on the ability to efficiently migrate and maintain COBOL applications.
Completion:
 The future of COBOL in the digital age is uncertain, as many organizations are gradually phasing out COBOL systems in favor of modern technologies. However, there will still be a need for COBOL programmers due to the vast legacy systems that still rely on it.


In [8]:
bleu_score = bleu.compute(predictions=[completion], references=[answer])
bleu_score

{'bleu': 0.0,
 'precisions': [0.4897959183673469,
  0.08333333333333333,
  0.02127659574468085,
  0.0],
 'brevity_penalty': 0.6003730411984045,
 'length_ratio': 0.6621621621621622,
 'translation_length': 49,
 'reference_length': 74}

In [9]:
rouge_score = rouge.compute(predictions=[completion], references=[answer])
rouge_score

{'rouge1': 0.3636363636363637,
 'rouge2': 0.037037037037037035,
 'rougeL': 0.16363636363636366,
 'rougeLsum': 0.16363636363636366}

Try to get BLEU and ROUGE score for all dataset

In [10]:
import warnings
warnings.filterwarnings("ignore")

from tqdm import tqdm

In [12]:
n = 500
QA_set_subset = QA_set['train'].select(range(n))

In [13]:
answers = []
completions = []

# Simple for loop with tqdm
for i in tqdm(range(n)):
    # Get question, answer and model's predict
    question = QA_set_subset[i]['question']
    answer = QA_set_subset[i]['answer']
    completion = get_completion(question)
    # Collect results for calcuate metrics
    answers.append(answer)
    completions.append(completion)

  0%|                                                                                                                                                                 | 0/500 [00:00<?, ?it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:100015 for open-end generation.
  0%|▎                                                                                                                                                        | 1/500 [00:04<33:53,  4.08s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:100015 for open-end generation.
  0%|▌                                                                                                              

In [14]:
bleu_score = bleu.compute(predictions=completions, references=answers)
bleu_score

{'bleu': 0.228683328094954,
 'precisions': [0.47811370546727094,
  0.2590514440966275,
  0.1758870427849796,
  0.12554139727575167],
 'brevity_penalty': 1.0,
 'length_ratio': 1.1050462786864461,
 'translation_length': 17431,
 'reference_length': 15774}

In [15]:
rouge_score = rouge.compute(predictions=completions, references=answers)
rouge_score

{'rouge1': 0.5190875594831748,
 'rouge2': 0.31195055300430263,
 'rougeL': 0.45307669743312057,
 'rougeLsum': 0.4531016415604454}

In [16]:
# Upload for reuse
dataset_name = "Mainframe-QA-en-ja-500"
QA_set_subset.push_to_hub(dataset_name)

Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

CommitInfo(commit_url='https://huggingface.co/datasets/locchuong/Mainframe-QA-en-ja-500/commit/992819dd0754086f5dda4cc137c999167b8e6f31', commit_message='Upload dataset', commit_description='', oid='992819dd0754086f5dda4cc137c999167b8e6f31', pr_url=None, repo_url=RepoUrl('https://huggingface.co/datasets/locchuong/Mainframe-QA-en-ja-500', endpoint='https://huggingface.co', repo_type='dataset', repo_id='locchuong/Mainframe-QA-en-ja-500'), pr_revision=None, pr_num=None)