In [2]:
import bitsandbytes
import wandb
import random
import pandas as pd
import torch
import json
import wandb
import os
from datasets import Dataset, concatenate_datasets, load_dataset
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer, PreTrainedTokenizerBase

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
!wandb.login()
os.environ["CUDA_VISIBLE_DEVICES"]="0"
os.environ["WANDB_LOG_MODEL"] = "checkpoint"
os.environ["WANDB_WATCH"] = "gradients"
os.environ["TOKENIZERS_PARALLELISM"] = "false"
wandb_entity=""
wandb_project=""

/bin/bash: -c: line 2: syntax error: unexpected end of file


In [4]:
config ={
    "artifacts_path":'wandb-japan/llm-finetuning/model-164uv3h2:v0',
    "tuning_data_artifacts": {"Anthropic_hh_rlfh":'wandb-japan/llm-finetuning/Anthropic_hh_rlfh:v0',
                              "OpenAssistant_oasst1":"wandb-japan/llm-finetuning/OpenAssistant_oasst1:v0",
                              "databricks-dolly-15k-ja":"wandb-japan/llm-finetuning/databricks-dolly-15k-ja:v0"},
}

In [6]:
def perform_inference(dataset, model, tokenizer):
    response_keyword = "### 応答:"
    inferences = []

    for example in dataset:
        split_text = example["text"].split(response_keyword)
        input_text = split_text[0].strip() + "### 応答:"  # 入力テキスト
        correct_response = split_text[1].strip() if len(split_text) > 1 else ""

        inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
        outputs = model.generate(**inputs, max_length=1024)
        prediction_full = tokenizer.decode(outputs[0], skip_special_tokens=True)
        tokens_input = tokenizer.tokenize(input_text)
        tokens_prediction = tokenizer.tokenize(prediction_full)[len(tokens_input):]
        prediction = tokenizer.convert_tokens_to_string(tokens_prediction)

        inferences.append({
            "Input Text": input_text,
            "Correct Response": correct_response,
            "Prediction": prediction
        })
    return pd.DataFrame(inferences)

with wandb.init(entity=wandb_entity, project=wandb_project, config=config, job_type='inference') as run:
    
    # ... [Dataset preparation code] ...
    # Data set prepration 
    datasets = []
    # load Anthropic_hh_rlfh
    dataset_artifacts=run.use_artifact(run.config["tuning_data_artifacts"]["Anthropic_hh_rlfh"], type='dataset')
    dataset_artifacts_folder=dataset_artifacts.download()
    dataset = load_dataset('json', data_files=dataset_artifacts_folder+'/train_mpt_hhrlhf_49k_ja.json')
    dataset = dataset["train"]
    dataset = dataset.select_columns("text")
    datasets.append(dataset)
    # load OpenAssistant_oasst1
    dataset_artifacts=run.use_artifact(run.config["tuning_data_artifacts"]["OpenAssistant_oasst1"], type='dataset')
    dataset_artifacts_folder=dataset_artifacts.download()
    dataset = load_dataset('json', data_files=dataset_artifacts_folder+'/train_OpenAssistant_oasst1.json')
    dataset = dataset["train"]
    dataset = dataset.select_columns("text")
    datasets.append(dataset)

    # load databricks-dolly-15k-ja
    dataset_artifacts=run.use_artifact(run.config["tuning_data_artifacts"]["databricks-dolly-15k-ja"], type='dataset')
    dataset_artifacts_folder=dataset_artifacts.download()
    dataset = load_dataset('json', data_files=dataset_artifacts_folder+'/train_databricks-dolly-15k-ja.json')
    dataset = dataset["train"]
    dataset = dataset.select_columns("text")
    datasets.append(dataset)
    
    train_dataset = concatenate_datasets(datasets)
    train_dataset = train_dataset.train_test_split(test_size=0.1)
    random_indices = random.sample(range(len(train_dataset['test'])), 5)
    sampled_train_dataset = train_dataset['test'].select(random_indices)

    # Model Preparation
    artifacts = run.use_artifact(run.config["artifacts_path"])
    artifacts_path = artifacts.download()

    tokenizer = AutoTokenizer.from_pretrained(artifacts_path, use_fast=True)
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.pad_token_id = tokenizer.eos_token_id

    with open(f"{artifacts_path}/adapter_config.json", encoding="utf-8") as f:
        adapter_config = json.load(f)
    adapterbase_model = adapter_config["base_model_name_or_path"]

    # Load and predict with base model
    model_base = AutoModelForCausalLM.from_pretrained(
        pretrained_model_name_or_path=adapterbase_model,
        trust_remote_code=True,
        device_map="auto",
        load_in_8bit=False,
        load_in_4bit=False
    )
    model_base.eval()

    # Inference with Base Model
    base_model_inferences = perform_inference(sampled_train_dataset, model_base, tokenizer)

    # Release GPU Memory
    #del model_base
    #torch.cuda.empty_cache()
    #import gc
    #gc.collect()

    # Load and predict with Peft Model
    model_finetuned = PeftModel.from_pretrained(model_base, artifacts_path)
    model_finetuned = model_finetuned.merge_and_unload()
    #model_finetuned = AutoModelForCausalLM.from_pretrained(artifacts_path, device_map="auto")
    model_finetuned.eval()

    # Inference with Peft Model
    peft_model_inferences = perform_inference(sampled_train_dataset, model_finetuned, tokenizer)

    # Log Results
    peft_model_inferences_reduced = peft_model_inferences.drop(columns=['Input Text', 'Correct Response'])
    df_combined = pd.concat([base_model_inferences, peft_model_inferences_reduced], axis=1)
    df_combined.columns = ['Input Text', 'Correct Response', 'Base Model Prediction', 'Peft Model Prediction']
    data_table_combined = wandb.Table(dataframe=df_combined)
    run.log({"combined_predictions": data_table_combined})

[34m[1mwandb[0m: Downloading large artifact Anthropic_hh_rlfh:v0, 68.05MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:0.3
[34m[1mwandb[0m: Downloading large artifact OpenAssistant_oasst1:v0, 114.69MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:0.4
[34m[1mwandb[0m:   1 of 1 files downloaded.  
[34m[1mwandb[0m:   7 of 7 files downloaded.  
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for o