In [None]:
import os
import time
import torch
import tqdm
import pyarrow as pa
import pyarrow.parquet as pq
from accelerate import Accelerator, DataLoaderConfiguration
from concurrent.futures import ThreadPoolExecutor, as_completed
from transformers import AutoModelForCausalLM, AutoTokenizer

In [None]:
# --- Prompt templates ------------------------------------------------------
# Directory holding the parquet prompt templates, and the file names inside it.
TEMPLATE_DIR = '../00.data/00.wikidata/03.wikidata_template/'
TEMPLATE_NAME = [
    '00.original_template_500.parquet',
    '01.subject_shuffled_template_500.parquet',
    '02.object_shuffled_template_500.parquet',
    '03.property_scoped_subject_shuffled_template_500.parquet',
    '04.property_scoped_object_shuffled_template_500.parquet',
]

# --- Models ----------------------------------------------------------------
# Root directory of the local model checkpoints and the per-model sub-paths.
# Each sub-path ends with '/' because it is concatenated directly onto a
# directory prefix elsewhere in the notebook.
MODEL_DIR = '../01.models/'
MODEL_NAME = [
    'Meta/meta-llama/Llama-3.1-8B-Instruct/',
    'Mistral/mistralai/Mistral-Nemo-Instruct-2407/',
    'Qwen/Qwen3-8B/',
    'Qwen/Qwen3-14B/',
]

# --- Outputs ---------------------------------------------------------------
# Root directory under which per-model response parquet files are written.
RESPONSE_DIR = '../00.data/01.model_response/01.HF_Models_response/'

# Language codes; each one selects a `prompt_<lang>` / `response_<lang>` column.
LANGUAGE_LIST = ['en', 'fr', 'de', 'es', 'it', 'pt', 'ko', 'ja']

In [None]:
def _read_template(file_name):
    """Read one parquet file from TEMPLATE_DIR and return it as a pandas DataFrame."""
    table = pq.read_table(f"{TEMPLATE_DIR}{file_name}")
    return table.to_pandas()


# One DataFrame per template variant, in the same order as TEMPLATE_NAME.
template_0 = _read_template(TEMPLATE_NAME[0])
template_1 = _read_template(TEMPLATE_NAME[1])
template_2 = _read_template(TEMPLATE_NAME[2])
template_3 = _read_template(TEMPLATE_NAME[3])
template_4 = _read_template(TEMPLATE_NAME[4])

In [None]:
def batch_inference(prompts, batch_size = 100):
    """Generate one completion per prompt, processing the prompts in batches.

    Uses the module-level ``tokenizer``, ``model`` and ``accelerator`` objects,
    which must have been created by a model-loading cell before this is called
    with a non-empty prompt list.

    Args:
        prompts: Iterable of prompt strings. Each prompt is truncated to at
            most 128 tokens by the tokenizer call below.
        batch_size: Number of prompts tokenized and generated together.
            Must be a positive integer.

    Returns:
        list[str]: The decoded generated text (tokens after the padded prompt
        length only, special tokens skipped), one entry per prompt, in input
        order. An empty input yields an empty list.

    Raises:
        ValueError: If ``batch_size`` is not positive. A negative step would
            otherwise make ``range`` empty and silently drop every prompt.
    """
    if batch_size <= 0:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")

    # Materialise once so that slicing is positional and each batch is a plain
    # list regardless of the sequence type passed in.
    prompts = list(prompts)
    results = []

    for i in range(0, len(prompts), batch_size):
        batch = prompts[i: i + batch_size]
        inputs = tokenizer(
            batch,
            return_tensors = 'pt',
            padding = True,
            truncation = True,
            max_length = 128).to(accelerator.device)

        # No gradients are needed for generation.
        with torch.no_grad():
            outputs = model.generate(
                **inputs,  # carries input_ids and the attention mask from the tokenizer
                max_new_tokens = 100,
                num_return_sequences = 1,
                do_sample = True,  # sampling (not greedy), so outputs can vary between runs
                temperature = 0.1,
                pad_token_id = tokenizer.pad_token_id,
                eos_token_id = tokenizer.eos_token_id,
                repetition_penalty = 1.3,
                return_dict_in_generate = True
            )

        # Keep only the columns after the padded prompt length, i.e. the newly
        # generated tokens. This assumes the tokenizer pads on the left so all
        # prompts in the batch end at the same column — the loading cells set
        # padding_side = "left".
        gen_only = outputs.sequences[:, inputs["input_ids"].shape[1]:]
        decoded = tokenizer.batch_decode(gen_only, skip_special_tokens=True)
        results.extend(decoded)
    return results

def generate_responses(df, prefix, out_file = None):
    """Run inference for every language column of ``df`` and optionally save it.

    For each language code in ``LANGUAGE_LIST`` the column ``prompt_<lang>`` is
    read, passed through ``batch_inference`` (batch size 6) and the results are
    stored in a new ``response_<lang>`` column. ``df`` is modified in place.

    Args:
        df: pandas DataFrame containing a ``prompt_<lang>`` column for every
            language in ``LANGUAGE_LIST``.
        prefix: Label used only in the progress message printed per language.
        out_file: Destination parquet path. When ``None`` (the default) the
            result is not written to disk; previously the default caused
            ``pq.write_table`` to be called with ``None`` after all inference
            had already been done.

    Returns:
        The same ``df`` object, with the response columns added.
    """
    total_start = time.time()
    for lang in LANGUAGE_LIST:
        lang_start = time.time()
        prompt_col = f"prompt_{lang}"
        response_col = f"response_{lang}"
        print(f"Inference {prefix}: {lang} ")

        prompts = df[prompt_col].astype(str).tolist()
        if len(prompts) == 0:
            # Nothing to generate for an empty frame; no response column is added.
            continue

        responses = batch_inference(prompts, batch_size = 6)
        df[response_col] = responses
        lang_elapsed = time.time() - lang_start
        # Message reports the elapsed seconds for this language.
        print(f"Time -> {lang}:{lang_elapsed:.2f} 초")

    # Only persist when a destination was actually supplied.
    if out_file is not None:
        pq.write_table(pa.Table.from_pandas(df), out_file)
        # Message reports that the file was saved.
        print("저장 완료:", out_file)

    total_elapsed = time.time() - total_start
    # Message reports the total elapsed seconds.
    print(f"총 소요 시간: {total_elapsed:.2f} 초")
    return df

def unload_model():
    """Drop the currently loaded model objects and release cached GPU memory.

    Removes the module-level names ``model``, ``tokenizer`` and ``accelerator``
    if they exist (missing names are ignored), runs the garbage collector and,
    when CUDA has already been initialised in this process, empties the CUDA
    caching allocator and collects CUDA IPC memory.

    CUDA is only touched when it is already initialised, so calling this on a
    fresh kernel or on a CPU-only machine is a no-op for the GPU part and does
    not itself initialise CUDA.

    Returns:
        None
    """
    import gc  # not imported at the top of the notebook, so keep it local

    # Remove the global references explicitly instead of `del` wrapped in a
    # bare `except`, which would also hide unrelated errors.
    module_globals = globals()
    for name in ("model", "tokenizer", "accelerator"):
        module_globals.pop(name, None)

    gc.collect()
    if torch.cuda.is_initialized():
        torch.cuda.empty_cache()
        torch.cuda.ipc_collect()

    print("✓ Model fully unloaded (as much as possible without restarting kernel).")


In [None]:
# Drop any model/tokenizer/accelerator left in the kernel and free cached GPU memory.
unload_model()

In [None]:
# Load the first model in MODEL_NAME for interactive use.
unload_model()
i = 0  # index into MODEL_NAME; later cells read this variable

# NOTE: CUDA_VISIBLE_DEVICES is generally only honoured when set before CUDA is
# initialised in this process — restart the kernel if the restriction does not
# seem to apply. The list is written without whitespace, like the other cells.
os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'

tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR + MODEL_NAME[i])
# Reuse EOS as the padding token and pad on the left, so every prompt in a
# batch ends at the same position and generated tokens can be sliced off by
# prompt length.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"

with torch.no_grad():
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_DIR + MODEL_NAME[i],
        # A device_map dict must map module names to a single device each; the
        # previous {'': [0, 1]} mapped the root module to a list, which is not a
        # valid device. 'auto' lets the library place the layers across the
        # visible GPUs, as the other loading cells do.
        device_map = 'auto',
    )
accelerator = Accelerator()
model = accelerator.prepare(model)

In [None]:
# Run every model in MODEL_NAME over all five template sets and write one
# response parquet file per (model, template) pair.

# Loop-invariant, so set once up front.
# NOTE: generally only honoured when set before CUDA is initialised in this process.
os.environ['CUDA_VISIBLE_DEVICES'] = '0,1,2,3,4,5'

# Output file names, in the same order as template_0 .. template_4.
response_files = (
    "00.original_response_100.parquet",
    "01.subject_shuffled_response_100.parquet",
    "02.object_shuffled_response_100.parquet",
    "03.property_scoped_subject_shuffled_response_100.parquet",
    "04.property_scoped_object_shuffled_response_100.parquet",
)

# Iterate over the models that actually exist: the previous hard-coded
# range(5) raised IndexError on the fifth iteration because MODEL_NAME has
# four entries.
for i in range(len(MODEL_NAME)):
    unload_model()

    model_path = MODEL_DIR + MODEL_NAME[i]
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    # Reuse EOS as the padding token and pad on the left so generated tokens
    # can be sliced off by prompt length.
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = "left"

    with torch.no_grad():
        model = AutoModelForCausalLM.from_pretrained(
            model_path,
            device_map = 'auto',
        )
    accelerator = Accelerator()
    model = accelerator.prepare(model)

    MODEL_OUTPUT_DIR = RESPONSE_DIR + MODEL_NAME[i]
    os.makedirs(MODEL_OUTPUT_DIR, exist_ok=True)

    # Work on copies so the loaded templates stay untouched for the next model.
    # The generator is consumed in order, so the five runs happen sequentially
    # and each template is copied only right before it is used.
    copy_0, copy_1, copy_2, copy_3, copy_4 = (
        generate_responses(
            template.copy(),
            prefix=MODEL_NAME[i],
            out_file = MODEL_OUTPUT_DIR + file_name,
        )
        for template, file_name in zip(
            (template_0, template_1, template_2, template_3, template_4),
            response_files,
        )
    )

In [None]:
# Reload a single model and prepare fresh template copies for a manual run.
# NOTE(review): `i` is not set in this cell; it is whatever an earlier cell
# left behind, so confirm it points at the intended MODEL_NAME entry.
unload_model()
os.environ['CUDA_VISIBLE_DEVICES'] = '0,1,2,3,4,5'

model_path = MODEL_DIR + MODEL_NAME[i]

# Left-padded tokenizer that pads with the EOS token.
tokenizer = AutoTokenizer.from_pretrained(model_path)
tokenizer.padding_side = "left"
tokenizer.pad_token = tokenizer.eos_token

with torch.no_grad():
    model = AutoModelForCausalLM.from_pretrained(model_path, device_map = 'auto')
accelerator = Accelerator()
model = accelerator.prepare(model)

# Per-model output directory (created if missing).
MODEL_OUTPUT_DIR = RESPONSE_DIR + MODEL_NAME[i]
os.makedirs(MODEL_OUTPUT_DIR, exist_ok=True)

# Independent working copies of the five templates.
copy_0, copy_1, copy_2, copy_3, copy_4 = (
    source.copy()
    for source in (template_0, template_1, template_2, template_3, template_4)
)

In [None]:
# Display the directory the response files for the current model are written to.
MODEL_OUTPUT_DIR

In [None]:
# Generate and save responses for the five template copies with the model that
# is currently loaded. File names are listed in the same order as
# copy_0 .. copy_4.
response_files = (
    "00.original_response_100.parquet",
    "01.subject_shuffled_response_100.parquet",
    "02.object_shuffled_response_100.parquet",
    "03.property_scoped_subject_shuffled_response_100.parquet",
    "04.property_scoped_object_shuffled_response_100.parquet",
)

# The generator is consumed left to right, so the five runs execute
# sequentially in the listed order.
copy_0, copy_1, copy_2, copy_3, copy_4 = (
    generate_responses(frame, prefix=MODEL_NAME[i], out_file = MODEL_OUTPUT_DIR + file_name)
    for frame, file_name in zip((copy_0, copy_1, copy_2, copy_3, copy_4), response_files)
)