In [1]:
import torch
from transformers import (
    AutoModelForCausalLM,
    LlamaTokenizerFast,
    TrainingArguments,
    Trainer,
    BitsAndBytesConfig,
)
from peft import (
    LoraConfig,
    get_peft_model,
)

2024-04-19 11:35:37.171708: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-04-19 11:35:37.280464: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-04-19 11:35:37.767893: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda/lib64:
2024-04-19 11:35:37.767950: W tensorflow/compiler/xla/stream_exe

In [2]:
# https://huggingface.co/meta-llama/Llama-2-7b-hf
# base_model = "meta-llama/Llama-2-7b-hf"
base_model = "meta-llama/Llama-2-7b-chat-hf"
# base_model="beomi/open-llama-2-ko-7b"
# base_model = "huggingface-projects/llama-2-7b-chat"
# base_model = "TinyPixel/Llama-2-7B-bf16-sharded"

In [3]:
# # Fine-tuned model
# new_model = "llama-2-7b-hf-fine-tuned-test1"

In [3]:
# QLoRA 모델을 사용하기 위한 설정

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16 
)



In [4]:
# 라마2 모델 불러오기
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    device_map="auto",
)
model.config.use_cache = False
model.config.pretraining_tp = 1

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [5]:
# 토크나이저 로드
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false" # 토크나이저 병렬처리 방지(오류 방지)
os.environ['TRANSFORMERS_NO_ADVISORY_WARNINGS'] = 'true' # __cell__ 오류 방지

tokenizer = LlamaTokenizerFast.from_pretrained(
    base_model,
    trust_remote_code=True
)
tokenizer.pad_token = tokenizer.eos_token # 패딩 토큰을 문장의 끝으로 설정 </s>
tokenizer.padding_side = "right" # 패딩을 문장 뒤에 추가

In [7]:
# 학습 양식
import json
from datasets import load_dataset
file_name = 'preprocess/dataset3/new_ratings_7.json'

In [8]:
import json

# JSON 파일 불러오기
input_file = file_name
with open(input_file, 'r') as f:
    data = json.load(f)

# 불러온 데이터 확인
# print(data)

In [9]:
# print(data[1])

print(data[0]["ratings"]) # 영화 평점들 정보 모음
print(type(data[0]["ratings"])) 

print(data[0]["ratings"][0]) # 평점 1개
print(type(data[0]["ratings"][0])) 
print(data[0]["rank"])

[{'title': 'Toy Story (1995)', 'rating': 4.0, 'imdbId': 'tt114709', 'timestamp': 964982703}, {'title': 'Grumpier Old Men (1995)', 'rating': 4.0, 'imdbId': 'tt113228', 'timestamp': 964981247}, {'title': 'Heat (1995)', 'rating': 4.0, 'imdbId': 'tt113277', 'timestamp': 964982224}, {'title': 'Seven (a.k.a. Se7en) (1995)', 'rating': 5.0, 'imdbId': 'tt114369', 'timestamp': 964983815}, {'title': 'Usual Suspects, The (1995)', 'rating': 5.0, 'imdbId': 'tt114814', 'timestamp': 964982931}, {'title': 'From Dusk Till Dawn (1996)', 'rating': 3.0, 'imdbId': 'tt116367', 'timestamp': 964982400}, {'title': 'Bottle Rocket (1996)', 'rating': 5.0, 'imdbId': 'tt115734', 'timestamp': 964980868}, {'title': 'Braveheart (1995)', 'rating': 4.0, 'imdbId': 'tt112573', 'timestamp': 964982176}, {'title': 'Rob Roy (1995)', 'rating': 5.0, 'imdbId': 'tt114287', 'timestamp': 964984041}, {'title': 'Canadian Bacon (1995)', 'rating': 5.0, 'imdbId': 'tt109370', 'timestamp': 964984100}]
<class 'list'>
{'title': 'Toy Story (1

In [10]:
instruction = '''
I will sort the movie list in descending order based on user rating information. I will show the title for the corresponding movie. It must return in JSON format.
'''

In [11]:
# 데이터 매핑 함수 정의
def map_data_to_format(example):

    user_ratings_str = json.dumps(example['ratings'], ensure_ascii=False)
    rank_list_str = json.dumps(example["rank"], ensure_ascii=False)
    
    text = (
        f"###instruction:\n{instruction}\n\n"
        f"user_rating_information:\n{user_ratings_str}\n\n"
        f"###response:\n{rank_list_str}\n\n"
    )
    
    # completion은 모델이 생성해야 할 예상 출력을 포함함
    # 여기서는 우선순위에 따라 정렬된 영화 imdb_id의 리스트를 반환합니다.
    completion = f"{{\"rank\": {rank_list_str}}}"
    
    return {'text': text, 'completion': completion}

In [12]:
# 데이터셋 로드
data = load_dataset('json', data_files=file_name, split="train")

Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

In [13]:
print(data)

Dataset({
    features: ['rank', 'userId', 'ratings'],
    num_rows: 2000
})


In [15]:
# print(data['ratings'])


In [16]:
# print(data['order']) # [['tt109370', 'tt114287', 'tt114369', 'tt114814', 'tt115734', 'tt114709', 'tt113277', 'tt112573', 'tt113228', 'tt116367'], ...]

In [17]:
# print(data['userId']) # [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, ...]

In [14]:
# 데이터 매핑 적용
mapped_data = data.map(map_data_to_format)

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

In [15]:
print(mapped_data)


Dataset({
    features: ['rank', 'userId', 'ratings', 'text', 'completion'],
    num_rows: 2000
})


In [16]:
# 데이터셋 분할
split_data = mapped_data.train_test_split(test_size=0.1)  # 10%를 테스트셋으로 사용

train_set = split_data['train']
eval_set = split_data['test']

train_set = train_set.map(lambda samples: tokenizer(samples["text"], padding=True, truncation=True, return_tensors="pt"), batched=True)
eval_set = eval_set.map(lambda samples: tokenizer(samples["text"], padding=True, truncation=True, return_tensors="pt"), batched=True)

# lora 파라미터 설정
peft_params = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=64,
    bias="none",
    task_type="CAUSAL_LM",
)


Map:   0%|          | 0/1800 [00:00<?, ? examples/s]

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Map:   0%|          | 0/200 [00:00<?, ? examples/s]

In [17]:
train_set

Dataset({
    features: ['rank', 'userId', 'ratings', 'text', 'completion', 'input_ids', 'attention_mask'],
    num_rows: 1800
})

In [18]:
print(train_set[3]["text"])
print("\n")
print(type(train_set[3]["text"]))
print("\n")
# print(train_set[20]["text"])
print(train_set[79]["text"])

###instruction:

I will sort the movie list in descending order based on user rating information. I will show the title for the corresponding movie. It must return in JSON format.


user_rating_information:
[{"imdbId": "tt427152", "rating": 3.0, "timestamp": 1516155080, "title": "Dinner for Schmucks (2010)"}, {"imdbId": "tt1386588", "rating": 3.0, "timestamp": 1516141839, "title": "Other Guys, The (2010)"}, {"imdbId": "tt446029", "rating": 3.5, "timestamp": 1516141083, "title": "Scott Pilgrim vs. the World (2010)"}, {"imdbId": "tt1117523", "rating": 5.0, "timestamp": 1516153440, "title": "Jackass 2.5 (2007)"}, {"imdbId": "tt889573", "rating": 3.0, "timestamp": 1516152708, "title": "Switch, The (2010)"}, {"imdbId": "tt1285016", "rating": 3.5, "timestamp": 1516140981, "title": "Social Network, The (2010)"}, {"imdbId": "tt840361", "rating": 3.0, "timestamp": 1516140726, "title": "Town, The (2010)"}, {"imdbId": "tt1116184", "rating": 5.0, "timestamp": 1516153068, "title": "Jackass 3D (2010

In [19]:
print(eval_set[3]["text"])
# print(eval_set[20]["text"])
print(eval_set[9]["text"])


###instruction:

I will sort the movie list in descending order based on user rating information. I will show the title for the corresponding movie. It must return in JSON format.


user_rating_information:
[{"imdbId": "tt112792", "rating": 4.0, "timestamp": 832059371, "title": "Dangerous Minds (1995)"}, {"imdbId": "tt112431", "rating": 5.0, "timestamp": 832059080, "title": "Babe (1995)"}, {"imdbId": "tt113855", "rating": 3.0, "timestamp": 832059393, "title": "Mortal Kombat (1995)"}, {"imdbId": "tt113347", "rating": 4.0, "timestamp": 832059892, "title": "How to Make an American Quilt (1995)"}, {"imdbId": "tt114148", "rating": 2.0, "timestamp": 832059371, "title": "Pocahontas (1995)"}, {"imdbId": "tt113419", "rating": 4.0, "timestamp": 832059509, "title": "Indian in the Cupboard, The (1995)"}, {"imdbId": "tt113862", "rating": 5.0, "timestamp": 832059339, "title": "Mr. Holland's Opus (1995)"}, {"imdbId": "tt112379", "rating": 5.0, "timestamp": 832060151, "title": "Antonia's Line (Antonia

In [20]:
print(eval_set[90]["text"])
print(eval_set[50]["text"])
print(eval_set[12]["text"])

###instruction:

I will sort the movie list in descending order based on user rating information. I will show the title for the corresponding movie. It must return in JSON format.


user_rating_information:
[{"imdbId": "tt465430", "rating": 5.0, "timestamp": 1520409461, "title": "Cottage, The (2008)"}, {"imdbId": "tt1155056", "rating": 2.5, "timestamp": 1520408916, "title": "I Love You, Man (2009)"}, {"imdbId": "tt26676", "rating": 4.5, "timestamp": 1520409094, "title": "Man on the Flying Trapeze (1935)"}, {"imdbId": "tt489049", "rating": 2.0, "timestamp": 1520408893, "title": "Fanboys (2009)"}, {"imdbId": "tt1078912", "rating": 4.0, "timestamp": 1520408807, "title": "Night at the Museum: Battle of the Smithsonian (2009)"}, {"imdbId": "tt1119646", "rating": 3.5, "timestamp": 1520408816, "title": "Hangover, The (2009)"}, {"imdbId": "tt415679", "rating": 5.0, "timestamp": 1520409113, "title": "Boy Eats Girl (2005)"}, {"imdbId": "tt780567", "rating": 4.0, "timestamp": 1520408934, "title":

In [21]:
print(train_set[3]["completion"])
# print(train_set[20]["completion"])
print(train_set[79]["completion"])

{"rank": [{"title": "Jackass 2.5 (2007)"}, {"title": "Jackass 3D (2010)"}, {"title": "Red (2010)"}, {"title": "Due Date (2010)"}, {"title": "Scott Pilgrim vs. the World (2010)"}, {"title": "Social Network, The (2010)"}, {"title": "Dinner for Schmucks (2010)"}, {"title": "Switch, The (2010)"}, {"title": "Other Guys, The (2010)"}, {"title": "Town, The (2010)"}]}
{"rank": [{"title": "Perks of Being a Wallflower, The (2012)"}, {"title": "Django Unchained (2012)"}, {"title": "Hobbit: An Unexpected Journey, The (2012)"}, {"title": "Life of Pi (2012)"}, {"title": "Misérables, Les (2012)"}, {"title": "Hunt, The (Jagten) (2012)"}, {"title": "Oblivion (2013)"}, {"title": "Silver Linings Playbook (2012)"}, {"title": "Looper (2012)"}, {"title": "Taken 2 (2012)"}]}


In [23]:
model = get_peft_model(model, peft_params)

# prameter
epochs = 1 # 10
batch_size = 1
lr = 2e-4

training_params = TrainingArguments(
    output_dir="models7",
    num_train_epochs=epochs,
    per_device_train_batch_size=batch_size,
    gradient_accumulation_steps=16,
    optim="adamw_torch",
    save_strategy="epoch",
    evaluation_strategy="steps",
    logging_strategy="steps",
    eval_steps=20,
    logging_steps=20,
    learning_rate=lr,
    weight_decay=0.001,
    fp16=False,
    bf16=False,
    max_grad_norm=0.3,
    max_steps=-1,
    warmup_ratio=0.03,
    group_by_length=True,
    lr_scheduler_type="cosine",
    report_to="tensorboard",
    dataloader_num_workers=1,
)


In [80]:
# ! wandb login --relogin
# torch.cuda.empty_cache()
# import gc
# gc.collect()
# import gc
# torch.cuda.empty_cache()
# gc.collect()

In [24]:
import transformers
import warnings
warnings.filterwarnings("ignore", category=UserWarning)

trainer = Trainer(
    model=model,
    args=training_params,
    train_dataset=train_set,
    eval_dataset=eval_set,
    tokenizer=tokenizer,
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)

trainer.train()

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


  0%|          | 0/112 [00:00<?, ?it/s]

{'loss': 0.79, 'grad_norm': 0.059538308531045914, 'learning_rate': 0.00018936326403234125, 'epoch': 0.18}


  0%|          | 0/25 [00:00<?, ?it/s]

{'eval_loss': 0.6115174889564514, 'eval_runtime': 48.6369, 'eval_samples_per_second': 4.112, 'eval_steps_per_second': 0.514, 'epoch': 0.18}
{'loss': 0.5356, 'grad_norm': 0.05809849128127098, 'learning_rate': 0.00015000000000000001, 'epoch': 0.36}


  0%|          | 0/25 [00:00<?, ?it/s]

{'eval_loss': 0.49864256381988525, 'eval_runtime': 48.4783, 'eval_samples_per_second': 4.126, 'eval_steps_per_second': 0.516, 'epoch': 0.36}
{'loss': 0.4872, 'grad_norm': 0.08045662194490433, 'learning_rate': 9.418551710895243e-05, 'epoch': 0.53}


  0%|          | 0/25 [00:00<?, ?it/s]

{'eval_loss': 0.4929836690425873, 'eval_runtime': 48.5549, 'eval_samples_per_second': 4.119, 'eval_steps_per_second': 0.515, 'epoch': 0.53}
{'loss': 0.4937, 'grad_norm': 0.12181545048952103, 'learning_rate': 4.028414082972141e-05, 'epoch': 0.71}


  0%|          | 0/25 [00:00<?, ?it/s]

{'eval_loss': 0.4955301284790039, 'eval_runtime': 48.8222, 'eval_samples_per_second': 4.096, 'eval_steps_per_second': 0.512, 'epoch': 0.71}
{'loss': 0.4892, 'grad_norm': 0.1044938787817955, 'learning_rate': 6.030737921409169e-06, 'epoch': 0.89}


  0%|          | 0/25 [00:00<?, ?it/s]

{'eval_loss': 0.4941518306732178, 'eval_runtime': 48.5707, 'eval_samples_per_second': 4.118, 'eval_steps_per_second': 0.515, 'epoch': 0.89}
{'train_runtime': 2123.5741, 'train_samples_per_second': 0.848, 'train_steps_per_second': 0.053, 'train_loss': 0.5520987042358944, 'epoch': 1.0}


TrainOutput(global_step=112, training_loss=0.5520987042358944, metrics={'train_runtime': 2123.5741, 'train_samples_per_second': 0.848, 'train_steps_per_second': 0.053, 'train_loss': 0.5520987042358944, 'epoch': 1.0})

In [25]:
from tensorboard import notebook
log_dir = "./models7" 
notebook.start("--logdir {} --port 4000".format(log_dir))

Launching TensorBoard...

In [27]:
## test1

In [28]:
instruction = "I will sort the movie list in descending order based on user rating information. I will show the title for the corresponding movie. It must return in JSON format."

user_ratings_str = [{"imdbId": "tt199725", "rating": 3.0, "timestamp": 1044311397, "title": "Love and Basketball (2000)"}, {"imdbId": "tt171359", "rating": 3.0, "timestamp": 1044311426, "title": "Hamlet (2000)"}, {"imdbId": "tt187393", "rating": 3.0, "timestamp": 1044311108, "title": "Patriot, The (2000)"}, {"imdbId": "tt181875", "rating": 4.0, "timestamp": 1044311358, "title": "Almost Famous (2000)"}, {"imdbId": "tt180093", "rating": 5.0, "timestamp": 1044311310, "title": "Requiem for a Dream (2000)"}, {"imdbId": "tt120917", "rating": 4.0, "timestamp": 1044311744, "title": "Emperor's New Groove, The (2000)"}, {"imdbId": "tt181865", "rating": 5.0, "timestamp": 1044311310, "title": "Traffic (2000)"}, {"imdbId": "tt209144", "rating": 5.0, "timestamp": 1044311318, "title": "Memento (2000)"}, {"imdbId": "tt125022", "rating": 2.0, "timestamp": 1044311195, "title": "Heartbreakers (2001)"}, {"imdbId": "tt203009", "rating": 4.0, "timestamp": 1044311949, "title": "Moulin Rouge (2001)"}]

# order_list_str=["tt118971", "tt118883", "tt120102", "tt119488", "tt118884", "tt119345", "tt119174", "tt120177", "tt118842", "tt120399"]

In [29]:
# ###instruction:
# {instruction}

text = f'''
###instruction:
{instruction}


user_rating_information:
{user_ratings_str}


'''
inputs = tokenizer(text, return_tensors="pt").to("cuda")
outputs = model.generate(
    input_ids=inputs["input_ids"].to("cuda"), 
    attention_mask=inputs["attention_mask"], 
    max_new_tokens=256,
    early_stopping=True,
    pad_token_id=tokenizer.eos_token_id
)
output = tokenizer.decode(outputs[0])
print(output)


<s> 
###instruction:
I will sort the movie list in descending order based on user rating information. I will show the title for the corresponding movie. It must return in JSON format.


user_rating_information:
[{'imdbId': 'tt199725', 'rating': 3.0, 'timestamp': 1044311397, 'title': 'Love and Basketball (2000)'}, {'imdbId': 'tt171359', 'rating': 3.0, 'timestamp': 1044311426, 'title': 'Hamlet (2000)'}, {'imdbId': 'tt187393', 'rating': 3.0, 'timestamp': 1044311108, 'title': 'Patriot, The (2000)'}, {'imdbId': 'tt181875', 'rating': 4.0, 'timestamp': 1044311358, 'title': 'Almost Famous (2000)'}, {'imdbId': 'tt180093', 'rating': 5.0, 'timestamp': 1044311310, 'title': 'Requiem for a Dream (2000)'}, {'imdbId': 'tt120917', 'rating': 4.0, 'timestamp': 1044311744, 'title': "Emperor's New Groove, The (2000)"}, {'imdbId': 'tt181865', 'rating': 5.0, 'timestamp': 1044311310, 'title': 'Traffic (2000)'}, {'imdbId': 'tt209144', 'rating': 5.0, 'timestamp': 1044311318, 'title': 'Memento (2000)'}, {'imdbId

In [30]:
## test2

In [31]:
instruction = "I will sort the movie list in descending order based on user rating information. I will show the title for the corresponding movie. It must return in JSON format."

user_ratings_str2 = [{"title": "Swingers (1996)"}, {"title": "Reservoir Dogs (1992)"}, {"title": "Sleepers (1996)"}, {"title": "Die Hard (1988)"}, {"title": "Robin Hood: Prince of Thieves (1991)"}, {"title": "Dirty Dancing (1987)"}, {"title": "Fish Called Wanda, A (1988)"}, {"title": "Angels in the Outfield (1994)"}, {"title": "Cool Runnings (1993)"}, {"title": "That Thing You Do! (1996)"}]

# order_list_str=["tt118971", "tt118883", "tt120102", "tt119488", "tt118884", "tt119345", "tt119174", "tt120177", "tt118842", "tt120399"]

In [32]:
# {instruction}

text = f'''
###instruction:
{instruction}


user_rating_information:
{user_ratings_str2}


'''
inputs = tokenizer(text, return_tensors="pt").to("cuda")
outputs = model.generate(
    input_ids=inputs["input_ids"].to("cuda"), 
    attention_mask=inputs["attention_mask"], 
    max_new_tokens=256,
    early_stopping=True,
    pad_token_id=tokenizer.eos_token_id
)
output = tokenizer.decode(outputs[0])
print(output)

<s> 
###instruction:
I will sort the movie list in descending order based on user rating information. I will show the title for the corresponding movie. It must return in JSON format.


user_rating_information:
[{'title': 'Swingers (1996)'}, {'title': 'Reservoir Dogs (1992)'}, {'title': 'Sleepers (1996)'}, {'title': 'Die Hard (1988)'}, {'title': 'Robin Hood: Prince of Thieves (1991)'}, {'title': 'Dirty Dancing (1987)'}, {'title': 'Fish Called Wanda, A (1988)'}, {'title': 'Angels in the Outfield (1994)'}, {'title': 'Cool Runnings (1993)'}, {'title': 'That Thing You Do! (1996)'}]


###response:
[{"title": "Swingers (1996)"}, {"title": "Reservoir Dogs (1992)"}, {"title": "That Thing You Do! (1996)"}, {"title": "Die Hard (1988)"}, {"title": "Robin Hood: Prince of Thieves (1991)"}, {"title": "Fish Called Wanda, A (1988)"}, {"title": "Cool Runnings (1993)"}, {"title": "Sleepers (1996)"}, {"title": "Angels in the Outfield (1994)"}, {"title": "Dirty Dancing (1987)"}]

###response:
[{"title": "

In [33]:
## test3

In [34]:
instruction = "I will sort the movie list in descending order based on user rating information. I will show the title for the corresponding movie. It must return in JSON format."

user_ratings_str3 = [{"imdbId": "tt101846", "rating": 2.0, "timestamp": 996221356, "title": "F/X2 (a.k.a. F/X 2 - The Deadly Art of Illusion) (1991)"}, {"imdbId": "tt87277", "rating": 3.0, "timestamp": 996259355, "title": "Footloose (1984)"}, {"imdbId": "tt120903", "rating": 2.0, "timestamp": 996212879, "title": "X-Men (2000)"}, {"imdbId": "tt174480", "rating": 2.0, "timestamp": 996217683, "title": "Autumn in New York (2000)"}, {"imdbId": "tt99005", "rating": 5.0, "timestamp": 996256206, "title": "Air America (1990)"}, {"imdbId": "tt191397", "rating": 3.0, "timestamp": 996213058, "title": "Replacements, The (2000)"}, {"imdbId": "tt102510", "rating": 2.0, "timestamp": 996221323, "title": "Naked Gun 2 1/2: The Smell of Fear, The (1991)"}, {"imdbId": "tt204946", "rating": 2.0, "timestamp": 996215785, "title": "Bring It On (2000)"}, {"imdbId": "tt208988", "rating": 4.0, "timestamp": 996217599, "title": "Get Carter (2000)"}, {"imdbId": "tt212338", "rating": 3.0, "timestamp": 996213481, "title": "Meet the Parents (2000)"}]

# order_list_str=["tt118971", "tt118883", "tt120102", "tt119488", "tt118884", "tt119345", "tt119174", "tt120177", "tt118842", "tt120399"]

In [35]:
# {instruction}

text = f'''
###instruction:
{instruction}


user_rating_information:
{user_ratings_str3}


'''
inputs = tokenizer(text, return_tensors="pt").to("cuda")
outputs = model.generate(
    input_ids=inputs["input_ids"].to("cuda"), 
    attention_mask=inputs["attention_mask"], 
    max_new_tokens=256,
    early_stopping=True,
    pad_token_id=tokenizer.eos_token_id
)
output = tokenizer.decode(outputs[0])
print(output)

<s> 
###instruction:
I will sort the movie list in descending order based on user rating information. I will show the title for the corresponding movie. It must return in JSON format.


user_rating_information:
[{'imdbId': 'tt101846', 'rating': 2.0, 'timestamp': 996221356, 'title': 'F/X2 (a.k.a. F/X 2 - The Deadly Art of Illusion) (1991)'}, {'imdbId': 'tt87277', 'rating': 3.0, 'timestamp': 996259355, 'title': 'Footloose (1984)'}, {'imdbId': 'tt120903', 'rating': 2.0, 'timestamp': 996212879, 'title': 'X-Men (2000)'}, {'imdbId': 'tt174480', 'rating': 2.0, 'timestamp': 996217683, 'title': 'Autumn in New York (2000)'}, {'imdbId': 'tt99005', 'rating': 5.0, 'timestamp': 996256206, 'title': 'Air America (1990)'}, {'imdbId': 'tt191397', 'rating': 3.0, 'timestamp': 996213058, 'title': 'Replacements, The (2000)'}, {'imdbId': 'tt102510', 'rating': 2.0, 'timestamp': 996221323, 'title': 'Naked Gun 2 1/2: The Smell of Fear, The (1991)'}, {'imdbId': 'tt204946', 'rating': 2.0, 'timestamp': 996215785, 

In [36]:
## test4

In [37]:
instruction = "I will sort the movie list in descending order based on user rating information. I will show the title for the corresponding movie. It must return in JSON format."

user_ratings_str4 = [{"imdbId": "tt328832", "rating": 4.5, "timestamp": 1525554402, "title": "Animatrix, The (2003)"}, {"imdbId": "tt339291", "rating": 4.0, "timestamp": 1526248533, "title": "Lemony Snicket's A Series of Unfortunate Events (2004)"}, {"imdbId": "tt371246", "rating": 4.0, "timestamp": 1525554894, "title": "Spanglish (2004)"}, {"imdbId": "tt375912", "rating": 4.5, "timestamp": 1532723284, "title": "Layer Cake (2004)"}, {"imdbId": "tt367594", "rating": 4.5, "timestamp": 1521822764, "title": "Charlie and the Chocolate Factory (2005)"}, {"imdbId": "tt347149", "rating": 3.5, "timestamp": 1525869033, "title": "Howl's Moving Castle (Hauru no ugoku shiro) (2004)"}, {"imdbId": "tt360486", "rating": 4.5, "timestamp": 1521490005, "title": "Constantine (2005)"}, {"imdbId": "tt401792", "rating": 4.5, "timestamp": 1521489360, "title": "Sin City (2005)"}, {"imdbId": "tt320661", "rating": 4.0, "timestamp": 1525554409, "title": "Kingdom of Heaven (2005)"}, {"imdbId": "tt121766", "rating": 4.5, "timestamp": 1521490052, "title": "Star Wars: Episode III - Revenge of the Sith (2005)"}]

# order_list_str=["tt118971", "tt118883", "tt120102", "tt119488", "tt118884", "tt119345", "tt119174", "tt120177", "tt118842", "tt120399"]

In [38]:
# {instruction}

text = f'''
###instruction:
{instruction}


user_rating_information:
{user_ratings_str4}


'''
inputs = tokenizer(text, return_tensors="pt").to("cuda")
outputs = model.generate(
    input_ids=inputs["input_ids"].to("cuda"), 
    attention_mask=inputs["attention_mask"], 
    max_new_tokens=256,
    early_stopping=True,
    pad_token_id=tokenizer.eos_token_id
)
output = tokenizer.decode(outputs[0])
print(output)

<s> 
###instruction:
I will sort the movie list in descending order based on user rating information. I will show the title for the corresponding movie. It must return in JSON format.


user_rating_information:
[{'imdbId': 'tt328832', 'rating': 4.5, 'timestamp': 1525554402, 'title': 'Animatrix, The (2003)'}, {'imdbId': 'tt339291', 'rating': 4.0, 'timestamp': 1526248533, 'title': "Lemony Snicket's A Series of Unfortunate Events (2004)"}, {'imdbId': 'tt371246', 'rating': 4.0, 'timestamp': 1525554894, 'title': 'Spanglish (2004)'}, {'imdbId': 'tt375912', 'rating': 4.5, 'timestamp': 1532723284, 'title': 'Layer Cake (2004)'}, {'imdbId': 'tt367594', 'rating': 4.5, 'timestamp': 1521822764, 'title': 'Charlie and the Chocolate Factory (2005)'}, {'imdbId': 'tt347149', 'rating': 3.5, 'timestamp': 1525869033, 'title': "Howl's Moving Castle (Hauru no ugoku shiro) (2004)"}, {'imdbId': 'tt360486', 'rating': 4.5, 'timestamp': 1521490005, 'title': 'Constantine (2005)'}, {'imdbId': 'tt401792', 'rating': 

In [39]:
## test5

In [40]:
instruction = "I will sort the movie list in descending order based on user rating information. I will show the title for the corresponding movie. It must return in JSON format."

user_ratings_str5 = [{"imdbId": "tt129167", "rating": 5.0, "timestamp": 1161565763, "title": "Iron Giant, The (1999)"}, {"imdbId": "tt167404", "rating": 4.5, "timestamp": 1161520995, "title": "Sixth Sense, The (1999)"}, {"imdbId": "tt131325", "rating": 4.0, "timestamp": 1161529212, "title": "Bowfinger (1999)"}, {"imdbId": "tt94737", "rating": 4.0, "timestamp": 1161520603, "title": "Big (1988)"}, {"imdbId": "tt85334", "rating": 5.0, "timestamp": 1161528937, "title": "Christmas Story, A (1983)"}, {"imdbId": "tt120657", "rating": 3.5, "timestamp": 1161620716, "title": "13th Warrior, The (1999)"}, {"imdbId": "tt169547", "rating": 2.5, "timestamp": 1161520054, "title": "American Beauty (1999)"}, {"imdbId": "tt68473", "rating": 4.0, "timestamp": 1161563770, "title": "Deliverance (1972)"}, {"imdbId": "tt56443", "rating": 3.5, "timestamp": 1161531147, "title": "Sanjuro (Tsubaki Sanjûrô) (1962)"}, {"imdbId": "tt171804", "rating": 4.5, "timestamp": 1161564460, "title": "Boys Don't Cry (1999)"}]

# order_list_str=["tt118971", "tt118883", "tt120102", "tt119488", "tt118884", "tt119345", "tt119174", "tt120177", "tt118842", "tt120399"]

In [41]:
# {instruction}

text = f'''
###instruction:
{instruction}


user_rating_information:
{user_ratings_str5}


'''
inputs = tokenizer(text, return_tensors="pt").to("cuda")
outputs = model.generate(
    input_ids=inputs["input_ids"].to("cuda"), 
    attention_mask=inputs["attention_mask"], 
    max_new_tokens=256,
    early_stopping=True,
    pad_token_id=tokenizer.eos_token_id
)
output = tokenizer.decode(outputs[0])
print(output)

<s> 
###instruction:
I will sort the movie list in descending order based on user rating information. I will show the title for the corresponding movie. It must return in JSON format.


user_rating_information:
[{'imdbId': 'tt129167', 'rating': 5.0, 'timestamp': 1161565763, 'title': 'Iron Giant, The (1999)'}, {'imdbId': 'tt167404', 'rating': 4.5, 'timestamp': 1161520995, 'title': 'Sixth Sense, The (1999)'}, {'imdbId': 'tt131325', 'rating': 4.0, 'timestamp': 1161529212, 'title': 'Bowfinger (1999)'}, {'imdbId': 'tt94737', 'rating': 4.0, 'timestamp': 1161520603, 'title': 'Big (1988)'}, {'imdbId': 'tt85334', 'rating': 5.0, 'timestamp': 1161528937, 'title': 'Christmas Story, A (1983)'}, {'imdbId': 'tt120657', 'rating': 3.5, 'timestamp': 1161620716, 'title': '13th Warrior, The (1999)'}, {'imdbId': 'tt169547', 'rating': 2.5, 'timestamp': 1161520054, 'title': 'American Beauty (1999)'}, {'imdbId': 'tt68473', 'rating': 4.0, 'timestamp': 1161563770, 'title': 'Deliverance (1972)'}, {'imdbId': 'tt5