# Import Library

In [None]:
import torch
import random
import pandas as pd

from unsloth import FastLanguageModel
from datasets import load_dataset, Dataset
from transformers import (AutoModelForCausalLM, 
                          AutoTokenizer, 
                          BitsAndBytesConfig, 
                          TrainingArguments,
                          )
from peft import LoraConfig, PeftConfig
from trl import SFTTrainer
from transformers import TrainingArguments

from transformers import TextStreamer

from huggingface_hub import create_repo

# Load Gemma 2B Instruct version model

In [11]:
max_seq_length = 2048
dtype = torch.float16
load_in_4bit = True

fourbit_models = [
    "unsloth/gemma-2b-bnb-4bit",
    "unsloth/gemma-2b-it-bnb-4bit", 
] 

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/gemma-2b-it-bnb-4bit", 
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)

==((====))==  Unsloth 2024.9.post3: Fast Gemma patching. Transformers = 4.45.1.
   \\   /|    GPU: NVIDIA GeForce RTX 3090. Max memory: 23.691 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.4.1+cu121. CUDA = 8.6. CUDA Toolkit = 12.1.
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.28.post1. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth


# Inference Example Before Fine-Tune

In [12]:
# Alpaca Prompt
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

FastLanguageModel.for_inference(model)
inputs = tokenizer(
[
    alpaca_prompt.format(
        "Recommend a game based on the user's preferences.", # instruction
        """ecommend a game based on the user's preferences: Action, Casual
                            Interested Tags: Multiplayer
                            Platform: Windows, Mac
                            Age Restriction: 18 years and above""", # input
        "",
    )
], return_tensors = "pt").to("cuda")

text_streamer = TextStreamer(tokenizer)
_ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 128)

<bos>Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
Recommend a game based on the user's preferences.

### Input:
ecommend a game based on the user's preferences: Action, Casual
                            Interested Tags: Multiplayer
                            Platform: Windows, Mac
                            Age Restriction: 18 years and above

### Response:
Based on the user's preferences, the following game recommendations would be suitable:

* **Warhammer: Darktide** (Action, Multiplayer, Platform: Windows, Mac, Age Restriction: 18+)
* **Minecraft** (Casual, Platform: Windows, Mac, Age Restriction: None)
* **Counter-Strike: Global Offensive** (Action, Multiplayer, Platform: Windows, Mac, Age Restriction: 16+)<eos>


# Data to Prompt
  
After you create the Alpaca Prompt Dataset by this step, You don't need it next time. 

In [3]:

df = pd.read_csv('./processed_data.csv')

def generate_instruction_response(row):
    user_genres = random.sample(df['genres'].explode().unique().tolist(), 2)
    user_tags = random.sample(df['tags'].explode().unique().tolist(), 2)
    user_platform = random.choice(['Windows', 'Mac', 'Linux'])
    user_age = random.choice([0, 12, 15, 18])

    # Instruction
    instruction = "Recommend a game based on the user's preferences."

    # Input
    input_text = f"""User Preferred Genres: {', '.join(user_genres)}
                    Interested Tags: {', '.join(user_tags)}
                    Platform: {user_platform}
                    Age Restriction: {user_age} years and above"""

    # Response
    response = f"""Recommended Game:
                - Name: {row['name']}
                - Release Date: {row['release_date']}
                - Price: {row['price']}
                - Platform: Windows/{'Supported' if row['windows'] else 'Not Supported'}, Mac/{'Supported' if row['mac'] else 'Not Supported'}, Linux/{'Supported' if row['linux'] else 'Not Supported'}
                - Genres: {row['genres']}
                - Tags: {row['tags']}
                - Game Description: {row['about_the_game']}"""

    return instruction, input_text, response

data = []
for index, row in df.iterrows():
    instruction, input_text, response = generate_instruction_response(row)
    data.append({'instruction': instruction, 'input': input_text, 'output': response})

prompt_response_df = pd.DataFrame(data)

# Alpaca Prompt
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

EOS_TOKEN = tokenizer.eos_token  # Ensure you have defined your tokenizer

def formatting_prompts_func(examples):
    if isinstance(examples["instruction"], str):
        instructions = [examples["instruction"]]
        inputs = [examples["input"]]
        outputs = [examples["output"]]
    else:
        instructions = examples["instruction"]
        inputs = examples["input"]
        outputs = examples["output"]
    texts = []
    for instruction, input_text, output in zip(instructions, inputs, outputs):
        text = alpaca_prompt.format(instruction, input_text, output) + EOS_TOKEN
        texts.append(text)
    return { "text" : texts }


# Instruction, Input, Output
prompt_response_df['instruction'] = "Recommend a game based on the user's preferences."

# To Dataset
dataset = Dataset.from_pandas(prompt_response_df[['instruction', 'input', 'output']])

# Formatting Prompts
formatted_dataset = dataset.map(formatting_prompts_func, batched=True)

# Save to JSONL
formatted_dataset.to_json("finetune_data_alpaca_ver2.jsonl", orient="records", lines=True)

print("Dataset has been saved to 'formatted_dataset.jsonl'.")

# Define Prompt Format Function(Alpaca)

In [None]:
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

EOS_TOKEN = tokenizer.eos_token  # Ensure you have defined your tokenizer

def formatting_prompts_func(examples):
    if isinstance(examples["instruction"], str):
        instructions = [examples["instruction"]]
        inputs = [examples["input"]]
        outputs = [examples["output"]]
    else:
        instructions = examples["instruction"]
        inputs = examples["input"]
        outputs = examples["output"]
    texts = []
    for instruction, input_text, output in zip(instructions, inputs, outputs):
        text = alpaca_prompt.format(instruction, input_text, output) + EOS_TOKEN
        texts.append(text)
    return { "text" : texts }

# Load Dataset

In [6]:
# Load JSONL File
loaded_dataset = load_dataset("json", data_files="finetune_data_alpaca_ver2.jsonl", split="train")

print(loaded_dataset[0])

{'instruction': "Recommend a game based on the user's preferences.", 'input': "User Preferred Genres: ['Indie', 'Racing', 'Free to Play'], ['Adventure', 'Indie', 'RPG', 'Simulation', 'Early Access']\nInterested Tags: ['Action', 'Adventure', 'Simulation', 'Exploration', 'Open World', 'Survival', 'Walking Simulator', 'Horror', 'Immersive Sim', '3D', 'First-Person', 'Realistic', '1980s', 'Alternate History', 'Atmospheric', 'Cold War', 'Military', 'Post-apocalyptic', 'Sci-fi', 'Thriller'], ['Simulation', 'Flight', 'Realistic', 'Physics', '3D', 'Singleplayer', 'Multiplayer', 'Local Multiplayer', 'Racing', 'Vehicular Combat', 'VR', 'Colorful', 'First-Person', 'Third Person', 'Jet', 'PvP', 'Character Customization', 'Controller', 'Level Editor', 'Open World']\nPlatform: Mac\nAge Restriction: 12 years and above", 'output': "Recommended Game:\n- Name: Galactic Bowling\n- Release Date: Oct 21, 2008\n- Price: 19.99\n- Platform: Windows/Supported, Mac/Not Supported, Linux/Not Supported\n- Genres: 

# Setting Lora and Train

In [2]:
model = FastLanguageModel.get_peft_model(
    model,
    r = 16,
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0,
    bias = "none",    
    use_gradient_checkpointing = "unsloth",
    random_state = 3407,
    use_rslora = False, 
    loftq_config = None, 
)

Unsloth 2024.9.post3 patched 18 layers with 18 QKV layers, 18 O layers and 18 MLP layers.


In [4]:
output_dir = "gemma_steam_instructor"

peft_config = LoraConfig(
    lora_alpha=16,
    lora_dropout=0,
    r=64,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj",],
)
training_arguments = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=1,
    gradient_checkpointing=True,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,
    optim="paged_adamw_32bit",
    save_steps=0,
    logging_steps=25,
    learning_rate=5e-4,
    weight_decay=0.001,
    fp16=True,
    bf16=False,
    max_grad_norm=0.3,
    max_steps=-1,
    warmup_ratio=0.03,
    group_by_length=False,
    evaluation_strategy='no',
    eval_steps = 500,
    eval_accumulation_steps=1,
    lr_scheduler_type="cosine",
    report_to="tensorboard",
)



In [7]:
trainer = SFTTrainer(
    model=model,
    train_dataset=loaded_dataset,
    peft_config=peft_config,
    dataset_text_field="text",
    tokenizer=tokenizer,
    max_seq_length=max_seq_length,
    args=training_arguments,
    packing=False
)



In [9]:
# Train
trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 97,410 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 1 | Gradient Accumulation steps = 8
\        /    Total batch size = 8 | Total steps = 12,176
 "-____-"     Number of trainable parameters = 19,611,648


Step,Training Loss
25,3.2012
50,2.2243
75,1.8662
100,1.7683
125,1.7344
150,1.6891
175,1.6636
200,1.65
225,1.7123
250,1.7308


TrainOutput(global_step=12176, training_loss=1.5586655203953053, metrics={'train_runtime': 21452.8897, 'train_samples_per_second': 4.541, 'train_steps_per_second': 0.568, 'total_flos': 8.470954033960919e+17, 'train_loss': 1.5586655203953053, 'epoch': 0.9999794682270814})

In [33]:
# Save model
trainer.save_model()
tokenizer.save_pretrained(output_dir)

('gemma_steam_instructor/tokenizer_config.json',
 'gemma_steam_instructor/special_tokens_map.json',
 'gemma_steam_instructor/tokenizer.model',
 'gemma_steam_instructor/added_tokens.json',
 'gemma_steam_instructor/tokenizer.json')

# Inference

In [None]:
# Load Model
model, tokenizer = FastLanguageModel.from_pretrained(
        model_name = 'gemma_steam_instructor',
        max_seq_length = max_seq_length,
        dtype = dtype,
        load_in_4bit = load_in_4bit,
)
FastLanguageModel.for_inference(model)

In [8]:
# alpaca_prompt
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

inputs = tokenizer(
[
    alpaca_prompt.format(
        "Recommend a game based on the user's preferences.", # instruction
        """ecommend a game based on the user's preferences: Action, Casual
                            Interested Tags: Multiplayer
                            Platform: Windows, Mac
                            Age Restriction: 18 years and above""", # input
        "",
    )
], return_tensors = "pt").to("cuda")

text_streamer = TextStreamer(tokenizer)
_ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 128)

==((====))==  Unsloth 2024.9.post3: Fast Gemma patching. Transformers = 4.45.1.
   \\   /|    GPU: NVIDIA GeForce RTX 3090. Max memory: 23.691 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.4.1+cu121. CUDA = 8.6. CUDA Toolkit = 12.1.
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.28.post1. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
<bos>Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
Recommend a game based on the user's preferences.

### Input:
ecommend a game based on the user's preferences: Action, Casual
                            Interested Tags: Multiplayer
                            Platform: Windows, Mac
                            Age Restriction: 18 years and above

### Response:
Recommended Game:
- Name: The Last Day of the Dead
- Release Date: Oct 26, 2023
- Price: 1.99
- Platform: Windows/Supported, Mac

In [14]:
# Create Repository in Hugging Face
create_repo("RICALISMINE/Steam_Game_Recommender_Gemma", private = False, token = YOUR_TOKEN )

RepoUrl('https://huggingface.co/RICALISMINE/Steam_Game_Recommender_Gemma', endpoint='https://huggingface.co', repo_type='model', repo_id='RICALISMINE/Steam_Game_Recommender_Gemma')

In [16]:
# Merge to 16bit / Push the model
model.save_pretrained_merged("Steam_Game_Recommender_Gemma", tokenizer, save_method = "merged_16bit",)
model.push_to_hub_merged("Steam_Game_Recommender_Gemma", tokenizer, save_method = "merged_16bit", token = YOUR_TOKEN )

Unsloth: Merging 4bit and LoRA weights to 16bit...
Unsloth: Will use up to 77.62 out of 125.59 RAM for saving.


100%|██████████| 18/18 [00:00<00:00, 361.44it/s]

Unsloth: Saving tokenizer...




 Done.
Unsloth: Saving model... This might take 5 minutes for Llama-7b...
Done.
Unsloth: Merging 4bit and LoRA weights to 16bit...
Unsloth: Will use up to 77.62 out of 125.59 RAM for saving.


100%|██████████| 18/18 [00:00<00:00, 461.33it/s]


Unsloth: Saving to organization with address RICALISMINE/Steam_Game_Recommender_Gemma
Unsloth: Saving tokenizer... Done.
Unsloth: Saving model... This might take 5 minutes for Llama-7b...
Unsloth: Saving to organization with address RICALISMINE/Steam_Game_Recommender_Gemma
Unsloth: Uploading all files... Please wait...


model-00001-of-00002.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/67.1M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/34.4M [00:00<?, ?B/s]

Upload 4 LFS files:   0%|          | 0/4 [00:00<?, ?it/s]

tokenizer.model:   0%|          | 0.00/4.24M [00:00<?, ?B/s]

Done.
Saved merged model to https://huggingface.co/None/Steam_Game_Recommender_Gemma
