In [1]:
import torch
print("CUDA disponible :", torch.cuda.is_available())
if torch.cuda.is_available():
    print("GPU :", torch.cuda.get_device_name(0))


CUDA disponible : True
GPU : Tesla T4


In [2]:
!pip -q install transformers datasets peft accelerate bitsandbytes


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.1/59.1 MB[0m [31m21.0 MB/s[0m eta [36m0:00:00[0m
[?25h

In [3]:
from datasets import load_dataset

data_files = {"train": "sft_train.jsonl", "validation": "sft_val.jsonl"}
ds = load_dataset("json", data_files=data_files)

def format_example(ex):
    # We train a causal LM on (prompt + completion)
    text = ex["prompt"].rstrip() + "\n" + ex["completion"].strip()
    return {"text": text}

ds = ds.map(format_example, remove_columns=ds["train"].column_names)
ds


Generating train split: 0 examples [00:00, ? examples/s]

Generating validation split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/1944 [00:00<?, ? examples/s]

Map:   0%|          | 0/217 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['text'],
        num_rows: 1944
    })
    validation: Dataset({
        features: ['text'],
        num_rows: 217
    })
})

In [4]:
from transformers import AutoTokenizer

MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

MAX_LEN = 512

def tokenize(batch):
    return tokenizer(
        batch["text"],
        truncation=True,
        max_length=MAX_LEN,
        padding="max_length",
    )

tokenized = ds.map(tokenize, batched=True, remove_columns=["text"])
tokenized


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/551 [00:00<?, ?B/s]

Map:   0%|          | 0/1944 [00:00<?, ? examples/s]

Map:   0%|          | 0/217 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['input_ids', 'attention_mask'],
        num_rows: 1944
    })
    validation: Dataset({
        features: ['input_ids', 'attention_mask'],
        num_rows: 217
    })
})

In [5]:
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=bnb_config,
    device_map="auto",
)

lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj"],
)

model = get_peft_model(model, lora_config)
model.print_trainable_parameters()


config.json:   0%|          | 0.00/608 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

trainable params: 1,126,400 || all params: 1,101,174,784 || trainable%: 0.1023


In [7]:
from transformers import TrainingArguments, Trainer, DataCollatorForLanguageModeling

args = TrainingArguments(
    output_dir="steam_lora_out",
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    gradient_accumulation_steps=8,
    learning_rate=2e-4,
    num_train_epochs=1,
    logging_steps=25,
    eval_strategy="steps",   # <-- FIX (old: evaluation_strategy)
    eval_steps=100,
    save_steps=100,
    save_total_limit=2,
    fp16=True,
    report_to="none",
)

collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

trainer = Trainer(
    model=model,
    args=args,
    train_dataset=tokenized["train"],
    eval_dataset=tokenized["validation"],
    data_collator=collator,
)

trainer.train()


Step,Training Loss,Validation Loss
100,1.2876,1.299209


TrainOutput(global_step=122, training_loss=1.5519178265430889, metrics={'train_runtime': 367.7987, 'train_samples_per_second': 5.285, 'train_steps_per_second': 0.332, 'total_flos': 6184801677606912.0, 'train_loss': 1.5519178265430889, 'epoch': 1.0})

In [8]:
model.save_pretrained("steam_lora_adapter")
tokenizer.save_pretrained("steam_lora_adapter")
print("Saved to: steam_lora_adapter/")


Saved to: steam_lora_adapter/


In [9]:
import pandas as pd
import torch
from transformers import AutoTokenizer

# Reload tokenizer from adapter folder (safe)
tok = AutoTokenizer.from_pretrained("steam_lora_adapter", use_fast=True)
if tok.pad_token is None:
    tok.pad_token = tok.eos_token

def generate_one(prompt: str, max_new_tokens: int = 180) -> str:
    inputs = tok(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        out = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=0.8,
            top_p=0.9,
            eos_token_id=tok.eos_token_id,
            pad_token_id=tok.pad_token_id,
        )
    full = tok.decode(out[0], skip_special_tokens=True)
    # Return only the continuation after the prompt
    if full.startswith(prompt):
        return full[len(prompt):].strip()
    return full.strip()

df = pd.read_csv("prompt_batch.csv")

subset = df[df["method"] == "engineered"].copy()
subset_small = subset.head(20).copy()

subset_small["generated_text"] = subset_small["prompt"].apply(generate_one)
subset_small["method"] = "finetuned"

out_path = "prompt_batch_finetuned_sample.csv"
subset_small.to_csv(out_path, index=False, encoding="utf-8")
print("Saved:", out_path)
subset_small[["title", "rating", "method"]].head()


Saved: prompt_batch_finetuned_sample.csv


Unnamed: 0,title,rating,method
1,Team Fortress 2,3,finetuned
3,Team Fortress 2,3,finetuned
5,Team Fortress 2,3,finetuned
7,Team Fortress 2,3,finetuned
9,Team Fortress 2,3,finetuned


In [10]:
subset_small[["title", "rating", "generated_text"]].head(3)


Unnamed: 0,title,rating,generated_text
1,Team Fortress 2,3,Review:\nThe thing about team fortress 2 is th...
3,Team Fortress 2,3,"Review:\nI hate this game. It's not fun, it's ..."
5,Team Fortress 2,3,Review:\nThis game has an insane amount of bug...


In [12]:
neg_row = subset[subset["rating"] == 3].sample(1, random_state=0)
pos_row = subset[subset["rating"] == 9].sample(1, random_state=0)

print("NEG prompt rating:", int(neg_row["rating"].iloc[0]))
print("POS prompt rating:", int(pos_row["rating"].iloc[0]))


NEG prompt rating: 3
POS prompt rating: 9


In [13]:
model.eval()

def generate_one_clean(prompt: str, max_new_tokens: int = 140) -> str:
    inputs = tok(prompt, return_tensors="pt").to(model.device)
    input_len = inputs["input_ids"].shape[1]

    with torch.inference_mode():
        out = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            repetition_penalty=1.15,
            no_repeat_ngram_size=3,
            eos_token_id=tok.eos_token_id,
            pad_token_id=tok.pad_token_id,
            use_cache=True,
        )

    gen_ids = out[0][input_len:]
    text = tok.decode(gen_ids, skip_special_tokens=True).strip()

    if text.lower().startswith("review:"):
        text = text.split(":", 1)[-1].strip()

    return text


In [14]:
import pandas as pd

df = pd.read_csv("prompt_batch.csv")
subset = df[df["method"] == "engineered"].copy()

neg_prompt = subset[subset["rating"] == 3].sample(1, random_state=0)["prompt"].iloc[0]
pos_prompt = subset[subset["rating"] == 9].sample(1, random_state=0)["prompt"].iloc[0]

print("NEG:\n", generate_one_clean(neg_prompt))
print("\n---\n")
print("POS:\n", generate_one_clean(pos_prompt))


NEG:
 The only thing that works here is the random server selection. Even when you're in a random server, you still have no idea who your enemy is going to be. This is why I never play this game again.

---

POS:
 I've been playing this game since I was 13 or so (which is ages ago) and it still feels fresh and exciting. The balance of team vs player has always kept me coming back with new experiences. One thing that keeps me coming is the ever changing game modes. From classic maps like DOTA, TDM to more modern games like Team Deathmatch and Rumble. There's something for everyone here. Just be careful not to get too far out of your depth though as you'll eventually end up getting killed! 
Overall, I highly recommend this game to any gamer who loves multiplayer. It'


In [15]:
neg_part = subset[subset["rating"] == 3].sample(25, random_state=0)
pos_part = subset[subset["rating"] == 9].sample(25, random_state=0)

subset_bal = pd.concat([neg_part, pos_part], ignore_index=True)
subset_bal["generated_text"] = subset_bal["prompt"].apply(generate_one_clean)
subset_bal["method"] = "finetuned"

out_path = "prompt_batch_finetuned_balanced_50.csv"
subset_bal.to_csv(out_path, index=False, encoding="utf-8")
print("Saved:", out_path, subset_bal.shape)


Saved: prompt_batch_finetuned_balanced_50.csv (50, 6)
