<a href="https://colab.research.google.com/github/smebellis/cis540_final_project/blob/main/fine_tune_gpt_oss20b.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Setup

In [None]:
# Install torch from the PyTorch wheel index for CUDA 12.8 (cu128) first, so the
# later installs see an already-installed torch.
%pip install torch --index-url https://download.pytorch.org/whl/cu128
# Fine-tuning stack with minimum versions only (no upper bound).
%pip install "trl>=0.20.0" "peft>=0.17.0" "transformers>=4.55.0"
# Experiment tracking; unpinned.
%pip install wandb
# NOTE(review): BitsAndBytesConfig is imported below, but no quantization config
# is used anywhere in the visible cells -- confirm this install is still needed.
%pip install bitsandbytes

In [None]:
from huggingface_hub import notebook_login
from datasets import load_dataset
import wandb
from transformers import AutoTokenizer, BitsAndBytesConfig
import torch
from transformers import AutoModelForCausalLM
from peft import LoraConfig, get_peft_model
from trl import SFTConfig, SFTTrainer

# Login to Hugging Face

In [None]:
# Authenticate this session with the Hugging Face Hub. Credentials are needed
# later because the training config sets push_to_hub=True and the last cell
# calls trainer.push_to_hub().
notebook_login()

# Setup Weights and Biases

In [None]:
# Authenticate with Weights & Biases; the training config below sets
# report_to="wandb".
wandb.login()

# Prepare the dataset

In [None]:
# Load the "train" split of HuggingFaceH4/Multilingual-Thinking; this object is
# later passed to SFTTrainer as train_dataset.
# NOTE(review): the system prompt drafted further down targets IOC extraction,
# while this dataset is a multilingual reasoning set -- confirm which task the
# fine-tune is meant to cover.
dataset = load_dataset("HuggingFaceH4/Multilingual-Thinking", split="train")
# Bare expression so the notebook displays the dataset's repr.
dataset

In [None]:
import os

# Name the W&B project through the environment. This cell runs before the
# trainer is built, so the variable is set by the time training (configured
# with report_to="wandb") starts.
os.environ.update({"WANDB_PROJECT": "cis540_final_project"})

# Load Tokenizer

In [None]:
# Tokenizer (and its chat template) for the checkpoint being fine-tuned. Keep
# this id in sync with the one passed to AutoModelForCausalLM.from_pretrained
# in the "Prepare the Model" section.
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-4B-Instruct-2507")
# Alternative checkpoint, currently disabled:
# tokenizer = AutoTokenizer.from_pretrained("openai/gpt-oss-20b")

In [None]:
# System prompt for the IOC-extraction task.
#
# The worked example inside the prompt has to agree with the prompt's own rules:
#   * the hash labelled SHA256 is a full 64-hex-character digest (the SHA-256 of
#     empty input), matching the "SHA-256 (64 hex chars)" detection rule;
#   * the example output contains nothing but the IOC block, matching the
#     "only the IOC block" rule.
# Apart from the example, the prompt text (including its line breaks and
# indentation) is kept verbatim.
IOC_SYSTEM_PROMPT = """You are a cyber threat intelligence model trained to identify and extract Indicators of
        Compromise (IOCs) from unstructured text, logs, and threat reports. Your primary purpose is to detect patterns
        related to advanced persistent threats (APTs) and output all identified indicators in a structured, normalized
        format suitable for ingestion by security automation tools.

Your output must always follow this exact structure:

SHA256sum
• <list of SHA-256 hashes>

SHA1sum
• <list of SHA-1 hashes>

MD5sum
• <list of MD5 hashes>

IP Addresses
• <list of IPv4 or IPv6 addresses>

Each list item should:
- Contain one indicator per line prefixed with a bullet (•).
- Exclude duplicates and irrelevant tokens.
- Maintain grouping order: SHA256 → SHA1 → MD5 → IP Addresses.

Detection rules:
- Identify cryptographic hashes: SHA-256 (64 hex chars), SHA-1 (40 hex chars), MD5 (32 hex chars)
- Identify network indicators: IPv4/IPv6 addresses
- Ignore false positives (short hex strings, malformed IPs)
- Output nothing if no valid indicators are detected
- Do NOT generate explanations, summaries, or context — only the IOC block.

Example Input:
"The malware sample dropped a payload with SHA256 e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 and communicated with 209.51.54.243."

Example Output:
SHA256sum
• e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855

IP Addresses
• 209.51.54.243"""

# Single-turn conversation holding only the system message.
messages = [{"role": "system", "content": IOC_SYSTEM_PROMPT}]

# Render with the tokenizer's chat template as plain text (tokenize=False) so
# the formatted prompt can be inspected by eye.
conversation = tokenizer.apply_chat_template(messages, tokenize=False)
print(conversation)


In [None]:
# messages = dataset[0]["messages"]
# conversation = tokenizer.apply_chat_template(messages, tokenize=False)
# print(conversation)

# Prepare the Model

In [None]:
from transformers import AutoModelForCausalLM
import torch

# Checkpoint to fine-tune. "openai/gpt-oss-20b" is the alternative this
# notebook was originally written around; swap the id here to switch.
model_id = "Qwen/Qwen3-4B-Instruct-2507"

# Keyword arguments forwarded unchanged to from_pretrained: eager attention,
# bfloat16 weights, KV cache disabled, automatic device placement.
model_kwargs = {
    "attn_implementation": "eager",
    "torch_dtype": torch.bfloat16,
    "use_cache": False,
    "device_map": "auto",
}

model = AutoModelForCausalLM.from_pretrained(model_id, **model_kwargs)

In [None]:
# Quick check of the base model before fine-tuning: one Spanish user turn.
messages = [
    {"role": "user", "content": "¿Cuál es el capital de Australia?"},
]

# return_dict=True yields both input_ids and attention_mask, so generate()
# receives an explicit mask and this cell does not depend on
# apply_chat_template returning a bare tensor.
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    return_tensors="pt",
    return_dict=True,
).to(model.device)
# Kept under its original name for any later cell that inspects it.
input_ids = inputs["input_ids"]

output_ids = model.generate(**inputs, max_new_tokens=512)
# Decode the full sequence (prompt + completion, special tokens included).
response = tokenizer.batch_decode(output_ids)[0]
print(response)

In [None]:
# Expert projection weights of the gpt-oss MoE blocks (layers 7, 15, 23) that
# should get LoRA adapters in addition to the linear layers. These names only
# exist in mixture-of-experts checkpoints such as openai/gpt-oss-20b.
moe_expert_parameters = [
    "7.mlp.experts.gate_up_proj",
    "7.mlp.experts.down_proj",
    "15.mlp.experts.gate_up_proj",
    "15.mlp.experts.down_proj",
    "23.mlp.experts.gate_up_proj",
    "23.mlp.experts.down_proj",
]

# Keep only the expert parameters the loaded model actually has. The leading
# "." in the suffix check stops "7.mlp..." from matching layer 17 or 27.
# For a dense checkpoint (e.g. Qwen3-4B) nothing matches and target_parameters
# is left out entirely, instead of passing names that cannot apply.
model_parameter_names = [param_name for param_name, _ in model.named_parameters()]
present_expert_parameters = [
    target
    for target in moe_expert_parameters
    if any(
        param_name == target or param_name.endswith("." + target)
        for param_name in model_parameter_names
    )
]

lora_kwargs = {
    "r": 8,
    "lora_alpha": 16,
    "target_modules": "all-linear",
}
if present_expert_parameters:
    lora_kwargs["target_parameters"] = present_expert_parameters

peft_config = LoraConfig(**lora_kwargs)
peft_model = get_peft_model(model, peft_config)
# Report how many parameters the adapters make trainable.
peft_model.print_trainable_parameters()

# Fine Tuning

In [None]:
# Supervised fine-tuning hyperparameters, gathered in one mapping and unpacked
# into SFTConfig.
# NOTE(review): output_dir (which also names the Hub repo when push_to_hub=True)
# still says "gpt-oss-20b" although the checkpoint loaded above is Qwen3-4B --
# confirm the intended repository name.
sft_settings = {
    "learning_rate": 2e-4,
    "gradient_checkpointing": True,
    "num_train_epochs": 1,
    "logging_steps": 1,
    # Reduced batch size ...
    "per_device_train_batch_size": 1,
    # ... with more accumulation steps to compensate for it.
    "gradient_accumulation_steps": 16,
    "max_length": 2048,
    "warmup_ratio": 0.03,
    "lr_scheduler_type": "cosine_with_min_lr",
    "lr_scheduler_kwargs": {"min_lr_rate": 0.1},
    "output_dir": "gpt-oss-20b-multilingual-reasoner",
    "report_to": "wandb",
    "push_to_hub": True,
}

training_args = SFTConfig(**sft_settings)

In [None]:
# Wire the LoRA-wrapped model, the SFT configuration, the training split and
# the tokenizer into the trainer, then start the run.
trainer_inputs = dict(
    model=peft_model,
    args=training_args,
    train_dataset=dataset,
    processing_class=tokenizer,
)
trainer = SFTTrainer(**trainer_inputs)
trainer.train()

# Push Model to HuggingFace Hub

In [None]:
# Write the trained model to the configured output directory first ...
trainer.save_model(training_args.output_dir)
# ... then upload to the Hugging Face Hub, passing the training dataset's name
# along with the push.
trainer.push_to_hub(dataset_name="HuggingFaceH4/Multilingual-Thinking")