In [None]:
# --- Environment setup: reinstall a pinned training stack, then report what is importable ---
GPU_TRAINING=True            # True -> CUDA 12.1 torch wheel; False -> CPU-only torch wheel
FORCE_DISABLE_TRITON=False   # True -> skip the explicit triton install and export the disable flags below
TORCH_VERSION='2.3.1'
# NOTE(review): torch wheels usually pin an exact triton release; confirm TRITON_VERSION
# matches the pin of TORCH_VERSION, otherwise pip may report a dependency conflict.
TRITON_VERSION='2.3.0'
# importlib.util is imported explicitly: `import importlib` alone does not guarantee that the
# `util` submodule is bound, and find_spec() below needs it.
import os,importlib,importlib.util,sys,subprocess
UNINSTALL_UNUSED=True

def _pip_install(*args):
    """Run `pip install -q <args>` with the kernel's interpreter; raises CalledProcessError on failure."""
    subprocess.check_call([sys.executable,'-m','pip','install','-q',*args])

def _pip_uninstall_quiet(*packages):
    """Best-effort `pip uninstall -y <packages>`: output is discarded and the exit status is ignored."""
    subprocess.run([sys.executable,'-m','pip','uninstall','-y',*packages],stdout=subprocess.DEVNULL,stderr=subprocess.DEVNULL)

# Remove prior torch to avoid mix
_pip_uninstall_quiet('torch','torchvision','torchaudio','torchtext','triton')
if UNINSTALL_UNUSED:
    _pip_uninstall_quiet('fastai','timm','sentence-transformers','bigframes','cudf-cu12','cuml-cu12','dask-cudf-cu12','torchtune','spacy','albumentations','albucore','xgboost')
_pip_install('--upgrade','pip')
_pip_install('numpy==2.0.1')
if GPU_TRAINING:
    _pip_install(f'torch=={TORCH_VERSION}','--index-url','https://download.pytorch.org/whl/cu121')
    if not FORCE_DISABLE_TRITON:
        _pip_install(f'triton=={TRITON_VERSION}')
else:
    _pip_install(f'torch=={TORCH_VERSION}+cpu','-f','https://download.pytorch.org/whl/torch_stable.html')
_pip_install('fsspec==2024.5.0','gcsfs==2024.5.0','transformers==4.43.3','accelerate==0.34.2','datasets==2.20.0','trl==0.10.1','peft==0.11.1','bitsandbytes==0.44.0','safetensors==0.4.3','sentencepiece==0.2.0')
import torch
print('Torch',torch.__version__,'CUDA',torch.cuda.is_available())
has_triton=importlib.util.find_spec('triton') is not None
if has_triton:
    import triton; print('Triton',getattr(triton,'__version__','?'))
if FORCE_DISABLE_TRITON:
    # NOTE(review): these variables are exported after torch/triton were imported — confirm
    # that whatever consumes them reads them late enough.
    os.environ['USE_TRITON']='0'; os.environ['TRITON_DISABLE']='1'
# 4-bit is considered usable only when GPU training is requested, CUDA is visible,
# and bitsandbytes imports and exposes its `nn` module.
USE_4BIT_RUNTIME_OK=True
if GPU_TRAINING and torch.cuda.is_available():
    try:
        import bitsandbytes as bnb; assert hasattr(bnb,'nn')
    except Exception:
        USE_4BIT_RUNTIME_OK=False
else:
    USE_4BIT_RUNTIME_OK=False
# Export the result as an environment flag ('1' means 4-bit should be disabled).
os.environ['AUTO_DISABLE_4BIT']='1' if not USE_4BIT_RUNTIME_OK else '0'
import numpy,transformers,accelerate,peft,datasets
print('Versions',numpy.__version__,transformers.__version__,accelerate.__version__,peft.__version__,datasets.__version__)

In [None]:
# --- Fine-tuning configuration (everything a reader might tune lives here) ---
BASE_MODEL='TinyLlama/TinyLlama-1.1B-Chat'
OUTPUT_DIR='tinyllama_lora_adapter'
MERGED_OUTPUT_DIR='tinyllama_merged'
USE_4BIT=True
LORA_R=16
LORA_ALPHA=32
LORA_DROPOUT=0.05
NUM_EPOCHS=2
BATCH_SIZE=4
GR_ACCUM=4
LEARNING_RATE=2e-4
WARMUP_RATIO=0.05
MAX_SEQ_LEN=1024
SEED=42
import torch,os,random
# 4-bit loading needs CUDA. Also honor the AUTO_DISABLE_4BIT flag that the setup cell exports
# when bitsandbytes is not usable; if the flag is unset the behavior is unchanged.
if (not torch.cuda.is_available()) or os.environ.get('AUTO_DISABLE_4BIT')=='1':
    USE_4BIT=False
# Optional Hugging Face token (set env HF_TOKEN or assign directly below)
HF_TOKEN=os.environ.get('HF_TOKEN') or None
# Candidate public model IDs to try (order matters). Some variants may be available while others temporarily fail.
# dict.fromkeys drops repeated IDs while keeping first-seen order, so a failing ID is not retried.
CANDIDATE_MODELS=list(dict.fromkeys([
    BASE_MODEL,
    'TinyLlama/TinyLlama-1.1B-Chat-v1.0',
    'TinyLlama/TinyLlama-1.1B-Chat',
    'TinyLlama/TinyLlama_v1.1'
]))
# Seed Python's and torch's RNGs for repeatable runs.
random.seed(SEED); torch.manual_seed(SEED)
os.makedirs(OUTPUT_DIR,exist_ok=True)

In [None]:
import os,sys,json
# Accumulators filled by the loading step further down in this cell.
train_records,eval_records=[],[]
DATA_PATHS=[]
# Upload JSONL files. Any filename containing 'eval' or 'validation' (case-insensitive) treated as eval set.
# Guard first: outside Colab there is no upload widget, so fail fast.
if not ('google.colab' in sys.modules or 'COLAB_RELEASE_TAG' in os.environ):
    raise RuntimeError('Colab upload environment not detected.')
from google.colab import files
uploaded=files.upload()
# Persist every uploaded payload under its own name and remember the path.
for fname,payload in uploaded.items():
    with open(fname,'wb') as fh:
        fh.write(payload)
    DATA_PATHS.append(fname)
if len(DATA_PATHS)==0:
    raise ValueError('No files uploaded.')

def load_jsonl(path):
    """Read instruction/KQL pairs from a JSON Lines file.

    Args:
        path: path of a text file holding one JSON object per line.

    Returns:
        A list of {'instruction': ..., 'kql': ...} dicts. The instruction is taken from the
        first truthy value among the 'instruction', 'prompt' and 'input' keys, the query from
        'kql' or 'output'. Blank lines, lines that are not valid JSON, JSON values that are not
        objects, and objects missing either field are skipped.

    Raises:
        OSError: if the file cannot be opened.
    """
    out=[]
    # utf-8-sig reads plain UTF-8 too, but also strips a leading BOM that would otherwise make
    # the first record fail to parse and be dropped.
    with open(path,'r',encoding='utf-8-sig') as f:
        for line in f:
            line=line.strip()
            if not line: continue
            try:
                obj=json.loads(line)
            except (ValueError,RecursionError):
                # json.JSONDecodeError is a ValueError; malformed lines are skipped on purpose.
                continue
            if not isinstance(obj,dict):
                # Valid JSON that is not an object (list, string, number) has no fields to read.
                continue
            instr=obj.get('instruction') or obj.get('prompt') or obj.get('input')
            kql=obj.get('kql') or obj.get('output')
            if instr and kql:
                out.append({'instruction':instr,'kql':kql})
    return out
# Load each uploaded file and route its records: a lower-cased path containing
# 'eval' or 'validation' feeds the eval set, anything else feeds the training set.
for path in DATA_PATHS:
    if not os.path.exists(path):
        continue
    recs=load_jsonl(path)
    lowered=path.lower()
    is_eval_file=any(tag in lowered for tag in ('eval','validation'))
    bucket=eval_records if is_eval_file else train_records
    bucket.extend(recs)
print(f'Train records: {len(train_records)}  Eval records (external): {len(eval_records)}')
if len(train_records)==0:
    raise ValueError('No valid training instruction/kql records found.')
# If no external eval records, a split will be created later.

In [None]:
# Fixed instruction header placed in front of every user request.
INSTRUCTION_PREFIX=(
    "You are a SOC assistant. Convert the user request into a **safe, time-bounded** KQL query.\n"
    "Return ONLY KQL, no explanations.\n"
    "Request: "
)

def build_training_example(rec):
    """Render one record as a single prompt-plus-answer training string.

    Args:
        rec: mapping with 'instruction' and 'kql' keys.

    Returns:
        INSTRUCTION_PREFIX followed by the instruction, a newline, the 'KQL:' marker line,
        and finally the query text.
    """
    request,query=rec['instruction'],rec['kql']
    return '{}{}\nKQL:\n{}'.format(INSTRUCTION_PREFIX,request,query)

# Render every record into its training text; the eval list stays empty when no eval records exist.
formatted_train=list(map(build_training_example,train_records))
if eval_records:
    formatted_eval=list(map(build_training_example,eval_records))
else:
    formatted_eval=[]

In [None]:
from datasets import Dataset,DatasetDict
# Wrap the formatted strings in a DatasetDict with 'train' and 'eval' splits.
train_ds=Dataset.from_list([{'text':t} for t in formatted_train])
if formatted_eval:
    # An external eval set was uploaded: use it unchanged.
    eval_ds=Dataset.from_list([{'text':t} for t in formatted_eval])
    ds=DatasetDict({'train':train_ds,'eval':eval_ds})
elif len(train_ds)>5:
    # No external eval set: hold out 10% of the training data. The split is seeded from the
    # notebook's SEED (falling back to 42 when it is not defined) so one knob controls it.
    split=train_ds.train_test_split(test_size=0.1,seed=globals().get('SEED',42))
    ds=DatasetDict({'train':split['train'],'eval':split['test']})
else:
    # Too few examples to split: the eval split reuses the training data.
    ds=DatasetDict({'train':train_ds,'eval':train_ds})

In [None]:
from transformers import AutoTokenizer
# Keep an already-defined MAX_SEQ_LEN; fall back to 1024 when this cell runs without it.
MAX_SEQ_LEN=1024 if 'MAX_SEQ_LEN' not in globals() else MAX_SEQ_LEN
# Load the tokenizer only once per kernel: an existing global `tokenizer` is reused as-is.
if 'tokenizer' not in globals():
    last_err=None
    # Try each candidate in order and stop at the first that loads.
    for mid in CANDIDATE_MODELS:
        try:
            tokenizer=AutoTokenizer.from_pretrained(mid,use_fast=True,token=HF_TOKEN)
            print('Loaded tokenizer from',mid)
            BASE_MODEL=mid  # remember which checkpoint the tokenizer came from
            break
        except Exception as e:
            last_err=e
            print('Tokenizer load failed for',mid,'->',type(e).__name__,str(e)[:120])
    if 'tokenizer' not in globals():
        if last_err is None:
            # Empty candidate list: there is no underlying error to re-raise, and
            # `raise None` would only produce a confusing TypeError.
            raise RuntimeError('CANDIDATE_MODELS is empty; no tokenizer could be loaded.')
        raise last_err
# Batched padding needs a pad token; reuse EOS when the tokenizer defines none.
if tokenizer.pad_token is None:
    tokenizer.pad_token=tokenizer.eos_token
print('Train examples:',len(ds['train']),' Eval examples:',len(ds['eval']))

def tokenize(batch):
    """Tokenize the 'text' column of a batch, truncating each entry to MAX_SEQ_LEN tokens."""
    texts=batch['text']
    encoded=tokenizer(texts,truncation=True,max_length=MAX_SEQ_LEN)
    return encoded

# Tokenize every split in batches and drop the raw 'text' column afterwards.
tokenized=ds.map(
    tokenize,
    batched=True,
    remove_columns=['text'],
)

In [None]:
from transformers import BitsAndBytesConfig,DataCollatorForLanguageModeling,TrainingArguments,Trainer,AutoModelForCausalLM,set_seed
from peft import LoraConfig,get_peft_model,prepare_model_for_kbit_training
import math,torch,os,shutil
# --- Load the base model, quantized to 4-bit when requested and CUDA is present ---
bnb_config=None
if USE_4BIT and torch.cuda.is_available():
    bnb_config=BitsAndBytesConfig(load_in_4bit=True,bnb_4bit_quant_type='nf4',bnb_4bit_use_double_quant=True,bnb_4bit_compute_dtype=torch.bfloat16)
# Arguments shared by both loading modes; the quantized mode adds its config and device map.
load_kwargs={
    'torch_dtype':torch.bfloat16 if torch.cuda.is_available() else torch.float32,
    'token':HF_TOKEN,
}
if bnb_config:
    load_kwargs['quantization_config']=bnb_config
    load_kwargs['device_map']='auto'
# Try BASE_MODEL first: the tokenizer step stores there the checkpoint it loaded from, so the
# model and tokenizer come from the same repository whenever that checkpoint is loadable.
# dict.fromkeys removes repeated IDs while preserving order.
model_candidates=list(dict.fromkeys([BASE_MODEL,*CANDIDATE_MODELS]))
model=None
last_err=None
for mid in model_candidates:
    try:
        model=AutoModelForCausalLM.from_pretrained(mid,**load_kwargs)
        print('Loaded model from',mid)
        BASE_MODEL=mid
        break
    except Exception as e:
        last_err=e
        print('Model load failed for',mid,'->',type(e).__name__,str(e)[:160])
if model is None:
    if last_err is None:
        # No candidate was attempted; `raise None` would only produce a confusing TypeError.
        raise RuntimeError('No candidate model IDs to load.')
    raise last_err
# --- Attach LoRA adapters and train ---
if bnb_config: model=prepare_model_for_kbit_training(model)
use_grad_ckpt=torch.cuda.is_available()
if use_grad_ckpt and not bnb_config:
    # With a frozen base model and gradient checkpointing, the embedding output must require
    # grad or backward finds no graph to differentiate. The non-quantized path skips
    # prepare_model_for_kbit_training above, so enable it explicitly here.
    model.enable_input_require_grads()
lora_cfg=LoraConfig(r=LORA_R,lora_alpha=LORA_ALPHA,lora_dropout=LORA_DROPOUT,bias='none',task_type='CAUSAL_LM',target_modules=['q_proj','k_proj','v_proj','o_proj'])
model=get_peft_model(model,lora_cfg)
set_seed(SEED if 'SEED' in globals() else 42)
# mlm=False selects causal-LM collation rather than masked-LM.
collator=DataCollatorForLanguageModeling(tokenizer=tokenizer,mlm=False)
# Warmup is given in optimizer steps: WARMUP_RATIO of the total updates over all epochs.
steps_per_epoch=math.ceil(len(tokenized['train'])/(BATCH_SIZE*GR_ACCUM))
warmup_steps=int(steps_per_epoch*NUM_EPOCHS*WARMUP_RATIO)
# NOTE(review): newer transformers releases rename `evaluation_strategy` to `eval_strategy`;
# confirm against the installed version before upgrading.
training_args=TrainingArguments(
    output_dir='train_out',
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE,
    gradient_accumulation_steps=GR_ACCUM,
    learning_rate=LEARNING_RATE,
    warmup_steps=warmup_steps,
    num_train_epochs=NUM_EPOCHS,
    logging_steps=1,
    evaluation_strategy='epoch',
    save_strategy='epoch',
    bf16=torch.cuda.is_available(),
    gradient_checkpointing=use_grad_ckpt,
    report_to=[],
    optim='paged_adamw_8bit' if (bnb_config is not None) else 'adamw_torch',
)
trainer=Trainer(model=model,args=training_args,train_dataset=tokenized['train'],eval_dataset=tokenized['eval'],data_collator=collator,tokenizer=tokenizer)
trainer.train(); trainer.save_state()
# Save the adapter and the tokenizer side by side in OUTPUT_DIR.
model.save_pretrained(OUTPUT_DIR); tokenizer.save_pretrained(OUTPUT_DIR)
# --- Smoke test: generate one query with the trained adapter and save it to disk ---
prompt='failed logins last 1 hour'
inputs=tokenizer(f"{INSTRUCTION_PREFIX}{prompt}\nKQL:\n",return_tensors='pt')
if torch.cuda.is_available(): inputs=inputs.to(model.device)
# Switch to eval mode so dropout (including LoRA dropout) is inactive during generation.
model.eval()
with torch.no_grad():
    # Greedy decoding (do_sample=False); `temperature` only applies when sampling, so it is not passed.
    out=model.generate(**inputs,max_new_tokens=120,do_sample=False,pad_token_id=tokenizer.eos_token_id)
full=tokenizer.decode(out[0],skip_special_tokens=True)
# Context manager closes the handle; explicit encoding avoids platform-dependent defaults.
with open('gen_sample.txt','w',encoding='utf-8') as sample_file:
    sample_file.write(full)
# --- Zip the adapter directory and offer it for download when running in Colab ---
zip_name=OUTPUT_DIR+'.zip'
# Remove a stale archive from a previous run before writing the new one.
if os.path.exists(zip_name): os.remove(zip_name)
shutil.make_archive(OUTPUT_DIR,'zip',OUTPUT_DIR)
try:
    from google.colab import files
except ImportError:
    # Not running in Colab: the archive simply stays on disk.
    print('google.colab not available; archive left at',zip_name)
else:
    try:
        files.download(zip_name)
    except Exception as e:
        # The download is best-effort: report the failure instead of hiding it, but do not crash.
        print('Download failed ->',type(e).__name__,str(e)[:160],'| archive left at',zip_name)