In [1]:
!pip install -q accelerate peft bitsandbytes transformers trl

In [75]:
import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
    DataCollatorForLanguageModeling,
    Trainer
)
from peft import LoraConfig, get_peft_model
# from trl import SFTTrainer

In [76]:
from huggingface_hub import login
login(token="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")

Token has not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: fineGrained).
Your token has been saved to /home/jovyan/.cache/huggingface/token
Login successful


In [93]:
# Load LLaMA 7B model and tokenizer
model_name = "meta-llama/Llama-2-7b-chat-hf"
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=torch.float16
)
model.config.use_cache = False
model.config.pretraining_tp = 1

# Load LLaMA tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left" # 



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [94]:
model

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(32000, 4096)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaSdpaAttention(
          (q_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (v_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (o_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=4096, out_features=11008, bias=False)
          (up_proj): Linear(in_features=4096, out_features=11008, bias=False)
          (down_proj): Linear(in_features=11008, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm()
        (post_attention_layernorm): LlamaRMSNorm()
      )
    )
    (norm): LlamaRMSNorm()
  )
  (lm_head):

In [95]:
# Load dataset
dataset = load_dataset('csv', data_files='final_finetuned_model/updated_file.csv')


In [96]:
dataset['train'][:5]

{'user': ["I'm considering investing in tech stocks. What do you think?",
  'I have a moderate risk tolerance and a 5-year investment horizon.',
  'What percentage of my portfolio should be in bonds?',
  'Should I consider alternative investments?',
  'I’m familiar with real estate but not commodities.'],
 'system': ['Tech stocks can offer significant growth potential, but they also come with higher volatility. Could you share more about your investment goals and risk tolerance?',
  'Given your moderate risk tolerance and 5-year horizon, a balanced approach might be wise. How about a mix of 50% in stable tech giants and 50% in growth-oriented tech stocks?',
  'For a balanced portfolio, typically 30-40% in bonds is recommended. This can provide stability against the more volatile equities. Would you like a breakdown of specific bond types?',
  'Yes, alternative investments like real estate or commodities can help diversify your portfolio. What’s your familiarity with these asset classes

In [97]:
# create tokenize function
def tokenize_function(examples):
    # extract text
    text = examples["user"]

    #tokenize and truncate text
    tokenizer.truncation_side = "left"
    tokenized_inputs = tokenizer(
        text,
        return_tensors="np",
        truncation=True,
        max_length=512
    )

    return tokenized_inputs

In [98]:
tokenized_dataset = dataset.map(tokenize_function, batched=True)
tokenized_dataset

DatasetDict({
    train: Dataset({
        features: ['user', 'system', 'input_ids', 'attention_mask'],
        num_rows: 837
    })
})

In [99]:
# setting pad token
tokenizer.pad_token = tokenizer.eos_token
# data collator
def data_collator(features):
    inputs = [f['User'] + tokenizer.eos_token + f['System'] + tokenizer.eos_token for f in features]
    batch = tokenizer(inputs, padding=True, truncation=True, return_tensors="pt")
    batch["labels"] = batch["input_ids"].clone()
    return batch

In [100]:
config = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

# LoRA trainable version of model
model = get_peft_model(model, config)

# trainable parameter count
model.print_trainable_parameters()

trainable params: 4,194,304 || all params: 6,742,609,920 || trainable%: 0.0622


In [101]:
# Training arguments
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    num_train_epochs=3,
    weight_decay=0.01,
    warmup_steps=500,
    logging_dir="./logs",
    logging_steps=10,
    fp16=True,  # Mixed precision training
    push_to_hub=False,  # Set to True if you want to push to Hugging Face Hub
)

In [102]:
# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_params,
    train_dataset=dataset['train'], 
    tokenizer=tokenizer,
    data_collator=data_collator,
)

OutOfMemoryError: CUDA out of memory. Tried to allocate 32.00 MiB. GPU 0 has a total capacity of 44.55 GiB of which 8.69 MiB is free. Process 2380975 has 44.54 GiB memory in use. Of the allocated memory 44.22 GiB is allocated by PyTorch, and 10.09 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [29]:
# Start fine-tuning
trainer.train()

Epoch,Training Loss,Validation Loss
1,0.0,No log
2,0.0,No log
3,0.0,No log


TrainOutput(global_step=6462, training_loss=0.0, metrics={'train_runtime': 1199.5597, 'train_samples_per_second': 172.343, 'train_steps_per_second': 5.387, 'total_flos': 8201057420574720.0, 'train_loss': 0.0, 'epoch': 3.0})

In [30]:
# Save the fine-tuned model and tokenizer
model.save_pretrained("./finetuned-llama-7b")
tokenizer.save_pretrained("./finetuned-llama-7b")

('./finetuned-llama-7b/tokenizer_config.json',
 './finetuned-llama-7b/special_tokens_map.json',
 './finetuned-llama-7b/tokenizer.model',
 './finetuned-llama-7b/added_tokens.json',
 './finetuned-llama-7b/tokenizer.json')

In [2]:
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained("./finetuned-llama-7b")

# Load the model
model = AutoModelForCausalLM.from_pretrained("./finetuned-llama-7b")


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [7]:
input_text = "Give me plan for investment"
inputs = tokenizer(input_text, return_tensors="pt")

In [10]:
outputs = model.generate(
    **inputs,
    max_length=50,  # adjust as needed
    num_return_sequences=1,  # number of sequences to return
    temperature=0.7,  # adjust for diversity
    do_sample=True  # use sampling; set to False to use greedy decoding
)

generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(generated_text)


Give me plan for investment in real estate for beginners

Real estate investment can be a lucrative way to grow your wealth, but it's important to approach it with caution and a solid plan. Here are some


In [9]:
model = model.to("cuda")
inputs = {key: value.to("cuda") for key, value in inputs.items()}
outputs = model.generate(**inputs, max_length=50, do_sample=True)
