In [25]:
!pip install -q requests torch peft bitsandbytes transformers trl accelerate sentencepiece wandb

In [1]:
import os
import glob
from huggingface_hub import login
import torch
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TextStreamer, TrainingArguments,AutoTokenizer
from datasets import load_dataset, Dataset
import wandb
from peft import LoraConfig
from trl import SFTTrainer, SFTConfig
from peft import PeftModel
from datetime import datetime
from dotenv import load_dotenv

In [3]:
from datasets import load_dataset, Dataset

# Read variables from a file called .env into the process environment,
# then pick up the Hugging Face token (None when it is not set)
load_dotenv()
hf_token = os.environ.get('HF_TOKEN')

In [4]:
# A utility method to convert the text contents of a file into a list of methods

def extract_method_bodies(text):
    """Return the body of every ``def trade...`` function found in ``text``.

    The text is split on the literal marker ``def trade``; whatever precedes the
    first marker is discarded. For each remaining chunk the first line (the rest
    of the signature) is dropped and the remaining non-blank lines are joined
    back together with newlines.

    Args:
        text: Source code to scan, as a single string.

    Returns:
        A list of strings, one per occurrence of the marker, in order of
        appearance. Empty when ``text`` contains no marker.
    """
    bodies = []
    for chunk in text.split('def trade')[1:]:
        # Skip the first line: it holds the remainder of the signature.
        body_lines = chunk.split('\n')[1:]
        # After splitting on '\n' no element can equal '\n', so comparing against
        # '\n' never filtered anything; test for blank lines instead.
        bodies.append('\n'.join(line for line in body_lines if line.strip()))
    return bodies

In [5]:
# Gather training data: the trade method bodies of every .py file in the working directory

bodies = []
for source_path in glob.glob("*.py"):
    with open(source_path, mode='r', encoding='utf-8') as source_file:
        bodies.extend(extract_method_bodies(source_file.read()))

print(f"Extracted {len(bodies)} trade method bodies")

Extracted 70 trade method bodies


In [6]:
# Constants

# Model, dataset and run identifiers
BASE_MODEL = "Qwen/CodeQwen1.5-7B"
DATASET_NAME = "ed-donner/trade_code_data"
PROJECT_NAME = "trading"
RUN_NAME = datetime.now().strftime("%Y-%m-%d_%H.%M.%S")  # timestamp taken when this cell runs
PROJECT_RUN_NAME = "-".join([PROJECT_NAME, RUN_NAME])

# Hyperparameters for QLoRA Fine-Tuning

# LoRA adapter settings
LORA_R = 16
LORA_ALPHA = 32
LORA_DROPOUT = 0.1
TARGET_MODULES = ["q_proj", "v_proj", "k_proj", "o_proj"]

# Optimisation settings
EPOCHS = 1
BATCH_SIZE = 1
GRADIENT_ACCUMULATION_STEPS = 1
LEARNING_RATE = 2e-4
LR_SCHEDULER_TYPE = 'cosine'
WEIGHT_DECAY = 0.001
MAX_SEQUENCE_LENGTH = 320

# Other config

STEPS = 10        # passed to the trainer as logging_steps
SAVE_STEPS = 300  # passed to the trainer as save_steps

In [7]:
# Authenticate with Weights & Biases
wandb_api_key = os.getenv('WANDB_API_KEY')
wandb.login()

# Weights & Biases settings, supplied through environment variables
# so that runs are recorded against our project
os.environ.update({
    "WANDB_PROJECT": PROJECT_NAME,
    "WANDB_LOG_MODEL": "end",
    "WANDB_WATCH": "false",
})

wandb: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
wandb: Currently logged in as: khandelwalkeshav2004 (khandelwalkeshav2004-jecrc-university) to https://api.wandb.ai. Use `wandb login --relogin` to force relogin


In [None]:
# Tokenizer for the base model: pad on the right, reusing the EOS token as the padding token
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token

# Configure 4-bit quantization
# (this heading needs its leading '#': as bare text it is a SyntaxError that stops the cell from running)
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",           # Common choice
    bnb_4bit_compute_dtype="float16"     # Improves compatibility and speed
)

# Load the base model with the 4-bit quantization config (quant_config) built earlier in this cell.
# quantization_config must actually be passed: the QLoRA hyperparameters in this notebook
# assume a quantized base model, and quant_config is otherwise never used.
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    quantization_config=quant_config,
    device_map="auto",
    trust_remote_code=True              # Recommended if using custom models
)
# Generation pads with the same token the tokenizer pads with
base_model.generation_config.pad_token_id = tokenizer.pad_token_id

# Print memory footprint in MB
print(f"Memory footprint: {base_model.get_memory_footprint() / 1e6:.1f} MB")

In [None]:
# Completion prompt handed to the model: a module preamble that ends with an
# open `def trade():` for the model to continue.
# Everything between the triple quotes, including the lines starting with '#',
# is part of the string itself.
prompt = """
# tickers is a list of stock tickers
import tickers

# prices is a dict; the key is a ticker and the value is a list of historic prices, today first
import prices

# Trade represents a decision to buy or sell a quantity of a ticker
import Trade

import random
import numpy as np

def trade():
"""

In [None]:
from transformers import TextStreamer

# Let the base model complete the prompt, streaming the generated text
streamer = TextStreamer(tokenizer)

prompt_ids = tokenizer.encode(prompt, return_tensors="pt")
inputs = prompt_ids.to("cuda")
outputs = base_model.generate(inputs, streamer=streamer, max_new_tokens=100)

In [None]:
# Fetch the train split of our dataset and display its summary
dataset = load_dataset(DATASET_NAME, split="train")
dataset

In [None]:
# LoRA configuration: which modules receive adapters, and the adapter hyperparameters

peft_parameters = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=TARGET_MODULES,
    r=LORA_R,
    lora_alpha=LORA_ALPHA,
    lora_dropout=LORA_DROPOUT,
    bias="none",
)

# General training configuration, grouped by concern

train_params = SFTConfig(
    # Run identity, logging and reporting
    output_dir=PROJECT_RUN_NAME,
    run_name=RUN_NAME,
    report_to="wandb",
    logging_steps=STEPS,
    # Checkpointing
    save_steps=SAVE_STEPS,
    save_total_limit=10,
    # Schedule and batching
    num_train_epochs=EPOCHS,
    max_steps=-1,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
    group_by_length=True,
    eval_strategy="no",
    # Optimiser (fp16 / bf16 are not set here)
    optim="paged_adamw_32bit",
    learning_rate=LEARNING_RATE,
    lr_scheduler_type=LR_SCHEDULER_TYPE,
    warmup_ratio=0.03,
    weight_decay=WEIGHT_DECAY,
    max_grad_norm=0.3,
    # Data
    dataset_text_field="text",
    max_seq_length=MAX_SEQUENCE_LENGTH,
)

# Build the Supervised Fine Tuning Trainer from the LoRA and training
# configurations, together with the base model, tokenizer and dataset

fine_tuning = SFTTrainer(
    model=base_model,
    args=train_params,
    peft_config=peft_parameters,
    train_dataset=dataset,
    tokenizer=tokenizer,
)

# Run the fine-tuning
fine_tuning.train()

# Upload the fine-tuned model to Hugging Face as a private repo
fine_tuning.model.push_to_hub(PROJECT_RUN_NAME, private=True)

In [None]:
# Wrap base_model with the adapter identified by PROJECT_RUN_NAME.
# NOTE(review): PROJECT_RUN_NAME has no user/organisation prefix — confirm it
# resolves to the intended adapter (local directory vs. Hub repo).
fine_tuned_model = PeftModel.from_pretrained(base_model, PROJECT_RUN_NAME)

In [None]:
# Code up a trade: the fine-tuned model completes the same prompt

prompt_ids = tokenizer.encode(prompt, return_tensors="pt")
inputs = prompt_ids.to("cuda")
outputs = fine_tuned_model.generate(inputs, streamer=streamer, max_new_tokens=120)

In [None]:
# Same prompt again, this time sampling (temperature 0.8) with a longer token budget
outputs = fine_tuned_model.generate(
    inputs,
    streamer=streamer,
    do_sample=True,
    temperature=0.8,
    max_new_tokens=180,
)