<a href="https://colab.research.google.com/github/resloved/RWKV-notebooks/blob/master/RWKV_LoRA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# RWKV LoRA Fine-Tuning

[RWKV](https://github.com/BlinkDL/RWKV-LM) is an RNN language model with transformer-level performance.


This notebook aims to streamline fine-tuning RWKV models with LoRA (low-rank adaptation), using the [RWKV-LM-LoRA](https://github.com/Blealtan/RWKV-LM-LoRA) implementation.


## Setup

In [None]:
#@title Google Drive Options { display-mode: "form" }
save_models_to_drive = True #@param {type:"boolean"}
drive_mount = '/content/drive' #@param {type:"string"}
output_dir = 'rwkv-v4-lora' #@param {type:"string"}
tuned_model_name = 'tuned' #@param {type:"string"}

import os

# Decide where base checkpoints and tuned weights are stored.
# `output_dir` is honoured in both modes so that local runs get their own
# folder instead of writing straight into /content.
if save_models_to_drive:
    # Imported lazily: google.colab only exists inside a Colab runtime.
    from google.colab import drive
    drive.mount(drive_mount)
    output_path = f"{drive_mount}/MyDrive/{output_dir}"
else:
    output_path = f"/content/{output_dir}"

# Directory handed to the training script as its project dir.
tuned_model_dir = f"{output_path}/{tuned_model_name}"
os.makedirs(tuned_model_dir, exist_ok=True)
# Download cache for the pretrained checkpoints.
os.makedirs(f"{output_path}/base_models/", exist_ok=True)

print(f"Saving models to {output_path}")

In [None]:
# Print the status of the GPU attached to this runtime (fails if no NVIDIA GPU/driver is available).
!nvidia-smi

In [None]:
!git clone https://github.com/Blealtan/RWKV-LM-LoRA
repo_dir = "/content/RWKV-LM-LoRA/RWKV-v4neo"
%cd $repo_dir

In [None]:
!pip install transformers pytorch-lightning==1.9 deepspeed wandb ninja

## Load Base Model




In [None]:
#@title Base Model Options
#@markdown Selecting any of the listed options downloads the checkpoint from Hugging Face unless it is already present in `base_models/`
base_model_name = "RWKV-4-Pile-1B5" #@param ["RWKV-4-Pile-14B", "RWKV-4-Pile-7B", "RWKV-4-Pile-3B", "RWKV-4-Pile-1B5", "RWKV-4-Pile-430M", "RWKV-4-Pile-169M"]

import os
import urllib.request

# model name -> (checkpoint file name, n_layer, n_embd)
BASE_MODELS = {
    "RWKV-4-Pile-169M": ("RWKV-4-Pile-169M-20220807-8023.pth", 12, 768),
    "RWKV-4-Pile-430M": ("RWKV-4-Pile-430M-20220808-8066.pth", 24, 1024),
    "RWKV-4-Pile-1B5": ("RWKV-4-Pile-1B5-20220903-8040.pth", 24, 2048),
    "RWKV-4-Pile-3B": ("RWKV-4-Pile-3B-20221008-8023.pth", 32, 2560),
    "RWKV-4-Pile-7B": ("RWKV-4-Pile-7B-20221115-8047.pth", 32, 4096),
    "RWKV-4-Pile-14B": ("RWKV-4-Pile-14B-20230213-8019.pth", 40, 5120),
}

# Fail with a clear message instead of a NameError further down when the
# name is not one of the known checkpoints.
if base_model_name not in BASE_MODELS:
    raise ValueError(
        f"Unknown base model {base_model_name!r}; expected one of {sorted(BASE_MODELS)}"
    )
base_model_file, n_layer, n_embd = BASE_MODELS[base_model_name]

base_model_url = f"https://huggingface.co/BlinkDL/{base_model_name.lower()}/resolve/main/{base_model_file}"
base_model_path = f"{output_path}/base_models/{base_model_file}"

# Download whenever the checkpoint is missing, regardless of whether the
# output lives on Drive or on local disk: training needs the file either way.
if not os.path.exists(base_model_path):
    os.makedirs(os.path.dirname(base_model_path), exist_ok=True)
    print(f"Downloading {base_model_name} this may take a while")
    # Download to a temporary name and rename on success so an interrupted
    # transfer is never mistaken for a complete checkpoint on the next run.
    partial_path = f"{base_model_path}.part"
    urllib.request.urlretrieve(base_model_url, partial_path)
    os.replace(partial_path, base_model_path)

print(f"Using {base_model_path} as base")

## Generate Training Data

In [None]:
#@title Training Data Options
#@markdown `input_file` should be the path to a single file that contains the text you want to fine-tune with.
#@markdown Either upload a file to this notebook instance or reference a file in your Google drive.

import numpy as np
from transformers import PreTrainedTokenizerFast

# Tokenizer definition shipped with the cloned repository.
tokenizer = PreTrainedTokenizerFast(tokenizer_file=f'{repo_dir}/20B_tokenizer.json')

input_file = "/content/drive/MyDrive/training.txt" #@param {type:"string"}
# Written relative to the current working directory; the training cell
# reads the same relative name.
output_file = 'train.npy'

print(f'Tokenizing {input_file} (VERY slow. please wait)')

# Read the whole corpus in one go; the context manager closes the file
# handle as soon as the text is in memory.
with open(input_file, encoding="utf-8") as corpus:
    data_raw = corpus.read()
print(f'Raw length = {len(data_raw)}')

data_code = tokenizer.encode(data_raw)
print(f'Tokenized length = {len(data_code)}')

# Token ids are stored as uint16 (max 65535); this assumes every id produced
# by the tokenizer fits in that range.
out = np.array(data_code, dtype='uint16')
np.save(output_file, out, allow_pickle=False)

## Training

In [None]:
#@title Begin Training with these Options { display-mode: "form" }
n_epoch = 100 #@param {type:"integer"}
epoch_save_frequency = 5 #@param {type:"integer"}
batch_size =  1 #@param {type:"integer"} 
ctx_len = 1024 #@param {type:"integer"}
vocab_size = 50277 #@param {type:"integer"}

!python3 train.py \
  --load_model $base_model_path \
  --proj_dir $tuned_model_dir \
  --data_file "train.npy" \
  --data_type "numpy" \
  --vocab_size $vocab_size \
  --ctx_len $ctx_len \
  --epoch_save $epoch_save_frequency \
  --epoch_count $n_epoch \
  --n_layer $n_layer \
  --n_embd $n_embd \
  --epoch_steps 1000 --epoch_begin 0  --micro_bsz $batch_size --pre_ffn 0 --head_qk 0 --lr_init 1e-5 --lr_final 1e-5 --warmup_steps 0 --beta1 0.9 --beta2 0.999 --adam_eps 1e-8 --accelerator gpu --devices 1 --precision bf16 --strategy deepspeed_stage_2 --grad_cp 0 \
  --lora --lora_r 8 --lora_alpha 32 --lora_dropout 0.01