In [1]:
export HF_HOME=/hpi/fs00/scratch/liudvikas.zekas/.cache
NUM_GPUS=2
DISTRIBUTED_ARGS="
    --nnodes=1 \
    --nproc_per_node ${NUM_GPUS} \
    --rdzv_backend c10d \
    --rdzv_endpoint localhost:0
"

# arguments that are very likely to be changed
# according to your own case
MODEL_ID=llava-1.5-7b                                   # model id; pick on by running `python supported_models.py`
TRAIN_DATA_PATH=./dataset_new/train.json  # path to the training data json file
EVAL_DATA_PATH=./dataset_new/val.json    # path to the evaluation data json file (optional)
IMAGE_FOLDER=/                      # path to the image root folder; if provided, the image paths in the json should be relative
VIDEO_FOLDER=/                  # path to the video root folder; if provided, the video paths in the json should be relative
NUM_FRAMES=8                                            # how many frames are sampled from each video

TRAIN_VISION_ENCODER=False                              # whether train the vision encoder
USE_VISION_LORA=False                                   # whether use lora for vision encoder (only effective when `TRAIN_VISION_ENCODER` is True)
TRAIN_VISION_PROJECTOR=False                            # whether train the vision projector (only full finetuning is supported)

USE_LORA=True                                           # whether use lora for llm
Q_LORA=False                                            # whether use q-lora for llm; only effective when `USE_LORA` is True
LORA_R=8                                                # the lora rank (both llm and vision encoder)
LORA_ALPHA=8                                            # the lora alpha (both llm and vision encoder)

RUN_ID=${MODEL_ID}_lora-${USE_LORA}_qlora-${Q_LORA}     # a custom run id that determines the checkpoint folder and wandb run name

DS_STAGE=zero3                                          # deepspeed stage; < zero2 | zero3 >
PER_DEVICE_BATCH_SIZE=2                                 # batch size per GPU
GRAD_ACCUM=1                                            # gradient accumulation steps
NUM_EPOCHS=5                                            # number of training epochs

LR=2e-5                                                 # learning rate
MODEL_MAX_LEN=1024                                       # maximum input length of the model


torchrun $DISTRIBUTED_ARGS lmms-finetune/train.py \
    --model_id $MODEL_ID \
    --data_path $TRAIN_DATA_PATH \
    --eval_data_path $EVAL_DATA_PATH \
    --image_folder $IMAGE_FOLDER \
    --video_folder $VIDEO_FOLDER \
    --num_frames $NUM_FRAMES \
    --output_dir /hpi/fs00/scratch/liudvikas.zekas/checkpoints/$RUN_ID \
    --report_to wandb \
    --run_name $RUN_ID \
    --deepspeed ./lmms-finetune/ds_configs/${DS_STAGE}.json \
    --bf16 True \
    --num_train_epochs $NUM_EPOCHS \
    --per_device_train_batch_size $PER_DEVICE_BATCH_SIZE \
    --per_device_eval_batch_size $PER_DEVICE_BATCH_SIZE \
    --gradient_accumulation_steps $GRAD_ACCUM \
    --eval_strategy "epoch" \
    --save_strategy "epoch" \
    --save_total_limit 1 \
    --learning_rate ${LR} \
    --weight_decay 0. \
    --warmup_ratio 0.03 \
    --lr_scheduler_type "cosine" \
    --logging_steps 1 \
    --tf32 False \
    --model_max_length $MODEL_MAX_LEN \
    --gradient_checkpointing True \
    --dataloader_num_workers 4 \
    --train_vision_encoder $TRAIN_VISION_ENCODER \
    --use_vision_lora $USE_VISION_LORA \
    --train_vision_projector $TRAIN_VISION_PROJECTOR \
    --use_lora $USE_LORA \
    --q_lora $Q_LORA \
    --lora_r $LORA_R \
    --lora_alpha $LORA_ALPHA


SyntaxError: unterminated string literal (detected at line 3) (1991594926.py, line 3)

In [1]:
import json
import math
import os

# Define the folder path
folder = "dataset_new"

# List of file names to process
files = ["train.json", "val.json"]

for filename in files:
    file_path = os.path.join(folder, filename)
    with open(file_path, "r", encoding="utf-8") as f:
        data = json.load(f)
    
    # Calculate the number of items (at least one)
    n_items = max(1, math.ceil(len(data) * 0.01))
    
    # Take only the first 1% of the items
    new_data = data[:n_items]
    
    # Define new filename: e.g., train_new.json
    new_filename = filename.replace(".json", "_new.json")
    new_file_path = os.path.join(folder, new_filename)
    
    with open(new_file_path, "w", encoding="utf-8") as f:
        json.dump(new_data, f, indent=2)
    
    print(f"Processed {filename}: total items = {len(data)}, new file items = {n_items}")


Processed train.json: total items = 1366, new file items = 14
Processed val.json: total items = 293, new file items = 3
