In [2]:
!pip install deepspeed pydantic torch transformers tensorboard


Collecting deepspeed
  Using cached deepspeed-0.14.3-py3-none-any.whl
Collecting tensorboard
  Downloading tensorboard-2.17.0-py3-none-any.whl.metadata (1.6 kB)
Collecting hjson (from deepspeed)
  Downloading hjson-3.1.0-py3-none-any.whl.metadata (2.6 kB)
Collecting ninja (from deepspeed)
  Downloading ninja-1.11.1.1-py2.py3-none-manylinux1_x86_64.manylinux_2_5_x86_64.whl.metadata (5.3 kB)
Collecting nvidia-ml-py (from deepspeed)
  Downloading nvidia_ml_py-12.555.43-py3-none-any.whl.metadata (8.6 kB)
Collecting py-cpuinfo (from deepspeed)
  Downloading py_cpuinfo-9.0.0-py3-none-any.whl.metadata (794 bytes)
Collecting absl-py>=0.4 (from tensorboard)
  Downloading absl_py-2.1.0-py3-none-any.whl.metadata (2.3 kB)
Collecting grpcio>=1.48.2 (from tensorboard)
  Downloading grpcio-1.64.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.3 kB)
Collecting markdown>=2.6.8 (from tensorboard)
  Downloading Markdown-3.6-py3-none-any.whl.metadata (7.0 kB)
Collecting protobuf!=4

In [3]:
# Simplified Dolly Training Script

import json
import logging
import os
import re
from datetime import datetime

from training.consts import DEFAULT_INPUT_MODEL, SUGGESTED_INPUT_MODELS
from training.trainer import load_training_dataset, load_tokenizer
from training.generate import generate_response, load_model_tokenizer_for_generate

# Setup logging
# Root logger: INFO level, timestamped "<time> <level> [<logger>] <message>" records.
logging.basicConfig(
    level=logging.INFO,
    datefmt="%Y-%m-%d %H:%M:%S",
    format="%(asctime)s %(levelname)s [%(name)s] %(message)s",
)

# Raise the threshold on two chatty third-party loggers so they do not drown
# out the notebook's own output.
for _logger_name, _threshold in (("py4j", logging.WARNING), ("sh.command", logging.ERROR)):
    logging.getLogger(_logger_name).setLevel(_threshold)

# Configurations
# -- model and hardware --------------------------------------------------
input_model = "EleutherAI/pythia-2.8b"  # Change this to the desired model
gpu_family, num_gpus = "a10", 1  # GPU type (selects the deepspeed config file) and GPU count

# -- optimisation --------------------------------------------------------
# NOTE: `batch_size` is only a starting value; it is overridden further down
# for the "a10" and "a100" GPU families.
epochs, batch_size = 2, 2
learning_rate = 5e-6

# Paths
# Every run gets its own checkpoint directory, "<model_name>__<timestamp>",
# under ~/dolly_training; TensorBoard event files are expected in its "runs" subfolder.
model_name = "dolly"
timestamp = f"{datetime.now():%Y-%m-%dT%H:%M:%S}"
checkpoint_dir_name = "__".join((model_name, timestamp))

local_training_root = os.path.join(os.path.expanduser('~'), "dolly_training")
local_output_dir = os.path.join(local_training_root, checkpoint_dir_name)
tensorboard_display_dir = local_output_dir + "/runs"

# Create the run directory up front (parents included); a pre-existing one is fine.
os.makedirs(local_output_dir, exist_ok=True)

print(f"Local Output Dir: {local_output_dir}")
print(f"Tensorboard Display Dir: {tensorboard_display_dir}")

# Deepspeed configuration file path
# Resolved relative to the current working directory: ./config/<gpu_family>_config.json
config_file_name = f"{gpu_family}_config.json"
deepspeed_config = os.path.join(os.getcwd(), "config", config_file_name)
print(f"Deepspeed config file: {deepspeed_config}")

# Adjust batch size based on GPU type
# Known families get a fixed per-device batch size; any other family keeps the
# value chosen in the configuration section.
batch_size = {"a10": 4, "a100": 6}.get(gpu_family, batch_size)

# bf16 is switched off only for the "v100" family.
bf16_flag = "--bf16 false" if gpu_family == "v100" else "--bf16 true"

os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Fake data for training (create a small dataset for the example)
fake_data_path = "fake_data.jsonl"
fake_data = [
    {"instruction": "Translate 'Hello' to French", "response": "Bonjour"},
    {"instruction": "Translate 'Goodbye' to Spanish", "response": "Adiós"}
]

# Write JSON Lines: exactly one JSON object per line.
# - `json.dumps` is required: formatting the dict directly would emit its Python
#   repr (single-quoted keys/values), which JSON parsers reject.
# - UTF-8 is set explicitly so non-ASCII text ("Adiós") is written the same way
#   regardless of the platform's default encoding; `ensure_ascii=False` keeps it
#   human-readable in the file instead of \u-escaped.
with open(fake_data_path, 'w', encoding="utf-8") as f:
    for entry in fake_data:
        f.write(json.dumps(entry, ensure_ascii=False) + "\n")

# Training command
# The launcher invocation is assembled as a single shell line: a leading newline,
# the arguments separated by five spaces, and a trailing newline.
_launcher_args = (
    f"deepspeed --num_gpus={num_gpus}",
    "--module training.trainer",
    f"--input-model {input_model}",
    f"--deepspeed {deepspeed_config}",
    f"--epochs {epochs}",
    f"--local-output-dir {local_output_dir}",
    f"--per-device-train-batch-size {batch_size}",
    f"--per-device-eval-batch-size {batch_size}",
    "--logging-steps 10",
    "--save-steps 200",
    "--save-total-limit 20",
    "--eval-steps 50",
    "--warmup-steps 50",
    "--test-size 200",
    f"--lr {learning_rate}",
    bf16_flag,
    f"--data_path {fake_data_path}",
)
training_command = "\n" + (" " * 5).join(_launcher_args) + "\n"

print(training_command)

# Run training (uncomment the following line to execute)
# !{training_command}

# Example instructions
instructions = [
    "Write a love letter to Edgar Allan Poe.",
    "Write a tweet announcing Dolly, a large language model from Databricks.",
    "I'm selling my Nikon D-750, write a short blurb for my ad.",
    "Explain to me the difference between nuclear fission and fusion.",
    "Give me a list of 5 science fiction books I should read next."
]

# Set additional pipeline args based on GPU type
# "v100" -> float16, "a10"/"a100" -> bfloat16, anything else -> "auto".
_dtype_by_gpu = {"v100": "float16", "a10": "bfloat16", "a100": "bfloat16"}
pipeline_kwargs = {'torch_dtype': _dtype_by_gpu.get(gpu_family, "auto")}

# Load model and tokenizer for inference, then generate responses.
# The training step is commented out by default, in which case `local_output_dir`
# is a freshly created, empty directory and there is nothing to load from it.
# Only attempt inference when the directory has contents, so a fresh
# "Restart & Run All" reports why generation was skipped instead of raising.
# NOTE(review): a non-empty directory is taken as "training produced output";
# confirm what files load_model_tokenizer_for_generate actually requires.
if os.path.isdir(local_output_dir) and os.listdir(local_output_dir):
    model, tokenizer = load_model_tokenizer_for_generate(local_output_dir)

    # Generate responses
    for instruction in instructions:
        response = generate_response(instruction, model=model, tokenizer=tokenizer, **pipeline_kwargs)
        if response:
            print(f"Instruction: {instruction}\n\n{response}\n\n-----------\n")
else:
    print(
        f"No training output found in {local_output_dir}; "
        "run the training command first. Skipping generation."
    )


ModuleNotFoundError: No module named 'click'