In [None]:
# Change the current directory to /content/ (default working directory in Google Colab)
# Remove any existing folder named "LLaMA-Factory" to ensure a clean environment
# Clone the LLaMA-Factory repository from GitHub
# Navigate into the cloned "LLaMA-Factory" directory
# List the files in the current directory to verify the repository structure
# Install the "unsloth" library from a specific GitHub repository, optimized for Colab usage
# Install the xformers library (a PyTorch library for efficient transformer models), version 0.0.25, without dependencies
# Install the current package (LLaMA-Factory) with support for bitsandbytes (optimized GPU computations for LLMs)
%cd /content/
%rm -rf LLaMA-Factory
!git clone https://github.com/hiyouga/LLaMA-Factory.git
%cd LLaMA-Factory
%ls
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install --no-deps xformers==0.0.25
!pip install .[bitsandbytes]

/content
Cloning into 'LLaMA-Factory'...
remote: Enumerating objects: 18916, done.[K
remote: Counting objects: 100% (954/954), done.[K
remote: Compressing objects: 100% (377/377), done.[K
^C
[Errno 2] No such file or directory: 'LLaMA-Factory'
/content
[0m[01;34msample_data[0m/
Collecting unsloth@ git+https://github.com/unslothai/unsloth.git (from unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git)
  Cloning https://github.com/unslothai/unsloth.git to /tmp/pip-install-ri_s3mye/unsloth_df34412ac8f2463cbb0deca43b8abcfb
  Running command git clone --filter=blob:none --quiet https://github.com/unslothai/unsloth.git /tmp/pip-install-ri_s3mye/unsloth_df34412ac8f2463cbb0deca43b8abcfb
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/pip/_internal/cli/base_command.py", line 179, in exc_logging_wrapper
    status = run_func(*args)
  File "/usr/local/lib/python3.10/dist-packages/pip/_internal/cli/req_command.py", line 67, in wrapper
    return

In [None]:
# Import PyTorch; an ImportError here means the installation step did not succeed.
import torch

# Check that a CUDA GPU is visible and, if not, print a setup instruction.
# A plain conditional is used instead of `assert` inside try/except because
# assert statements are removed when Python runs with -O, which would silently
# skip the check.
if not torch.cuda.is_available():
  print("Please set up a GPU before using LLaMA Factory: https://medium.com/mlearning-ai/training-yolov4-on-google-colab-316f8fff99c6")

In [None]:
# Import the json module to manipulate the identity.json file
# Navigate to the LLaMA-Factory directory to ensure the script works on files in the correct path
# Define constants for replacing placeholders in the dataset
# Open the "identity.json" dataset file, load it into a Python dictionary, and replace placeholders with actual values
# Replace placeholders "NAME" and "AUTHOR" in the "output" field of each sample
# Save the updated dataset back to the "identity.json" file
import json

%cd /content/LLaMA-Factory/

NAME = "Llama-3"
AUTHOR = "LLaMA Factory"

with open("data/identity.json", "r", encoding="utf-8") as f:
  dataset = json.load(f)

for sample in dataset:
  sample["output"] = sample["output"].replace("NAME", NAME).replace("AUTHOR", AUTHOR)

with open("data/identity.json", "w", encoding="utf-8") as f:
  json.dump(dataset, f, indent=2, ensure_ascii=False)

/content/LLaMA-Factory


In [None]:
# Install required dependencies listed in the repository's requirements.txt file
# Install bitsandbytes library, which provides low-level GPU optimizations for machine learning models
# Install the rouge-chinese library, used for evaluating text summarization tasks in Chinese
# Install the Accelerate library, which helps efficiently run models on multi-GPU setups
# Install tiktoken, a fast tokenizer for LLMs such as GPT
# Install transformers_stream_generator, likely used for streaming text generation from transformer models
# Install bitsandbytes from PyPI (Python Package Index) again to ensure compatibility
# Install the accelerate library for managing hardware and distribution strategies
# Install specific versions of the transformers library to ensure compatibility
# Install the flash_attn library, which provides optimized implementations for attention mechanisms
# Install llama_index, used for constructing, managing, and querying large language model-based indices
!pip install -r requirements.txt
!pip install bitsandbytes
!pip install rouge-chinese
!pip install Accelerate
!pip install tiktoken
!pip install transformers_stream_generator
!pip install -i https://pypi.org/simple/ bitsandbytes
!pip install accelerate
!pip install transformers==4.39.3
!pip install transformers==4.40.0
!pip install transformers==4.40.1
!pip install flash_attn!pip install llama_index
!pip install llama-index llama-index.embeddings.huggingface

Collecting rouge-chinese
  Downloading rouge_chinese-1.0.3-py3-none-any.whl.metadata (7.6 kB)
Downloading rouge_chinese-1.0.3-py3-none-any.whl (21 kB)
Installing collected packages: rouge-chinese
Successfully installed rouge-chinese-1.0.3
Collecting transformers_stream_generator
  Downloading transformers-stream-generator-0.0.5.tar.gz (13 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: transformers_stream_generator
  Building wheel for transformers_stream_generator (setup.py) ... [?25l[?25hdone
  Created wheel for transformers_stream_generator: filename=transformers_stream_generator-0.0.5-py3-none-any.whl size=12425 sha256=e2c35ccfab9b78cf9b7c0a8013f707ff0972ff4a9c3920de03b7ac387cf4817b
  Stored in directory: /root/.cache/pip/wheels/95/4a/90/140f7b67d125906f6a165f38aad212ecb4a695ad0d87582437
Successfully built transformers_stream_generator
Installing collected packages: transformers_stream_generator
Successfully installed transformers_

In [None]:
# os gives access to the process environment variables.
import os
# Colab helper module for reading secrets stored with the notebook.
from google.colab import userdata

# Export the Hugging Face token as an environment variable when one is configured.
# userdata.get raises SecretNotFoundError if the HF_TOKEN secret does not exist,
# so the lookup is guarded to keep the notebook runnable without a token.
try:
  os.environ["HF_TOKEN"] = userdata.get('HF_TOKEN')
except userdata.SecretNotFoundError:
  print("HF_TOKEN secret not found; continuing without Hugging Face authentication.")

SecretNotFoundError: Secret HF_TOKEN does not exist.

In [None]:
# Clear the interactive namespace (user-defined variables and imported names).
# The -f flag forces the reset without asking for confirmation.
%reset -f

In [None]:

%cd /content/LLaMA-Factory/  # Return to the LLaMA-Factory directory to ensure commands run in the correct location
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git" # Reinstall unsloth and xformers libraries to ensure a clean setup
!pip install --no-deps xformers==0.0.25
!pip install .[bitsandbytes] # Reinstall the LLaMA-Factory package with bitsandbytes support
!pip install scikit-learn # Install scikit-learn, a machine learning library often used for evaluating models


[Errno 2] No such file or directory: '/content/LLaMA-Factory/ # 确保在正确的目录'
/content/LLaMA-Factory
Collecting unsloth@ git+https://github.com/unslothai/unsloth.git (from unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git)
  Cloning https://github.com/unslothai/unsloth.git to /tmp/pip-install-y0hqy27z/unsloth_318e065e0fd343cf8393b4653cc8acbb
  Running command git clone --filter=blob:none --quiet https://github.com/unslothai/unsloth.git /tmp/pip-install-y0hqy27z/unsloth_318e065e0fd343cf8393b4653cc8acbb
  Resolved https://github.com/unslothai/unsloth.git to commit fb77505f8429566f5d21d6ea5318c342e8a67991
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting transformers>=4.43.2 (from unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git)
  Using cached transformers-4.44.2-py3-none-any.w

In [None]:
# Start the LLaMA Factory web interface through its command-line entry point.
# GRADIO_SHARE=1 is set only in the environment of this one command; it is
# presumably what requests a public Gradio share link in addition to the local
# URL — confirm against the LLaMA Factory documentation.
!GRADIO_SHARE=1 llamafactory-cli webui

2024-09-24 00:13:28.007370: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-09-24 00:13:28.102882: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-09-24 00:13:28.145726: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-09-24 00:13:28.197840: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
Running on local URL:  http://0.0.0.0:7860
Running on