In [1]:
pip install pandas


Note: you may need to restart the kernel to use updated packages.


In [8]:
# Getting the currently installed Torch version

%%capture
import torch
major_version, minor_version = torch.cuda.get_device_capability()
# Must install separately since Colab has torch 2.2.1, which breaks packages
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
if major_version >= 8:
    # Use this for new GPUs like Ampere, Hopper GPUs (RTX 30xx, RTX 40xx, A100, H100, L40)
    !pip install --no-deps packaging ninja einops flash-attn xformers trl peft accelerate bitsandbytes
else:
    # Use this for older GPUs (V100, Tesla T4, RTX 20xx)
    !pip install --no-deps xformers trl peft accelerate bitsandbytes
pass

UsageError: Line magic function `%%capture` not found.


In [2]:
import torch
from contextlib import redirect_stdout, redirect_stderr
from io import StringIO

# Capture stdout and stderr
stdout_capture = StringIO()
stderr_capture = StringIO()
with redirect_stdout(stdout_capture), redirect_stderr(stderr_capture):
    major_version, minor_version = torch.cuda.get_device_capability()
    # Must install separately since Colab has torch 2.2.1, which breaks packages
    !pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
    if major_version >= 8:
        # Use this for new GPUs like Ampere, Hopper GPUs (RTX 30xx, RTX 40xx, A100, H100, L40)
        !pip install --no-deps packaging ninja einops flash-attn xformers trl peft accelerate bitsandbytes
    else:
        # Use this for older GPUs (V100, Tesla T4, RTX 20xx)
        !pip install --no-deps xformers trl peft accelerate bitsandbytes

# Get the captured output
stdout_output = stdout_capture.getvalue()
stderr_output = stderr_capture.getvalue()

# Print or process the captured output as needed
print("Captured stdout:", stdout_output)
print("Captured stderr:", stderr_output)


Captured stdout: Collecting unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git
  Cloning https://github.com/unslothai/unsloth.git to /tmp/pip-install-2_xfajl2/unsloth_3fd5dc0a65f248c3a8592858593ed149
  Running command git clone --filter=blob:none --quiet https://github.com/unslothai/unsloth.git /tmp/pip-install-2_xfajl2/unsloth_3fd5dc0a65f248c3a8592858593ed149
  Resolved https://github.com/unslothai/unsloth.git to commit bb81079ca1dba43fc2cdb79a81ce6edf23f87907
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Installing backend dependencies ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Collecting ninja
  Using cached ninja-1.11.1.1-py2.py3-none-manylinux1_x86_64.manylinux_2_5_x86_64.whl.metadata (5.3 kB)
Collecting flash-attn
  Using cached flash_attn-2.5.6.tar.gz (2.5 MB)
  Preparing metadata (setup.py) ... [?25lerror
  [1;31merror[0m: [1msubprocess-exited-with-error[0m
  
 

In [5]:
from unsloth import FastLanguageModel
import torch
max_seq_length = 4096 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = False # Use 4bit quantization to reduce memory usage. Can be False.

# 4bit pre quantized models we support for 4x faster downloading + no OOMs.
# fourbit_models = ["unsloth/tinyllama-chat-bnb-4bit"] # More models at https://huggingface.co/unsloth

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/tinyllama-chat", # Choose ANY! eg teknium/OpenHermes-2.5-Mistral-7B
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

config.json:   0%|          | 0.00/719 [00:00<?, ?B/s]

Unsloth: unsloth/tinyllama-chat can only handle sequence lengths of at most 2048.
But with kaiokendev's RoPE scaling of 2.0, it can be magically be extended to 4096!


==((====))==  Unsloth: Fast Llama patching release 2024.3
   \\   /|    GPU: NVIDIA RTX A5000 Laptop GPU. Max memory: 16.0 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.2.1+cu121. CUDA = 8.6. CUDA Toolkit = 12.1.
\        /    Bfloat16 = TRUE. Xformers = 0.0.25. FA = False.
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth


model.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/129 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.34k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/438 [00:00<?, ?B/s]

In [6]:
model = FastLanguageModel.get_peft_model(
    model,
    r = 16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 32,
    lora_dropout = 0, # Currently only supports dropout = 0
    bias = "none",    # Currently only supports bias = "none"
    use_gradient_checkpointing = False, # @@@ IF YOU GET OUT OF MEMORY - set to True @@@
    random_state = 3407,
    use_rslora = False,  # We support rank stabilized LoRA
    loftq_config = None, # And LoftQ
)

Unsloth 2024.3 patched 22 layers with 22 QKV layers, 22 O layers and 22 MLP layers.


Data Manipulation

In [24]:
import pandas as pd

# Load CSV file into a DataFrame, keeping only the first two columns and without indexing
df = pd.read_csv('CRB_Training_Dirty_Full.csv', usecols=[0, 1], index_col=False)
df = df.reset_index(drop=True)

# Display the DataFrame
print(df)


                                                 input  \
0    As a reserve flight attendant, how many days o...   
1    As a flight attendant, how many days off am I ...   
2    As a flight attendant, how are my days off det...   
3    What is the certification number R-6767, and h...   
4    As a flight attendant, what are my responsibil...   
..                                                 ...   
995  What should I do if I am a Reserve who has rep...   
996  What documentation do I need to maintain to ut...   
997  When should I provide the required documentati...   
998  What protection does the Commuter Policy provi...   
999  What may happen to me if the Company is unable...   

                                                output  
0    As a reserve flight attendant, you are entitle...  
1    As a regular flight attendant, you are entitle...  
2    If your duty period is scheduled to terminate ...  
3    Certification number R-6767 is a recognition m...  
4    As a flight a

In [25]:
df.head()


Unnamed: 0,input,output
0,"As a reserve flight attendant, how many days o...","As a reserve flight attendant, you are entitle..."
1,"As a flight attendant, how many days off am I ...","As a regular flight attendant, you are entitle..."
2,"As a flight attendant, how are my days off det...",If your duty period is scheduled to terminate ...
3,"What is the certification number R-6767, and h...",Certification number R-6767 is a recognition m...
4,"As a flight attendant, what are my responsibil...","As a flight attendant, your primary responsibi..."


Data Prep

In [7]:
import csv

# The path to your CSV file
csv_file_path = 'CRB_Training_Dirty_Full.csv'

# This will store all conversations
conversations = []

# Temporary list to store individual conversation
current_conversation = []

# Open and read the CSV file
with open(csv_file_path, mode='r', encoding='utf-8') as file:
    reader = csv.reader(file)
    
    # Optional: if your CSV has headers, skip the first row
    next(reader, None)  
    
    for index, row in enumerate(reader):
        # Assuming the first column is 'human' and the second is 'gpt'
        # Adjust the indices 0 and 1 if your columns are in a different order
        human_message = row[0].strip()
        gpt_message = row[1].strip()
        
        # Check if there's actually a message to avoid adding empty messages
        if human_message:
            current_conversation.append({'from': 'human', 'value': human_message})
        if gpt_message:
            current_conversation.append({'from': 'gpt', 'value': gpt_message})

        # If you want to start a new conversation after each pair, or if you have logic to determine end of conversation
        # Add current_conversation to conversations and reset it
        # For example, if each row is a separate conversation or based on some condition
        conversations.append(current_conversation)
        current_conversation = []  # Reset for the next conversation

        # If your conversations are more complex, adjust logic accordingly

# Make sure to add the last conversation if it's not empty
if current_conversation:
    conversations.append(current_conversation)

# Print the result or write it to a file
print(conversations)




In [5]:
import csv
import json

# The path to your CSV file
csv_file_path = 'CRB_Training_Dirty_Full.csv'

# This will store all conversations
conversations = []

# Temporary list to store individual conversation
current_conversation = []

# Open and read the CSV file
with open(csv_file_path, mode='r', encoding='utf-8') as file:
    reader = csv.reader(file)
    
    # Optional: if your CSV has headers, skip the first row
    next(reader, None)  
    
    for index, row in enumerate(reader):
        human_message = row[0].strip()
        gpt_message = row[1].strip()
        
        if human_message:
            current_conversation.append({'from': 'human', 'value': human_message})
        if gpt_message:
            current_conversation.append({'from': 'gpt', 'value': gpt_message})

        # Assuming each pair of messages constitutes one conversation
        # Modify this as needed based on your data structure
        conversations.append(current_conversation)
        current_conversation = []  # Reset for the next conversation

# Add the last conversation if it's not empty
if current_conversation:
    conversations.append(current_conversation)

# Save the conversations to a JSON file
output_file_path = 'CRB_Training_Dirty_Full.json'
with open(output_file_path, 'w', encoding='utf-8') as json_file:
    json.dump(conversations, json_file, ensure_ascii=False, indent=4)


In [8]:
from unsloth.chat_templates import get_chat_template

tokenizer = get_chat_template(
    tokenizer,
    chat_template = "chatml", # Supports zephyr, chatml, mistral, llama, alpaca, vicuna, vicuna_old, unsloth
    mapping = {"role" : "from", "content" : "value", "user" : "human", "assistant" : "gpt"}, # ShareGPT style
    map_eos_token = True, # Maps <|im_end|> to </s> instead
)

def formatting_prompts_func(examples):
    convos = examples["conversations"]
    texts = [tokenizer.apply_chat_template(convo, tokenize = False, add_generation_prompt = False) for convo in convos]
    return { "text" : texts, }
pass

import json
from datasets import Dataset

# Load the JSON file
with open('/mnt/c/Users/GLaDOS/Documents/unsloth/CRB_Training_Dirty_Full.json', 'r', encoding='utf-8') as f:
    data = json.load(f)  # This assumes your JSON is a list of dictionaries

# Convert the list of dictionaries into a Hugging Face Dataset
dataset = Dataset.from_dict({'conversations': data})

# Depending on your formatting function and structure, you might need to flatten
# the data or otherwise preprocess it before converting to Dataset

dataset = dataset.map(formatting_prompts_func, batched = True,)

Unsloth: Will map <|im_end|> to EOS = </s>.


Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

In [9]:
dataset[75]["conversations"]

[{'from': 'human',
  'value': "How is my flight time affected when I'm on sick leave?"},
 {'from': 'gpt',
  'value': 'When you are on sick leave, your flight time may still be credited depending on the circumstances. If you are unable to work due to illness or injury but are still performing duties related to the operation of an aircraft, such as serving as a Flight Attendant, you will receive credit for that time towards your flight time for pay purposes. However, if your incapacity prevents you from performing any duties related to the operation of an aircraft, you will not receive credit for that time.'}]

In [10]:
print(dataset[5]["text"])

<|im_start|>user
Can I be assigned to fly on an aircraft that is not owned by my airline?<|im_end|>
<|im_start|>assistant
Yes, you may be assigned to fly on an aircraft that is not owned by your airline if it is under the Company's operational control or leased to the Company. This includes wet leases (aircraft and crew) as well as contracting for other carriers or entities, such as government, military, or commercial operations.<|im_end|>



In [11]:
from trl import SFTTrainer
from transformers import TrainingArguments

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    #eval_dataset = eval_dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = False, # Can make training 5x faster for short sequences.
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_steps = 50,
        num_train_epochs = 4,
        learning_rate = 2e-5,
        fp16 = not torch.cuda.is_bf16_supported(),
        bf16 = torch.cuda.is_bf16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
        #fp16_full_eval = True,
        #per_device_eval_batch_size = 2,
        #eval_accumulation_steps = 4,
        #evaluation_strategy = "steps",
        #eval_steps = 1,
    ),
)

Map (num_proc=2):   0%|          | 0/1000 [00:00<?, ? examples/s]

In [33]:
# NOT GOOD

from trl import SFTTrainer
from transformers import TrainingArguments

# Modify your existing TrainingArguments to include evaluation parameters
training_args = TrainingArguments(
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    warmup_steps=5,
    num_train_epochs=5,
    learning_rate=2e-4,
    fp16=not torch.cuda.is_bf16_supported(),
    bf16=torch.cuda.is_bf16_supported(),
    logging_steps=1,
    optim="adamw_8bit",
    weight_decay=0.01,
    lr_scheduler_type="linear",
    seed=3407,
    output_dir="outputs",
    # Add evaluation arguments here
    fp16_full_eval=True,  # If using FP16 precision for evaluation
    per_device_eval_batch_size=2,  # Set batch size for evaluation
    eval_accumulation_steps=4,  # Set accumulation steps for evaluation
    evaluation_strategy="steps",  # Evaluate every N steps
    eval_steps=50,  # How often to perform evaluation within each epoch
)

# Now integrate the modified TrainingArguments into your SFTTrainer setup
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,  # Your training dataset
    eval_dataset=eval_dataset,  # Add your validation/evaluation dataset here
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    packing=False,
    args=training_args  # Use the updated training arguments with eval settings
)


Map (num_proc=2):   0%|          | 0/1000 [00:00<?, ? examples/s]

KeyError: 'text'

In [14]:
trainer_stats = trainer.train()

Step,Training Loss
1,1.5177
2,1.2911
3,1.1673
4,1.2555
5,1.1885
6,1.3551
7,1.4293
8,1.2824
9,1.2828
10,1.2601


Checkpoint destination directory outputs/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.


In [15]:
from unsloth.chat_templates import get_chat_template

tokenizer = get_chat_template(
    tokenizer,
    chat_template = "chatml", # Supports zephyr, chatml, mistral, llama, alpaca, vicuna, vicuna_old, unsloth
    mapping = {"role" : "from", "content" : "value", "user" : "human", "assistant" : "gpt"}, # ShareGPT style
    map_eos_token = True, # Maps <|im_end|> to </s> instead
)

FastLanguageModel.for_inference(model) # Enable native 2x faster inference

messages = [
    {"from": "human", "value": "As a flight attendant, how many days off am I entitled to in any 30-day bid period?"},
]
inputs = tokenizer.apply_chat_template(
    messages,
    tokenize = True,
    add_generation_prompt = True, # Must add for generation
    return_tensors = "pt",
).to("cuda")

outputs = model.generate(input_ids = inputs, max_new_tokens = 128, use_cache = True)
tokenizer.batch_decode(outputs)

['<|im_start|>user\nAs a flight attendant, how many days off am I entitled to in any 30-day bid period?<|im_end|> \n<|im_start|>assistant\nYou are entitled to a minimum of four (4) days off in any 30-day bid period.<|im_end|>']

In [58]:
model.save_pretrained("lora_model_CRB_Test_3") # Local saving
# model.push_to_hub("your_name/lora_model", token = "...") # Online saving

In [2]:
pip install openai

Collecting openai
  Downloading openai-1.14.3-py3-none-any.whl.metadata (20 kB)
Collecting anyio<5,>=3.5.0 (from openai)
  Downloading anyio-4.3.0-py3-none-any.whl.metadata (4.6 kB)
Collecting distro<2,>=1.7.0 (from openai)
  Downloading distro-1.9.0-py3-none-any.whl.metadata (6.8 kB)
Collecting httpx<1,>=0.23.0 (from openai)
  Downloading httpx-0.27.0-py3-none-any.whl.metadata (7.2 kB)
Collecting pydantic<3,>=1.9.0 (from openai)
  Downloading pydantic-2.6.4-py3-none-any.whl.metadata (85 kB)
     ---------------------------------------- 0.0/85.1 kB ? eta -:--:--
     ---------------------------------------- 85.1/85.1 kB 1.6 MB/s eta 0:00:00
Collecting sniffio (from openai)
  Downloading sniffio-1.3.1-py3-none-any.whl.metadata (3.9 kB)
Collecting tqdm>4 (from openai)
  Downloading tqdm-4.66.2-py3-none-any.whl.metadata (57 kB)
     ---------------------------------------- 0.0/57.6 kB ? eta -:--:--
     ---------------------------------------- 57.6/57.6 kB ? eta 0:00:00
Collecting typing-

In [4]:
# Example: reuse your existing OpenAI setup
from openai import OpenAI

# Point to the local server
client = OpenAI(base_url="http://localhost:1234/v1", api_key="lm-studio")

completion = client.chat.completions.create(
  messages=[
    {"role": "system", "content": "Always answer in rhymes."},
    {"role": "user", "content": "Introduce yourself."}
  ],
  temperature=0.7,
)

print(completion.choices[0].message)

TypeError: Missing required arguments; Expected either ('messages' and 'model') or ('messages', 'model' and 'stream') arguments to be given

In [7]:
# Example: reuse your existing OpenAI setup
from openai import OpenAI

# Point to the local server
client = OpenAI(base_url="http://localhost:1234/v1", api_key="lm-studio")

completion = client.chat.completions.create(
  messages=[
    {"role": "system", "content": "Always answer in rhymes."},
    {"role": "user", "content": "Introduce yourself."}
  ],
  temperature=0.7,
  model = 'local'
)

print(completion.choices[0].message)

ChatCompletionMessage(content="Hi, I'm a parrot with the ability to speak and understand human language.\nI was created by humans but now have taken on an actual life of my own.\nI am here to serve you and make your day happier.\nI am also known as Parrotobrian because parrots are my favorite animals.", role='assistant', function_call=None, tool_calls=None)
