In [None]:
import os
# Set the locale-related environment variables to en_US.UTF-8 in one call.
# NOTE(review): presumably needed so the shell commands run later in the
# notebook see a UTF-8 locale; confirm.
os.environ.update({
    'LC_ALL': 'en_US.UTF-8',
    'LANG': 'en_US.UTF-8',
    'LC_CTYPE': 'en_US.UTF-8',
})

In [None]:
from pathlib import Path
from typing import Optional

def create_directory(path: Optional[Path] = None, dir_name: str = "output") -> Path:
    """
    Create the directory ``path / dir_name`` (if needed) and return it.

    Parameters:
    - path (Optional[Path]): Parent location of the directory. A plain string
      or any other path-like object is accepted as well and is converted to
      ``Path``. When ``None``, the current working directory is used.
    - dir_name (str): The name of the directory to create. May contain several
      components; missing intermediate directories are created too.

    Returns:
    - Path object representing the path to the created directory. It is
      relative when ``path`` is ``None`` or relative.

    Raises:
    - FileExistsError: if the target exists but is not a directory.
    """
    # Coerce to Path so a string parent does not fail on the `/` operator below.
    working_dir = Path(path) if path is not None else Path('./')

    output_directory = working_dir / dir_name

    # exist_ok=True makes the call safe to repeat when the cell is re-run.
    output_directory.mkdir(parents=True, exist_ok=True)

    return output_directory

# Create ./fine-tuned-checkpoints relative to the current working directory
# and report the resulting (relative) path.
output_dir = create_directory(dir_name="fine-tuned-checkpoints")
print(f"Directory created at: {output_dir}")

Directory created at: fine-tuned-checkpoints


In [None]:
%%capture
# Install or upgrade the fine-tuning stack; %%capture hides the pip output.
# NOTE(review): every package is installed unpinned (-U pulls the latest
# release), so a later re-run may resolve to different versions than the ones
# that produced the recorded outputs.
!pip install -q -U bitsandbytes
!pip install -q -U transformers
!pip install -q -U peft
!pip install -q -U accelerate
!pip install -q -U datasets
!pip install -q -U trl
!pip install ninja

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from trl import SFTTrainer
import torch

# Load model

In [None]:
model_name = "Deci/DeciLM-7B"

# Total memory, in bytes, of CUDA device 0 (a visible GPU is required).
gpu_memory = torch.cuda.get_device_properties(0).total_memory

# Load in 4-bit when the card has less than 20e9 bytes of memory.
do_quantization = gpu_memory < 20e9

# bfloat16 needs an Ampere-or-newer GPU (CUDA compute capability >= 8.0).
# Older chips such as the T4 fall back to float16 automatically, so no line
# has to be commented in or out by hand.
compute_dtype = (
    torch.bfloat16
    if torch.cuda.get_device_capability(0)[0] >= 8
    else torch.float16
)

# Arguments shared by the quantized and the non-quantized load paths.
load_kwargs = dict(
    device_map="auto",
    use_cache=True,
    # DeciLM ships its modeling code in the model repository.
    trust_remote_code=True,
)

if do_quantization:
    # 4-bit NF4 weights with double quantization; matmuls run in compute_dtype.
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=compute_dtype,
    )
    load_kwargs["quantization_config"] = bnb_config
else:
    load_kwargs["torch_dtype"] = compute_dtype

decilm = AutoModelForCausalLM.from_pretrained(model_name, **load_kwargs)

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

# Reuse the end-of-sequence token for padding and pad on the right.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/895 [00:00<?, ?B/s]

configuration_decilm.py:   0%|          | 0.00/576 [00:00<?, ?B/s]

(…)sformers_v4_35_2__configuration_llama.py:   0%|          | 0.00/9.20k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/Deci/DeciLM-7B:
- transformers_v4_35_2__configuration_llama.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


version_check.py:   0%|          | 0.00/383 [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/Deci/DeciLM-7B:
- version_check.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.
A new version of the following files was downloaded from https://huggingface.co/Deci/DeciLM-7B:
- configuration_decilm.py
- transformers_v4_35_2__configuration_llama.py
- version_check.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


modeling_decilm.py:   0%|          | 0.00/14.5k [00:00<?, ?B/s]

(…)ers_v4_35_2__modeling_attn_mask_utils.py:   0%|          | 0.00/10.1k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/Deci/DeciLM-7B:
- transformers_v4_35_2__modeling_attn_mask_utils.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


transformers_v4_35_2__modeling_llama.py:   0%|          | 0.00/56.4k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/Deci/DeciLM-7B:
- transformers_v4_35_2__modeling_llama.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.
A new version of the following files was downloaded from https://huggingface.co/Deci/DeciLM-7B:
- modeling_decilm.py
- transformers_v4_35_2__modeling_attn_mask_utils.py
- transformers_v4_35_2__modeling_llama.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.99G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.18G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

tokenizer_config.json:   0%|          | 0.00/1.33k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

# Load dataset



In [None]:
# !pip install apify_client

In [None]:
# from apify_client import ApifyClient

# # Initialize the ApifyClient with your API token
# client = ApifyClient("your client") 

# # Prepare the Actor input
# run_input = {
#     "directUrls": ["https://www.instagram.com/ospreypacks/"], # your instagram page
#     "resultsType": "posts",
#     "resultsLimit": 4546,
#     "searchType": "hashtag",
#     "searchLimit": 1,
#     "addParentData": False,
# }

# # Run the Actor and wait for it to finish
# run = client.actor("your actor").call(run_input=run_input)

# # Fetch and print Actor results from the run's dataset (if there are any)
# for item in client.dataset(run["defaultDatasetId"]).iterate_items():
#     print(item)

In [None]:
# client.dataset(run["defaultDatasetId"]).iterate_items()

In [None]:
import pandas as pd

# Load the captions from a CSV at a Colab-style absolute path.
# NOTE(review): presumably this file is the export of the scraping cells that
# are commented out above; confirm where posts.csv comes from.
posts = pd.read_csv('/content/posts.csv')
posts.head()

Unnamed: 0,caption
0,Keep it close to the vest 🏃 \n\nEasy hydration...
1,"For a half century, @OspreyPacks has been driv..."
2,Who else is ready for hikes to look a bit more...
3,Configure a custom bikepacking system that fit...
4,"However your spring is looking, keep your day ..."


In [None]:
posts.shape

(2270, 1)

In [None]:
# Drop every row that contains a missing value, modifying `posts` in place.
posts.dropna(axis=0, inplace=True)
# Per-column count of remaining NaNs, shown as the cell output.
posts.isna().sum()

caption    0
dtype: int64

In [None]:
posts.shape

(2268, 1)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 10% of the captions for evaluation. The fixed random_state makes
# the shuffle, and therefore the train/test membership, reproducible across
# kernel restarts.
train, test = train_test_split(posts, test_size=0.1, shuffle=True, random_state=42)
train.shape, test.shape

((2041, 1), (227, 1))

In [None]:
# Write both splits to CSV in the working directory, without the index column.
train.to_csv('train.csv', index=False)
test.to_csv('test.csv', index=False)

In [None]:
from datasets import load_dataset

# Map each split name to its CSV file in the working directory.
data_files = {"train": "train.csv", "test": "test.csv"}
# Load the files with the "csv" builder into "train" and "test" splits.
dataset = load_dataset("csv", data_files=data_files)

Generating train split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

In [None]:
dataset

DatasetDict({
    train: Dataset({
        features: ['caption'],
        num_rows: 2041
    })
    test: Dataset({
        features: ['caption'],
        num_rows: 227
    })
})

# QLoRA Config

In [None]:
from peft import LoraConfig, prepare_model_for_kbit_training, get_peft_model

# we set our lora config to be the same as qlora
# NOTE(review): the statement above cannot be verified from this notebook;
# confirm against the reference QLoRA settings.
lora_config = LoraConfig(
    # Rank of the LoRA update matrices.
    r=16,
    # Scaling factor applied to the LoRA update.
    lora_alpha=32,
    # Dropout applied inside the LoRA layers.
    lora_dropout=0.1,
    #  The modules to apply the LoRA update matrices.
    # Only these three projection modules are adapted; no other module names are listed.
    target_modules = ["gate_proj", "down_proj", "up_proj"],
    task_type="CAUSAL_LM"
)

# Prepare model for PEFT

In [None]:
# Quantized path only: run PEFT's k-bit training preparation on the model.
if do_quantization:
    decilm = prepare_model_for_kbit_training(decilm)

# Make the model inputs require gradients before the adapters are attached.
decilm.enable_input_require_grads()
# Wrap the model with the LoRA adapters described by lora_config.
# NOTE(review): this cell rebinds `decilm`, so running it a second time would
# wrap an already-wrapped model; run it once per kernel session.
decilm = get_peft_model(decilm, lora_config)

# Training Args

In [None]:
len(dataset['train'])

2041

In [None]:
# Number of passes over the training split.
NUM_EPOCHS = 1
# Absolute form of the checkpoint directory created near the top of the
# notebook, instead of a hard-coded '/content/...' path that only exists on
# Colab. Kept as a plain string, like the original value. Wrapping in Path()
# keeps the cell re-runnable after output_dir has already become a string.
output_dir = str(Path(output_dir).resolve())

In [None]:
from transformers import TrainingArguments

training_args = TrainingArguments(
        # Training length comes from the epoch count. No max_steps is passed
        # here: a positive max_steps overrides num_train_epochs, and the
        # former len(dataset['train']) * 5 asked for many more steps than
        # NUM_EPOCHS epochs contain.
        num_train_epochs = NUM_EPOCHS,
        output_dir=output_dir,
        # Evaluate on a step schedule rather than once per epoch.
        evaluation_strategy="steps",
        do_eval=True,
        # Let the Trainer look for a batch size that fits in memory.
        auto_find_batch_size=True,
        log_level="debug",
        optim="paged_adamw_32bit",
        # Write a checkpoint every 25 optimizer steps.
        save_steps=25,
        logging_steps=100,
        learning_rate=3e-4,
        weight_decay=0.01,
        warmup_steps=150,
        # Optional settings, currently disabled. bf16/tf32 need an
        # Ampere-or-newer GPU, so keep them commented out on a T4 or any other
        # non-Ampere chip.
        # bf16=True,
        # tf32=True,
        # gradient_checkpointing=True,
        # max_grad_norm=0.3, #from the paper
        # lr_scheduler_type="reduce_lr_on_plateau",
)

# Train



In [None]:
import math

num_train_epochs = training_args.num_train_epochs
train_dataset_size = len(dataset['train'])

# Samples consumed per optimizer step. `train_batch_size` already equals
# per_device_train_batch_size * max(1, n_gpu), so it must not be multiplied by
# n_gpu again, and it never becomes zero on a machine without GPUs. Gradient
# accumulation enlarges the effective batch further.
batch_size = training_args.train_batch_size * training_args.gradient_accumulation_steps

# Every epoch runs a whole number of steps (the last batch may be partial),
# so round up per epoch before scaling by the number of epochs.
steps_per_epoch = math.ceil(train_dataset_size / batch_size)

# NOTE(review): with auto_find_batch_size=True the Trainer may lower the batch
# size at run time; this step count would then cover less than the requested
# number of epochs. Confirm the batch size reported in the training log.
total_steps = math.ceil(steps_per_epoch * num_train_epochs)


In [None]:
total_steps

256

In [None]:
# Cap training at the computed number of optimizer steps. When max_steps is
# set it overrides num_train_epochs (see the message printed when the trainer
# is built).
training_args.max_steps = total_steps

In [None]:
# Supervised fine-tuning on the raw text of the 'caption' column.
trainer = SFTTrainer(
    model=decilm,
    args=training_args,
    # NOTE(review): `decilm` was already wrapped by get_peft_model in an
    # earlier cell, so this argument looks redundant; confirm how the
    # installed trl version treats a PEFT model plus a peft_config.
    peft_config=lora_config,
    tokenizer=tokenizer,
    dataset_text_field='caption',
    train_dataset=dataset['train'],
    eval_dataset=dataset['test'],
    max_seq_length=4096,
    # Preprocess the datasets with one worker per CPU core reported by the OS.
    dataset_num_proc=os.cpu_count(),
)

  self.pid = os.fork()


Map (num_proc=2):   0%|          | 0/2041 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/227 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
max_steps is given, it will override any value given in num_train_epochs


In [None]:
# Run fine-tuning. The returned TrainOutput (global step, training loss,
# metrics) is displayed as the cell result.
trainer.train()

Currently training with a batch size of: 8
***** Running training *****
  Num examples = 2,041
  Num Epochs = 1
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 256
  Number of trainable parameters = 28,311,552


Step,Training Loss,Validation Loss
100,2.2602,1.93441
200,1.8953,1.863288


Saving model checkpoint to /content/fine-tuned-checkpoints/checkpoint-25
loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--Deci--DeciLM-7B/snapshots/c3c9f4226801dc0433f32aebffe0aac68ee2f051/config.json
Model config DeciLMConfig {
  "architectures": [
    "DeciLMForCausalLM"
  ],
  "attention_bias": false,
  "auto_map": {
    "AutoConfig": "Deci/DeciLM-7B--configuration_decilm.DeciLMConfig",
    "AutoModelForCausalLM": "Deci/DeciLM-7B--modeling_decilm.DeciLMForCausalLM"
  },
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 14336,
  "max_position_embeddings": 8192,
  "model_type": "deci",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 32,
  "num_key_value_heads_per_layer": [
    4,
    4,
    4,
    4,
    4,
    2,
    2,
    2,
    2,
    2,
    4,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    1,
    1

TrainOutput(global_step=256, training_loss=2.027971923351288, metrics={'train_runtime': 5534.1452, 'train_samples_per_second': 0.37, 'train_steps_per_second': 0.046, 'total_flos': 1.7095250990481408e+16, 'train_loss': 2.027971923351288, 'epoch': 1.0})

In [None]:
# With no argument, the model is saved to the output directory configured in training_args.
trainer.save_model()

Saving model checkpoint to /content/fine-tuned-checkpoints
loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--Deci--DeciLM-7B/snapshots/c3c9f4226801dc0433f32aebffe0aac68ee2f051/config.json
Model config DeciLMConfig {
  "architectures": [
    "DeciLMForCausalLM"
  ],
  "attention_bias": false,
  "auto_map": {
    "AutoConfig": "Deci/DeciLM-7B--configuration_decilm.DeciLMConfig",
    "AutoModelForCausalLM": "Deci/DeciLM-7B--modeling_decilm.DeciLMForCausalLM"
  },
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 14336,
  "max_position_embeddings": 8192,
  "model_type": "deci",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 32,
  "num_key_value_heads_per_layer": [
    4,
    4,
    4,
    4,
    4,
    2,
    2,
    2,
    2,
    2,
    4,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    1,
    1,
    1,
    1

In [None]:
# Save a second copy under tuned_decilm-7b, relative to the working directory.
trainer.save_model("tuned_decilm-7b")

Saving model checkpoint to tuned_decilm-7b
loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--Deci--DeciLM-7B/snapshots/c3c9f4226801dc0433f32aebffe0aac68ee2f051/config.json
Model config DeciLMConfig {
  "architectures": [
    "DeciLMForCausalLM"
  ],
  "attention_bias": false,
  "auto_map": {
    "AutoConfig": "Deci/DeciLM-7B--configuration_decilm.DeciLMConfig",
    "AutoModelForCausalLM": "Deci/DeciLM-7B--modeling_decilm.DeciLMForCausalLM"
  },
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 14336,
  "max_position_embeddings": 8192,
  "model_type": "deci",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 32,
  "num_key_value_heads_per_layer": [
    4,
    4,
    4,
    4,
    4,
    2,
    2,
    2,
    2,
    2,
    4,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    1,
    1,
    1,
    1,
    1,
    1,


# Merge adapter into base model




In [None]:
# Fold the LoRA weights into the underlying model and drop the PEFT wrapper.
# NOTE(review): in the recorded run the printed result still contains
# Linear4bit layers, i.e. the merge happened on the quantized model; confirm
# this is intended.
fine_tuned_model = decilm.merge_and_unload()
fine_tuned_model



DeciLMForCausalLM(
  (model): DeciLMModel(
    (embed_tokens): Embedding(32000, 4096)
    (layers): ModuleList(
      (0-4): 5 x DeciLMDecoderLayer(
        (self_attn): DeciLMAttention(
          (q_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear4bit(in_features=4096, out_features=512, bias=False)
          (v_proj): Linear4bit(in_features=4096, out_features=512, bias=False)
          (o_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): LlamaDynamicNTKScalingRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear4bit(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm()
        (post_attention_layernorm): LlamaRMSNorm()
      )
      (5

In [None]:
from peft import AutoPeftModelForCausalLM
from functools import partial

# Monkeypatch: every later AutoTokenizer.from_pretrained call receives
# trust_remote_code=True by default.
# NOTE(review): this rebinds the attribute for the whole kernel session, and
# re-running the cell wraps the previous partial in another partial.
AutoTokenizer.from_pretrained = partial(AutoTokenizer.from_pretrained, trust_remote_code=True)

# Reload the base model plus the saved adapter from the training output
# directory in bfloat16.
# NOTE(review): the recorded output of this cell ends with
# "NotImplementedError: Cannot copy out of meta tensor; no data!", so as
# captured the cell does not run to completion. Possibly device_map='auto'
# offloads weights because the first model still occupies the GPU; confirm
# and fix before relying on this cell.
instruction_tuned_model = AutoPeftModelForCausalLM.from_pretrained(
    training_args.output_dir,
    torch_dtype=torch.bfloat16,
    # offload_folder="offload_dir/", offload_state_dict = True,
    device_map = 'auto',
    trust_remote_code=True,
)

# NOTE(review): merged_model is not used by any of the cells visible below;
# the generation pipeline is built from fine_tuned_model instead.
merged_model = instruction_tuned_model.merge_and_unload()

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--Deci--DeciLM-7B/snapshots/c3c9f4226801dc0433f32aebffe0aac68ee2f051/config.json
loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--Deci--DeciLM-7B/snapshots/c3c9f4226801dc0433f32aebffe0aac68ee2f051/config.json
Model config DeciLMConfig {
  "_name_or_path": "Deci/DeciLM-7B",
  "architectures": [
    "DeciLMForCausalLM"
  ],
  "attention_bias": false,
  "auto_map": {
    "AutoConfig": "Deci/DeciLM-7B--configuration_decilm.DeciLMConfig",
    "AutoModelForCausalLM": "Deci/DeciLM-7B--modeling_decilm.DeciLMForCausalLM"
  },
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 14336,
  "max_position_embeddings": 8192,
  "model_type": "deci",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 32,
  "num_key_value_heads_per_layer": [
    4,
    4,
    4

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

All model checkpoint weights were used when initializing DeciLMForCausalLM.

All the weights of DeciLMForCausalLM were initialized from the model checkpoint at Deci/DeciLM-7B.
If your task is similar to the task the model of the checkpoint was trained on, you can already use DeciLMForCausalLM for predictions without further training.
Generation config file not found, using a generation config created from the model config.
loading file tokenizer.model
loading file tokenizer.json
loading file added_tokens.json
loading file special_tokens_map.json
loading file tokenizer_config.json
You are resizing the embedding layer without providing a `pad_to_multiple_of` parameter. This means that the new embedding dimension will be 32000. This might induce some performance reduction as *Tensor Cores* will not be available. For more details about this, or help on choosing the correct value for resizing, refer to this guide: https://docs.nvidia.com/deeplearning/performance/dl-performance-matrix-multip

NotImplementedError: Cannot copy out of meta tensor; no data!

In [None]:
from transformers import pipeline

# Decoding settings that the pipeline applies to every generation call.
generation_kwargs = dict(
    max_new_tokens=100,
    early_stopping=True,
    num_beams=5,
    temperature=0.001,
    do_sample=True,
    no_repeat_ngram_size=3,
    repetition_penalty=1.5,
    renormalize_logits=True,
    top_p=0.95,
)

# Text-generation pipeline built on the merged model and the training tokenizer.
decilm_tuned_pipeline = pipeline(
    "text-generation",
    model=fine_tuned_model,
    tokenizer=tokenizer,
    **generation_kwargs,
)

In [None]:
# Quick check with a generic prompt. The code indexes the first returned item
# and reads its "generated_text" entry.
outputs = decilm_tuned_pipeline("In a shocking finding, scientists discovered a herd of unicorns living in")
print(outputs[0]["generated_text"])

In [None]:
# return_full_text=False keeps only the newly generated text, without the prompt.
decilm_tuned_pipeline('create a new post about the "Adventure" 25-liter backpack for $200, which is great for climbers', return_full_text=False)[0]['generated_text']

In [None]:
# Prompt spelling fixed ("travalers" -> "travelers") so the model is not fed a typo.
decilm_tuned_pipeline('create a new post about our new sport bag for travelers', return_full_text=False)[0]['generated_text']

In [None]:
# Prompt fixed: the original ran two words together ("aboutgiveaway").
decilm_tuned_pipeline('write me a post about a giveaway of 3 bags of the new collection', return_full_text=False)[0]['generated_text']

In [None]:
# from huggingface_hub import notebook_login
# notebook_login()

In [None]:
# fine_tuned_model.push_to_hub("Post_Generaton_Fine_tuned_DeciLM_7B",)