In [1]:
    !pip install accelerate
    !pip install -i https://pypi.org/simple/ bitsandbytes
    !pip install peft transformers trl datasets
    !pip install deepspeed

Looking in indexes: https://pypi.org/simple/


In [2]:
!pip install flash-attn --no-build-isolation



In [4]:
import sys
import logging

import datasets
from datasets import load_dataset
from peft import LoraConfig
import torch
import transformers
from trl import SFTTrainer
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, BitsAndBytesConfig

"""
A simple example on using SFTTrainer and Accelerate to finetune Phi-3 models. For
a more advanced example, please follow HF alignment-handbook/scripts/run_sft.py.
This example has utilized DeepSpeed ZeRO3 offload to reduce the memory usage. The
script can be run on V100 or later generation GPUs. Here are some suggestions on
further reducing memory consumption:
    - reduce batch size
    - decrease lora dimension
    - restrict lora target modules
Please follow these steps to run the script:
1. Install dependencies:
    conda install -c conda-forge accelerate
    pip3 install -i https://pypi.org/simple/ bitsandbytes
    pip3 install peft transformers trl datasets
    pip3 install deepspeed
2. Setup accelerate and deepspeed config based on the machine used:
    accelerate config
Here is a sample config for deepspeed zero3:
    compute_environment: LOCAL_MACHINE
    debug: false
    deepspeed_config:
      gradient_accumulation_steps: 1
      offload_optimizer_device: none
      offload_param_device: none
      zero3_init_flag: true
      zero3_save_16bit_model: true
      zero_stage: 3
    distributed_type: DEEPSPEED
    downcast_bf16: 'no'
    enable_cpu_affinity: false
    machine_rank: 0
    main_training_function: main
    mixed_precision: bf16
    num_machines: 1
    num_processes: 4
    rdzv_backend: static
    same_network: true
    tpu_env: []
    tpu_use_cluster: false
    tpu_use_sudo: false
    use_cpu: false
3. check accelerate config:
    accelerate env
4. Run the code:
    accelerate launch sample_finetune.py
"""

# Notebook-wide logger, named after the running module.
logger = logging.getLogger(__name__)


###################
# Hyper-parameters
###################
# Keyword arguments later unpacked into `TrainingArguments(**training_config)`.
training_config = dict(
    bf16=False,
    do_eval=False,
    learning_rate=5.0e-06,
    log_level="info",
    logging_steps=20,
    logging_strategy="steps",
    lr_scheduler_type="cosine",
    num_train_epochs=5,
    max_steps=-1,
    output_dir="./checkpoint_dir",
    overwrite_output_dir=True,
    per_device_eval_batch_size=4,
    per_device_train_batch_size=4,
    remove_unused_columns=True,
    save_steps=100,
    save_total_limit=1,
    seed=0,
    gradient_checkpointing=True,
    gradient_checkpointing_kwargs=dict(use_reentrant=False),
    gradient_accumulation_steps=1,
    warmup_ratio=0.2,
)

# Keyword arguments later unpacked into `LoraConfig(**peft_config)`.
peft_config = dict(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules="all-linear",
    modules_to_save=None,
)



In [5]:
import glob
from datasets import load_dataset

def loadDataSet(pattern='./trainData/*train_data.csv'):
    """Load every CSV file matching ``pattern`` as one training split.

    Args:
        pattern: Glob pattern selecting the CSV files to load. Defaults to
            the notebook's ``./trainData/*train_data.csv`` location.

    Returns:
        Whatever ``load_dataset("csv", data_files=..., split="train")``
        returns for the matched files.

    Raises:
        FileNotFoundError: If no file matches ``pattern``.
    """
    # glob.glob gives no ordering guarantee; sort so the row order (and thus
    # any seeded shuffling downstream) is the same on every run.
    dataset_files = sorted(glob.glob(pattern))
    if not dataset_files:
        # Fail early with a clear message instead of handing an empty file list on.
        raise FileNotFoundError(f"No training CSV files match {pattern!r}")
    return load_dataset("csv", data_files=dataset_files, split="train")

def form_data(example):
    """Join one row's question and answer into a single text string.

    Args:
        example: Mapping with the keys ``'Tamil Question'`` and
            ``'Tamil Answer'``.

    Returns:
        The question, followed by a space and a newline, followed by the
        answer. A field whose value is ``None`` is treated as an empty string.
    """
    answer = example['Tamil Answer']
    question = example['Tamil Question']
    # Guard both fields the same way; previously only the question was
    # guarded, so a None answer raised TypeError during concatenation.
    if question is None:
        question = ""
    if answer is None:
        answer = ""
    return question + " \n" + answer

def isRowValid(example):
    """Tell whether a row has both a non-blank question and a non-blank answer.

    Args:
        example: Mapping with the keys ``'Tamil Answer'`` and
            ``'Tamil Question'``, or ``None``.

    Returns:
        ``False`` when ``example`` is ``None`` or when either field is
        ``None``, empty, or whitespace only; ``True`` otherwise.
    """
    # The None check has to come before any subscripting: the earlier version
    # indexed `example` first, so its `example is None` tests could never fire.
    if example is None:
        return False
    for column in ('Tamil Answer', 'Tamil Question'):
        value = example[column]
        if value is None or len(value.strip()) == 0:
            return False
    return True

def updatedDataset(dataset):
    """Return ``dataset`` extended with a ``'data'`` column.

    The new column holds ``form_data(row)`` for every row, in iteration order.
    """
    combined_texts = [form_data(row) for row in dataset]
    return dataset.add_column('data', combined_texts)

def getDataSet():
    """Load the CSV rows, keep the valid ones and attach the ``'data'`` column.

    The row count is printed once before and once after filtering.

    Returns:
        The filtered dataset as returned by ``updatedDataset``.
    """
    raw_rows = loadDataSet()
    print ( "total rows in dataset: "+ str(len(raw_rows)))
    valid_rows = updatedDataset(raw_rows.filter(isRowValid))
    print ( "total rows in dataset after filtering: "+ str(len(valid_rows)))
    return valid_rows

# Build the filtered dataset once; later cells read it through the `dataset` name.
dataset = getDataSet()

total rows in dataset: 2236
total rows in dataset after filtering: 2012


In [6]:
# Turn the hyper-parameter dicts into the trainer / LoRA configuration objects.
train_conf = TrainingArguments(**training_config)
peft_conf = LoraConfig(**peft_config)

###############
# Setup logging
###############
# Route log records to stdout so they show up in the cell output.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    handlers=[logging.StreamHandler(sys.stdout)],
)
# Use the level derived from the training arguments for this logger, `datasets` and `transformers`.
log_level = train_conf.get_process_log_level()
logger.setLevel(log_level)
datasets.utils.logging.set_verbosity(log_level)
transformers.utils.logging.set_verbosity(log_level)
transformers.utils.logging.enable_default_handler()
transformers.utils.logging.enable_explicit_format()

# Log on each process a small summary
# NOTE: the "16-bits training" field prints `train_conf.fp16` only; it does not reflect the bf16 setting.
logger.warning(
    f"Process rank: {train_conf.local_rank}, device: {train_conf.device}, n_gpu: {train_conf.n_gpu}"
    + f" distributed training: {bool(train_conf.local_rank != -1)}, 16-bits training: {train_conf.fp16}"
)
logger.info(f"Training/evaluation parameters {train_conf}")
logger.info(f"PEFT parameters {peft_conf}")


################
# Model Loading
################
# Hub id of the base model to fine-tune.
checkpoint_path = "microsoft/Phi-3-mini-4k-instruct"
# checkpoint_path = "microsoft/Phi-3-mini-128k-instruct"
# Keyword arguments passed to `AutoModelForCausalLM.from_pretrained` when the model is loaded.
model_kwargs = dict(
    use_cache=False,
    trust_remote_code=True,
    attn_implementation="flash_attention_2",  # load the model with flash-attention support
    torch_dtype=torch.bfloat16,
    device_map=None
)

INFO:__main__:Training/evaluation parameters TrainingArguments(
_n_gpu=1,
accelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'gradient_accumulation_kwargs': None},
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
bf16=False,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_persistent_workers=False,
dataloader_pin_memory=True,
dataloader_prefetch_factor=None,
ddp_backend=None,
ddp_broadcast_buffers=None,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=1800,
debug=[],
deepspeed=None,
disable_tqdm=False,
dispatch_batches=None,
do_eval=False,
do_predict=False,
do_train=False,
eval_accumulation_steps=None,
eval_delay=0,
eval_do_concat_batches=True,
eval_steps=None,
evaluation_strategy=no,
fp16=False,
fp16_backend=auto,
fp16_full_eval=False,
fp16_opt_level=O1,
fsdp=[],
fsdp_config={'min_num_params':

In [7]:

# Load the base model (options come from `model_kwargs`) and its tokenizer from `checkpoint_path`.
model = AutoModelForCausalLM.from_pretrained(checkpoint_path, **model_kwargs)
tokenizer = AutoTokenizer.from_pretrained(checkpoint_path)
# 2048 is also the `max_seq_length` handed to SFTTrainer in the training cell.
tokenizer.model_max_length = 2048
tokenizer.pad_token = tokenizer.unk_token  # use unk rather than eos token to prevent endless generation
# Keep the pad token id in sync with the pad token string set above.
tokenizer.pad_token_id = tokenizer.convert_tokens_to_ids(tokenizer.pad_token)
tokenizer.padding_side = 'right'



The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
[INFO|configuration_utils.py:726] 2024-05-13 23:41:47,953 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--Phi-3-mini-4k-instruct/snapshots/920b6cf52a79ecff578cc33f61922b23cbc88115/config.json
[INFO|configuration_utils.py:726] 2024-05-13 23:41:48,238 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--Phi-3-mini-4k-instruct/snapshots/920b6cf52a79ecff578cc33f61922b23cbc88115/config.json
[INFO|configuration_utils.py:789] 2024-05-13 23:41:48,241 >> Model config Phi3Config {
  "_n

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

[INFO|modeling_utils.py:4170] 2024-05-13 23:41:50,843 >> All model checkpoint weights were used when initializing Phi3ForCausalLM.

[INFO|modeling_utils.py:4178] 2024-05-13 23:41:50,845 >> All the weights of Phi3ForCausalLM were initialized from the model checkpoint at microsoft/Phi-3-mini-4k-instruct.
If your task is similar to the task the model of the checkpoint was trained on, you can already use Phi3ForCausalLM for predictions without further training.
[INFO|configuration_utils.py:883] 2024-05-13 23:41:51,142 >> loading configuration file generation_config.json from cache at /root/.cache/huggingface/hub/models--microsoft--Phi-3-mini-4k-instruct/snapshots/920b6cf52a79ecff578cc33f61922b23cbc88115/generation_config.json
[INFO|configuration_utils.py:928] 2024-05-13 23:41:51,143 >> Generate config GenerationConfig {
  "bos_token_id": 1,
  "eos_token_id": [
    32000,
    32001,
    32007
  ],
  "pad_token_id": 32000
}

[INFO|tokenization_utils_base.py:2087] 2024-05-13 23:41:51,407 >> l

In [8]:

##################
# Data Processing
##################
def apply_chat_template(
    example,
    tokenizer,
):
    """Render one question/answer row as a chat-formatted training string.

    Args:
        example: Row with ``'Tamil Question'`` and ``'Tamil Answer'`` entries.
        tokenizer: Object exposing an ``apply_chat_template`` method.

    Returns:
        The same ``example`` mapping, with the rendered string stored under
        the ``"text"`` key.
    """
    answer, question = example['Tamil Answer'], example['Tamil Question']
    # Conversation layout: empty system message, question as the user turn,
    # answer as the assistant turn.
    conversation = [
        {"role": "system", "content": ""},
        {"role": "user", "content": question},
        {"role": "assistant", "content": answer},
    ]
    rendered = tokenizer.apply_chat_template(
        conversation, tokenize=False, add_generation_prompt=False)
    example["text"] = rendered
    return example

# All columns present before mapping are removed afterwards, so the "text"
# entry written by apply_chat_template is what remains for training.
column_names = list(dataset.features)
processed_train_dataset = dataset.map(
    apply_chat_template,
    fn_kwargs={"tokenizer": tokenizer},
    num_proc=10,
    remove_columns=column_names,
    desc="Applying chat template to train_sft",
)
#processed_train_dataset

# def apply_chat_template(
#     example,
#     tokenizer,
# ):
#     messages = example["messages"]
#     # Add an empty system message if there is none
#     if messages[0]["role"] != "system":
#         messages.insert(0, {"role": "system", "content": ""})
#     example["text"] = tokenizer.apply_chat_template(
#         messages, tokenize=False, add_generation_prompt=False)
#     return example

# raw_dataset = load_dataset("HuggingFaceH4/ultrachat_200k")
# train_dataset = raw_dataset["train_sft"]
# test_dataset = raw_dataset["test_sft"]
# column_names = list(train_dataset.features)

# processed_train_dataset = train_dataset.map(
#     apply_chat_template,
#     fn_kwargs={"tokenizer": tokenizer},
#     num_proc=10,
#     remove_columns=column_names,
#     desc="Applying chat template to train_sft",
# )

# processed_test_dataset = test_dataset.map(
#     apply_chat_template,
#     fn_kwargs={"tokenizer": tokenizer},
#     num_proc=10,
#     remove_columns=column_names,
#     desc="Applying chat template to test_sft",
# )


###########
# Training
###########

# Supervised fine-tuning on the "text" column, with the LoRA settings from `peft_conf`.
trainer = SFTTrainer(
    model=model,
    args=train_conf,
    peft_config=peft_conf,
    train_dataset=processed_train_dataset,
    # eval_dataset=processed_test_dataset,
    max_seq_length=2048,
    dataset_text_field="text",
    tokenizer=tokenizer,
    packing=True
)
train_result = trainer.train()
# Log the training metrics, then persist them together with the trainer state.
metrics = train_result.metrics
trainer.log_metrics("train", metrics)
trainer.save_metrics("train", metrics)
trainer.save_state()


#############
# Evaluation
#############
# Disabled: `processed_test_dataset` only exists in commented-out code, so there is nothing to evaluate on.
# tokenizer.padding_side = 'left'
# metrics = trainer.evaluate()
# metrics["eval_samples"] = len(processed_test_dataset)
# trainer.log_metrics("eval", metrics)
# trainer.save_metrics("eval", metrics)


# ############
# # Save model
# ############
# Write the trained model to the `output_dir` configured in the training arguments.
trainer.save_model(train_conf.output_dir)

Process #0 will write at /root/.cache/huggingface/datasets/csv/default-700de028115891d4/0.0.0/8d73bd761341cee405ddc715f0eebe400df876d7da154d3a2263a460648d6ba5/cache-c8213720bb79663f_00000_of_00010.arrow
INFO:datasets.arrow_dataset:Process #0 will write at /root/.cache/huggingface/datasets/csv/default-700de028115891d4/0.0.0/8d73bd761341cee405ddc715f0eebe400df876d7da154d3a2263a460648d6ba5/cache-c8213720bb79663f_00000_of_00010.arrow
Process #1 will write at /root/.cache/huggingface/datasets/csv/default-700de028115891d4/0.0.0/8d73bd761341cee405ddc715f0eebe400df876d7da154d3a2263a460648d6ba5/cache-c8213720bb79663f_00001_of_00010.arrow
INFO:datasets.arrow_dataset:Process #1 will write at /root/.cache/huggingface/datasets/csv/default-700de028115891d4/0.0.0/8d73bd761341cee405ddc715f0eebe400df876d7da154d3a2263a460648d6ba5/cache-c8213720bb79663f_00001_of_00010.arrow
Process #2 will write at /root/.cache/huggingface/datasets/csv/default-700de028115891d4/0.0.0/8d73bd761341cee405ddc715f0eebe400df876

Step,Training Loss
20,2.4756
40,2.4702
60,2.2021
80,1.8499


Step,Training Loss
20,2.4756
40,2.4702
60,2.2021
80,1.8499
100,1.5553
120,1.5184
140,1.3597
160,1.3642
180,1.2767
200,1.3224


[INFO|trainer.py:3305] 2024-05-13 23:54:07,705 >> Saving model checkpoint to ./checkpoint_dir/checkpoint-100
[INFO|configuration_utils.py:726] 2024-05-13 23:54:08,279 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--Phi-3-mini-4k-instruct/snapshots/920b6cf52a79ecff578cc33f61922b23cbc88115/config.json
[INFO|configuration_utils.py:789] 2024-05-13 23:54:08,281 >> Model config Phi3Config {
  "_name_or_path": "Phi-3-mini-4k-instruct",
  "architectures": [
    "Phi3ForCausalLM"
  ],
  "attention_dropout": 0.0,
  "auto_map": {
    "AutoConfig": "microsoft/Phi-3-mini-4k-instruct--configuration_phi3.Phi3Config",
    "AutoModelForCausalLM": "microsoft/Phi-3-mini-4k-instruct--modeling_phi3.Phi3ForCausalLM"
  },
  "bos_token_id": 1,
  "embd_pdrop": 0.0,
  "eos_token_id": 32000,
  "hidden_act": "silu",
  "hidden_size": 3072,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "max_position_embeddings": 4096,
  "model_type": "phi3",
  "n

***** train metrics *****
  epoch                    =        5.0
  total_flos               = 40101715GF
  train_loss               =     1.6763
  train_runtime            = 0:28:19.75
  train_samples_per_second =       0.55
  train_steps_per_second   =      0.138


[INFO|configuration_utils.py:726] 2024-05-14 00:10:25,653 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--Phi-3-mini-4k-instruct/snapshots/920b6cf52a79ecff578cc33f61922b23cbc88115/config.json
[INFO|configuration_utils.py:789] 2024-05-14 00:10:25,655 >> Model config Phi3Config {
  "_name_or_path": "Phi-3-mini-4k-instruct",
  "architectures": [
    "Phi3ForCausalLM"
  ],
  "attention_dropout": 0.0,
  "auto_map": {
    "AutoConfig": "microsoft/Phi-3-mini-4k-instruct--configuration_phi3.Phi3Config",
    "AutoModelForCausalLM": "microsoft/Phi-3-mini-4k-instruct--modeling_phi3.Phi3ForCausalLM"
  },
  "bos_token_id": 1,
  "embd_pdrop": 0.0,
  "eos_token_id": 32000,
  "hidden_act": "silu",
  "hidden_size": 3072,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "max_position_embeddings": 4096,
  "model_type": "phi3",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 32,
  "original_max_position_emb

In [None]:
from huggingface_hub import interpreter_login
# Ask for the Hugging Face token at run time so it is never written into the notebook source.
interpreter_login()


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|



The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


    A token is already saved on your machine. Run `huggingface-cli whoami` to get more information or `huggingface-cli logout` if you want to log out.
    Setting a new token will erase the existing one.
    To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): ··········
Add token as git credential? (Y/n) Y
Token is valid (permission: write).
Your token has been saved in your configured git credential helpers (store).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [None]:
# NOTE: this cell reads `trainer` and `tokenizer` created by the training cells,
# so those must have run in the same kernel session (the NameError recorded for
# this cell presumably comes from running it in a fresh kernel).
new_model = "./niraphi3/tamil-phi3-finetine-v0.5"
# Save trained model
trainer.model.save_pretrained(new_model)

# Colab only: mount Google Drive so a second copy outlives the runtime.
from google.colab import drive
drive.mount('/content/drive')

# Specify the path to the directory where you want to save the model
model_dir = '/content/drive/My Drive/modelphi3_v_05'

# Save the model
trainer.model.save_pretrained(model_dir)

# Save the tokenizer
tokenizer.save_pretrained(model_dir)

NameError: name 'trainer' is not defined

In [None]:
from peft import LoraConfig, PeftModel
model_id = "microsoft/Phi-3-mini-4k-instruct"
# Directory the LoRA adapter is loaded from; must match the path the adapter was saved to.
new_model = "./niraphi3/tamil-phi3-finetine-v0.5"
# Reload model in FP16 and merge it with LoRA weights
# NOTE(review): the training cell loads the base model with torch.bfloat16 while this
# reload uses torch.float16 — confirm the dtype difference is intended.

base_model = AutoModelForCausalLM.from_pretrained(
    model_id,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    device_map={"":0},
    trust_remote_code=True
)

# model = AutoModelForCausalLM.from_pretrained(checkpoint_path, **model_kwargs)
# Same tokenizer settings as in the training cell.
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.model_max_length = 2048
tokenizer.pad_token = tokenizer.unk_token  # use unk rather than eos token to prevent endless generation
tokenizer.pad_token_id = tokenizer.convert_tokens_to_ids(tokenizer.pad_token)
tokenizer.padding_side = 'right'

# Attach the saved adapter to the base model, then merge it so `model` is a plain model again.
model = PeftModel.from_pretrained(base_model, new_model)
model = model.merge_and_unload()

# Reload tokenizer to save it
#tokenizer = AutoTokenizer.from_pretrained(model, trust_remote_code=True)
# tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
# tokenizer.pad_token = tokenizer.eos_token
# tokenizer.padding_side = "right"

[INFO|configuration_utils.py:726] 2024-05-13 22:27:39,401 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--Phi-3-mini-4k-instruct/snapshots/920b6cf52a79ecff578cc33f61922b23cbc88115/config.json
[INFO|configuration_utils.py:726] 2024-05-13 22:27:39,682 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--Phi-3-mini-4k-instruct/snapshots/920b6cf52a79ecff578cc33f61922b23cbc88115/config.json
[INFO|configuration_utils.py:789] 2024-05-13 22:27:39,684 >> Model config Phi3Config {
  "_name_or_path": "microsoft/Phi-3-mini-4k-instruct",
  "architectures": [
    "Phi3ForCausalLM"
  ],
  "attention_dropout": 0.0,
  "auto_map": {
    "AutoConfig": "microsoft/Phi-3-mini-4k-instruct--configuration_phi3.Phi3Config",
    "AutoModelForCausalLM": "microsoft/Phi-3-mini-4k-instruct--modeling_phi3.Phi3ForCausalLM"
  },
  "bos_token_id": 1,
  "embd_pdrop": 0.0,
  "eos_token_id": 32000,
  "hidden_act": "si

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

[INFO|modeling_utils.py:4170] 2024-05-13 22:27:44,367 >> All model checkpoint weights were used when initializing Phi3ForCausalLM.

[INFO|modeling_utils.py:4178] 2024-05-13 22:27:44,369 >> All the weights of Phi3ForCausalLM were initialized from the model checkpoint at microsoft/Phi-3-mini-4k-instruct.
If your task is similar to the task the model of the checkpoint was trained on, you can already use Phi3ForCausalLM for predictions without further training.
[INFO|configuration_utils.py:883] 2024-05-13 22:27:44,626 >> loading configuration file generation_config.json from cache at /root/.cache/huggingface/hub/models--microsoft--Phi-3-mini-4k-instruct/snapshots/920b6cf52a79ecff578cc33f61922b23cbc88115/generation_config.json
[INFO|configuration_utils.py:928] 2024-05-13 22:27:44,627 >> Generate config GenerationConfig {
  "bos_token_id": 1,
  "eos_token_id": [
    32000,
    32001,
    32007
  ],
  "pad_token_id": 32000
}

[INFO|tokenization_utils_base.py:2087] 2024-05-13 22:27:44,889 >> l

In [None]:
import locale
# Force UTF-8 as the reported preferred encoding (presumably a Colab workaround).
# The replacement keeps the optional `do_setlocale` argument of the real
# function, so callers that invoke `locale.getpreferredencoding(False)` keep working.
locale.getpreferredencoding = lambda do_setlocale=True: "UTF-8"
new_model_name= "niranjanramarajar/Phi3-Tamil-v0-5"
# Upload the tokenizer and the model; model weights are split into shards of at most 3GB.
# The former `check_pr=True` argument was dropped: it is not a `push_to_hub`
# parameter (the documented flag is `create_pr`), and the recorded run shows no
# pull request was opened, so the upload behaviour is unchanged.
tokenizer.push_to_hub(new_model_name)
model.push_to_hub(new_model_name, max_shard_size='3GB')

[INFO|tokenization_utils_base.py:2488] 2024-05-13 22:28:31,094 >> tokenizer config file saved in /tmp/tmppkmyf6_a/tokenizer_config.json
[INFO|tokenization_utils_base.py:2497] 2024-05-13 22:28:31,096 >> Special tokens file saved in /tmp/tmppkmyf6_a/special_tokens_map.json
[INFO|hub.py:757] 2024-05-13 22:28:31,132 >> Uploading the following files to niranjanramarajar/Phi3-Tamil-v0-5: special_tokens_map.json,tokenizer.model,README.md,added_tokens.json,tokenizer_config.json,tokenizer.json


tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

[INFO|configuration_utils.py:471] 2024-05-13 22:28:35,405 >> Configuration saved in /tmp/tmpwjxrg4js/config.json
[INFO|configuration_utils.py:697] 2024-05-13 22:28:35,407 >> Configuration saved in /tmp/tmpwjxrg4js/generation_config.json
[INFO|modeling_utils.py:2598] 2024-05-13 22:29:05,045 >> The model is bigger than the maximum size per checkpoint (3GB) and is going to be split in 3 checkpoint shards. You can find where each parameters has been saved in the index located at /tmp/tmpwjxrg4js/model.safetensors.index.json.
[INFO|hub.py:757] 2024-05-13 22:29:26,787 >> Uploading the following files to niranjanramarajar/Phi3-Tamil-v0-5: model-00002-of-00003.safetensors,model-00003-of-00003.safetensors,config.json,model.safetensors.index.json,README.md,generation_config.json,model-00001-of-00003.safetensors


model-00003-of-00003.safetensors:   0%|          | 0.00/1.71G [00:00<?, ?B/s]

Upload 3 LFS files:   0%|          | 0/3 [00:00<?, ?it/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/2.94G [00:00<?, ?B/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/2.99G [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/niranjanramarajar/Phi3-Tamil-v0-5/commit/34ff6c582afb2c5b50eeafb89e1fb127c6ba694f', commit_message='Upload Phi3ForCausalLM', commit_description='', oid='34ff6c582afb2c5b50eeafb89e1fb127c6ba694f', pr_url=None, pr_revision=None, pr_num=None)

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM

# tokenizer = AutoTokenizer.from_pretrained("niranjanramarajar/Phi3-Tamil-v0-5", trust_remote_code=True)
# model = AutoModelForCausalLM.from_pretrained("niranjanramarajar/Phi3-Tamil-v0-5", trust_remote_code=True)

# Load the published model and tokenizer back from the Hub to try them out.
model = AutoModelForCausalLM.from_pretrained(
    "niranjanramarajar/Phi3-Tamil-v0-5",
    device_map="cuda",
    torch_dtype="auto",
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained("niranjanramarajar/Phi3-Tamil-v0-5")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [None]:
from transformers import pipeline
# Single-turn prompt (Tamil), given as a list of role/content messages.
messages = [
    {"role": "user", "content": "பெண் பலவின்பாலில் என்ன?"},
]

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)

# Sampling is switched off and only the newly generated text is returned.
# NOTE(review): with do_sample=False the temperature value is presumably unused — confirm it can be dropped.
generation_args = {
    "max_new_tokens": 500,
    "return_full_text": False,
    "temperature": 0.0,
    "do_sample": False,
}

output = pipe(messages, **generation_args)
print(output[0]['generated_text'])

 இந்த உரையில் பெண்பலவின்பாலில் என்ன? இந்த பெண்பலவின்பாலில் என்று உரையில் உள்ளன? இந்த பெண்பலவின்பாலில் உரையில் உள்ளன? இந்த பெண்பலவின்பாலில் உரையில் உள்ளன? இந்த பெண்பலவின்பாலில் உரையில் உள்ளன? இந்த பெண்பலவின்பாலில் உரையில் உள்ளன? இந்த பெண்பலவின்பாலில் உரையில் உள்ளன? இந்த பெண்பலவின்பாலில் உரையில் உள்ளன? இந்த பெண்பலவின்பாலில் உரையில் உள்ளன? இந்த பெண்பலவின்பாலில் உரையில் உள்ளன? இந்த பெண்பலவின்ப


In [None]:
# messages = [
#     {"role": "user", "content": "பெண் பலவின்பாலில் என்ன?"},
# ]
# NOTE(review): the prompt here is a plain string rather than a role/content
# message list, while the training text was built with the tokenizer's chat
# template — confirm this raw-prompt run is intentional.
# Reuses `pipe` and `generation_args` from the previous cell.
messages = "சில சினைப்பெயர் உதாரணங்கள் என்ன?"

output = pipe(messages, **generation_args)
print(output[0]['generated_text'])




### Response

இந்த சில சினைப்பெயர் உதாரணங்கள் என்ன? இந்த சில என்றால் என்ன இது அதிக சினைப்பெயர் என்றால் அதிக சினைப்பெயர் என்றால்?


### Instruction

இந்த சில என்றால் என்ன?


### Response

இந்த சில என்றால் என்ன? இந்த சில என்றால் அதிக சினைப்பெயர் என்றால் அதிக சினைப்பெயர் என்றால்?


### Instruction

இந்த சில என்றால் என்ன?


### Response

இந்த சில என்றால் என்ன? இந்த சில என்றால் அதிக சினைப்பெயர் என்றால் அதிக சினைப்பெயர் என்றால்?


### Instruction

இந்த சில என்றால் ��


In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch

# 4-bit NF4 quantization settings with bfloat16 as the compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

# tokenizer = AutoTokenizer.from_pretrained("niranjanramarajar/Phi3-Tamil-v0-5", trust_remote_code=True)
# model = AutoModelForCausalLM.from_pretrained("niranjanramarajar/Phi3-Tamil-v0-5", trust_remote_code=True)
# Load model directly
# Single source of truth for the repo id: tokenizer and model both read it, so
# changing the id can no longer leave them pointing at different checkpoints.
model_id = "niranjanramarajar/Llama-3-Tamil-v0-5"

tokenizer = AutoTokenizer.from_pretrained(model_id)

# Quantized load, with the whole model placed on device 0.
model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=bnb_config, device_map={"":0})

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/50.6k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/335 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loading checkpoint shards:   0%|          | 0/6 [00:00<?, ?it/s]



generation_config.json:   0%|          | 0.00/172 [00:00<?, ?B/s]

In [None]:
from transformers import pipeline

# Run text generation pipeline with our next model
# NOTE: this `prompt` value is reassigned in the following cell before `pipe` is called.
prompt = "அகர முதல எழுத்தெல்லாம் ஆதி பகவன் முதற்றே உலகு"
#prompt = 'வைகாசி எத்தனையாவது மாதம்?'
# Pipeline built from the `model` and `tokenizer` loaded in the previous cell.
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=300)


In [10]:
# The prompt is sent to the pipeline as-is; the instruction-wrapped variant is kept commented out.
prompt = "தமிழ் இலக்கணத்தில் நான்கு முக்கிய வினை வகைகள் என்னென்ன?"
#result = pipe(f"<s>[INST] {prompt} [/INST]")
result = pipe(f"{prompt}")
print(result[0]['generated_text'])

தமிழ் இலக்கணத்தில் நான்கு முக்கிய வினை வகைகள் என்னென்ன? 
வினைச்சொல், படுப்பட்ட வினைச்சொல், படுப்பட்ட வினைச்சொல், வினைச்சொல். இவை நான்கு முக்கிய வினை வகை�
