In [1]:
!pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 transformers==4.31.0 trl==0.4.7

In [2]:
!git clone https://github.com/stokome/English-Hinglish-LLAMA2.git

fatal: destination path 'English-Hinglish-LLAMA2' already exists and is not an empty directory.


In [3]:
import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from datasets import Dataset
from peft import LoraConfig, PeftModel
from trl import SFTTrainer

In [4]:
# LLAMA-2 model
model_name = "meta-llama/Llama-2-7b-hf"

# Fine-tuned model name
new_model = "/content/English-Hinglish-LLAMA2/llama-2-7b-hinglish_weights"

# Load the entire model on the GPU 0
device_map = {"": 0}

In [5]:

# Reload model in FP16 and merge it with LoRA weights
base_model = AutoModelForCausalLM.from_pretrained(
                                                  model_name,
                                                  low_cpu_mem_usage=True,
                                                  return_dict=True,
                                                  torch_dtype=torch.float16,
                                                  device_map=device_map,
                                                  use_auth_token="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"
                                                  )
model = PeftModel.from_pretrained(base_model, new_model)
model = model.merge_and_unload()
# Reload tokenizer to save it
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, use_auth_token="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")
tokenizer.add_special_tokens({'pad_token': '[PAD]'})
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/776 [00:00<?, ?B/s]



Downloading tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

In [6]:
import re
def convert_to_hinglish(query, max_length=64):
    non_english_chars_pattern = re.compile(r'[^a-zA-Z]+')
    system_prompt = "Convert English to Hinglish"
    pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=max_length)
    result = pipe(f"<s>[INST] <<SYS>> {system_prompt} <</SYS>>{query}[/INST]")
    result = result[0]['generated_text'].split('[/INST]')[1]
    for i in range(len(result)):
        if result[i]!=' ' and not result[i].isalpha() and result[i] not in [',', "'"]:
            result = result[:i]
            break
    return result

In [14]:
res = convert_to_hinglish("Definitely share your feedback in the comment section.", 70)
res

' definitely comment section me aapke feedback ko share kare'

In [13]:
res = convert_to_hinglish("So even if it's a big video, I will clearly mention all the products.", 100)
res

" To even if it's a big video, I will clearly mention all the products"

In [12]:
res = convert_to_hinglish("I was waiting for my bag", 50)
res

' maine meri bag par wait kar raha tha '

In [10]:
res = convert_to_hinglish("My name is Jay", 50)
res

' mera naam Jay hai '

In [11]:
res = convert_to_hinglish("I need wake early tomorrow", 50)
res

' mujhe kal early wake karna hai '