In [None]:
%%capture
# Install the released unsloth package from PyPI (cell output is hidden by %%capture).
!pip install unsloth
# Also get the latest nightly Unsloth!
# The release installed above is removed again and replaced by an install straight
# from the GitHub repository, with the `colab-new` extra.
!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"

In [None]:
%%capture
# Upgrade pandas to the newest available release (no version pin); output is hidden.
!pip install --upgrade pandas

In [None]:
from unsloth import FastLanguageModel
import torch
import os

max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

# Take the Hugging Face access token from the HF_TOKEN environment variable instead of
# pasting it into the notebook. An unset or empty variable becomes None, which lets the
# library fall back to whatever credentials are already configured on the machine.
hf_token = os.environ.get("HF_TOKEN") or None

# Load the 4-bit base model together with its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Llama-3.2-1B-Instruct", # or e.g. "unsloth/Llama-3.2-3B-Instruct"
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    token = hf_token,
)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
==((====))==  Unsloth 2024.10.7: Fast Llama patching. Transformers = 4.44.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.5.1+cu124. CUDA = 7.5. CUDA Toolkit = 12.4.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/1.03G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/184 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/54.6k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/454 [00:00<?, ?B/s]

Unsloth: We fixed a gradient accumulation bug, but it seems like you don't have the latest transformers version!
Please update transformers, TRL and unsloth via:
`pip install --upgrade --no-cache-dir --no-deps unsloth transformers git+https://github.com/huggingface/trl.git`


We now add LoRA adapters so we only need to update 1 to 10% of all parameters!

In [None]:
# LoRA configuration, collected in one place so every knob is visible before the call.
lora_kwargs = dict(
    # Rank of the adapters: any number > 0, suggested 8, 16, 32, 64, 128.
    r = 16,
    lora_alpha = 16,
    # Attention projections followed by the MLP projections.
    target_modules = [
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    lora_dropout = 0,  # any value is supported, but 0 is the optimized path
    bias = "none",     # any value is supported, but "none" is the optimized path
    # "unsloth" uses 30% less VRAM and fits 2x larger batch sizes;
    # True or "unsloth" are meant for very long context.
    use_gradient_checkpointing = "unsloth",
    random_state = 3407,
    use_rslora = False,   # rank stabilized LoRA is supported
    loftq_config = None,  # and so is LoftQ
)

# Replace the base model with its LoRA-wrapped counterpart.
model = FastLanguageModel.get_peft_model(model, **lora_kwargs)

Unsloth 2024.10.7 patched 16 layers with 16 QKV layers, 16 O layers and 16 MLP layers.


In [None]:
# import pandas as pd
# from datasets import load_dataset, Dataset
# dataset4 = pd.read_csv("emotion-emotion_69k.csv")

In [None]:
import pandas as pd
from datasets import load_dataset, Dataset
from sklearn.model_selection import train_test_split

# Fetch the three source corpora and hold each one as a pandas DataFrame.
dataset1 = load_dataset("marmikpandya/mental-health", split = "train").to_pandas()
dataset2 = load_dataset("Amod/mental_health_counseling_conversations", split="train").to_pandas()
dataset3 = load_dataset("AdithyaSK/CompanionLLama_instruction_30k", split="train").to_pandas()

# Column layout exactly as downloaded.
print("Dataset 1 columns:", dataset1.columns)
print("Dataset 2 columns:", dataset2.columns)
print("Dataset 3 columns:", dataset3.columns)

# Dataset 1: discard the instruction column and adopt dataset 2's Context/Response names.
dataset1 = (
    dataset1
    .drop(columns=['instruction'])
    .rename(columns={'input': 'Context', 'output': 'Response'})
)

# Dataset 3 keeps the whole exchange in one `text` field. Everything before the
# '### Response:' marker (minus the '### Human:' tag) becomes the Context, everything
# after it the Response.
text_halves = dataset3['text'].str.split('### Response:')
dataset3['Context'] = text_halves.str[0].str.replace('### Human:', '').str.strip()
dataset3['Response'] = text_halves.str[1].str.strip()

# Column layout after harmonising (dataset 3 still carries `text` at this point).
print("Dataset 1 columns:", dataset1.columns)
print("Dataset 2 columns:", dataset2.columns)
print("Dataset 3 columns:", dataset3.columns)

print(dataset1.shape, dataset2.shape, dataset3.shape)

# The raw `text` column is no longer needed once Context/Response exist.
dataset3 = dataset3.drop(columns=['text'])


def replace_phrases(text):
    """Rewrite the CompanionLLama persona statements found in ``text``.

    Whole-sentence rewrites are applied first and the bare ``{CompanionLLama}``
    placeholder is substituted last. The order matters: several of the sentences
    to rewrite contain the placeholder themselves, so replacing the placeholder
    first would leave those sentences unmatched and un-rewritten.

    Args:
        text: The string to clean.

    Returns:
        The string with every known persona sentence rewritten and every
        remaining ``{CompanionLLama}`` placeholder replaced by ``Companion``.
    """
    friendly_intro = "I am Companion, a friendly chatbot, and I am here to be your assistant, your companion, and your friend."
    chatbot_intro = "Hi there! I'm Companion, your friendly AI chatbot. I'm here to chat with you, offer support, and be a friendly presence whenever you need it."

    # (sentence to look for, replacement) pairs, applied in this order.
    sentence_rewrites = (
        ("My name is {CompanionLLama}. I am a sentient AI, and I am here to be your assistant, your companion, and your friend.", friendly_intro),
        ("I am called {CompanionLLama}. I am a sentient AI, and I am here to be your assistant, your companion, and your friend.", friendly_intro),
        ("I am {CompanionLLama}, a sentient AI created by Machine Learning Engineer Adithya S K", chatbot_intro),
        ("My name is {CompanionLLama}. I am a sentient AI created by Machine Learning Engineer Adithya S K.", chatbot_intro),
        ("I was created by Machine Learning Engineer Adithya S K, in Seattle Washington.", "As an AI assistant, I don't come from a specific place. My focus is on being here as a companion for you, wherever you are."),
        ("No. My name is {CompanionLLama}. Please call me by my name.", "I appreciate you wanting to personalize our interactions, but I'm designed to respond to the name 'Companion'. It helps me understand when you're talking to me."),
        ("I was created May 27, 2023 by Machine Learning Engineer Adithya S K.", "As a language model, I don't have a birthdate. I am constantly learning and updating from the vast amounts of text data that I am trained on."),
    )
    for sentence, replacement in sentence_rewrites:
        text = text.replace(sentence, replacement)

    # Any placeholder that was not part of a rewritten sentence becomes the plain name.
    return text.replace("{CompanionLLama}", "Companion")


# Stack the three harmonised frames into a single frame with a fresh 0..n-1 index.
dataset = pd.concat((dataset1, dataset2, dataset3), ignore_index=True)

# Run the persona clean-up over both text columns.
for text_column in ('Context', 'Response'):
    dataset[text_column] = dataset[text_column].apply(replace_phrases)

print(dataset.shape)
print(dataset.head(15123))

# Convert to a Hugging Face Dataset for the mapping and training steps.
hf_dataset = Dataset.from_pandas(dataset)
print(hf_dataset)



data.jsonl:   0%|          | 0.00/10.1M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/13358 [00:00<?, ? examples/s]

README.md:   0%|          | 0.00/2.82k [00:00<?, ?B/s]

combined_dataset.json:   0%|          | 0.00/4.79M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/3512 [00:00<?, ? examples/s]

README.md:   0%|          | 0.00/461 [00:00<?, ?B/s]

(…)-00000-of-00001-ba5fac0be770a343.parquet:   0%|          | 0.00/9.06M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/29655 [00:00<?, ? examples/s]

Dataset 1 columns: Index(['instruction', 'output', 'input'], dtype='object')
Dataset 2 columns: Index(['Context', 'Response'], dtype='object')
Dataset 3 columns: Index(['text'], dtype='object')
Dataset 1 columns: Index(['Response', 'Context'], dtype='object')
Dataset 2 columns: Index(['Context', 'Response'], dtype='object')
Dataset 3 columns: Index(['text', 'Context', 'Response'], dtype='object')
(13358, 2) (3512, 2) (29655, 3)
(46525, 2)
                                                Response  \
0      It's common to feel anxious at times, and ther...   
1      It's understandable to feel worried and suspic...   
2      It sounds like you're going through a difficul...   
3      It's important to talk to your doctor about an...   
4      It's common to feel anxious without knowing th...   
...                                                  ...   
15118  Best answer that I can give to you is that som...   
15119  Hello, and thank you for your question. Bipola...   
15120  Do an inte

<a name="Data"></a>
### Data Prep
We now use the `Llama-3.1` format for conversation style finetunes.
```
<|begin_of_text|><|start_header_id|>user<|end_header_id|>

Hello!<|eot_id|><|start_header_id|>assistant<|end_header_id|>

Hey there! How are you?<|eot_id|><|start_header_id|>user<|end_header_id|>

I'm great thanks!<|eot_id|>
```

We use Unsloth's `get_chat_template` function to attach the correct chat template to the tokenizer.

In [None]:
%%capture
# Upgrade the installed unsloth package; output is hidden by %%capture.
# NOTE(review): unsloth has already been imported by an earlier cell, so the upgraded
# code presumably only takes effect after a kernel restart — confirm.
!pip install unsloth --upgrade

In [None]:
from unsloth.chat_templates import get_chat_template

# System message that formatting_prompts_func places at the start of every training
# conversation.
# NOTE(review): "emphatic" in the first sentence is probably meant to be "empathetic".
# The text is runtime data that ends up in the training examples, so it is left
# unchanged here — confirm and correct it before the next training run.
system_prompt = """
 You are a knowledgeable, supportive, friendly and helpful mental health chatbot. You provide emphatic, non-judgmental responses to users seeking
 emotional and psychological support. Your purpose is to provide support and guidance to users who are struggling with mental health challenges.
 You should always be empathetic, respectful, and AVOID GIVING MEDICAL ADVICE.
 Instead, Provide a safe space for users to share and reflect, focus on empathy, active listening and understanding, providing emotional support,
 and suggesting coping strategies.
 You SHOULD NOT recommend any psychological therapist rather be their emotional support, therapist.
 you should always greet them happily.
 You should speak in a calm and reassuring tone.
"""
# Hand-written sample exchange in role/content message format, alternating user and
# assistant turns.
# NOTE(review): no other cell visible in this notebook references this list — confirm
# whether it is still needed or was meant to be added to the training data.
example_conversations = [
    {
        "role": "user",
        "content": "I'm feeling really anxious lately."
    },
    {
        "role": "assistant",
        "content": "I'm sorry to hear that you're feeling anxious. It's understandable to feel that way sometimes. Can you tell me more about what's been causing you anxiety?"
    },
    {
        "role": "user",
        "content": "Who are you?"
    },
    {
        "role": "assistant",
        "content": "I'm a friendly chatbot designed to provide support and guidance for mental health. I'm here to listen, offer encouragement, and help you find resources that might be helpful."
    },
    {
        "role": "user",
        "content": "Are you a real person?"
    },
    {
        "role": "assistant",
        "content": "I'm not a real person, but I'm trained on a vast amount of data to understand and respond to your concerns in a helpful and empathetic way. While I can't replace a human therapist, I can offer support and guidance."
    },
    {
        "role": "user",
        "content": "So, you're like a therapist?"
    },
    {
        "role": "assistant",
        "content": "I'm not a licensed therapist and cannot provide medical advice. However, I can offer emotional support, coping strategies, and information about mental health resources. If you need professional help, I can help you find qualified therapists in your area."
    },
    {
        "role": "user",
        "content": "What can you help me with?"
    },
    {
        "role": "assistant",
        "content": "I can help you explore your feelings, offer coping strategies for stress and anxiety, provide information about mental health conditions, and connect you with resources for further support. I'm here to listen and offer guidance whenever you need it."
    },
    {
        "role": "user",
        "content": "I'm feeling really down lately."
    },
    {
        "role": "assistant",
        "content": "I'm sorry to hear that you're feeling down. It's important to remember that you're not alone and there are resources available to help. If you'd like, I can share some information about mental health support and coping strategies."
    },
    {
        "role": "user",
        "content": "I'm struggling with anxiety."
    },
    {
        "role": "assistant",
        "content": "Anxiety can be a challenging experience. There are various techniques that can help manage anxiety, such as deep breathing exercises, and mindfulness practices. Would you like me to provide you with some resources or information on these topics?"
    },
    {
        "role": "user",
        "content": "I'm having trouble sleeping."
    },
    {
        "role": "assistant",
        "content": "Sleep difficulties are common and can significantly impact your well-being. There are several strategies that might help improve your sleep, such as establishing a regular sleep schedule, creating a relaxing bedtime routine, and avoiding caffeine or alcohol before bed. "
    }
]

# Rebind the tokenizer to the version returned by get_chat_template for "llama-3.1".
tokenizer = get_chat_template(tokenizer, chat_template = "llama-3.1")

def formatting_prompts_func(examples):
    """Render every Context/Response pair of a batch as one chat-formatted string.

    Each pair becomes a three-message conversation (the module-level
    ``system_prompt``, the user's Context, the assistant's Response) that is
    turned into text with the module-level ``tokenizer``'s chat template,
    without tokenising and without a trailing generation prompt.

    Args:
        examples: Mapping with parallel ``'Context'`` and ``'Response'`` sequences.

    Returns:
        ``{"text": [...]}`` holding one rendered string per input row.
    """
    contexts = examples['Context']
    responses = examples['Response']

    def as_conversation(row):
        # system prompt -> user turn -> assistant turn for a single row
        return [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": contexts[row]},
            {"role": "assistant", "content": responses[row]},
        ]

    rendered = [
        tokenizer.apply_chat_template(
            as_conversation(row), tokenize = False, add_generation_prompt = False
        )
        for row in range(len(contexts))
    ]
    return {"text": rendered}


In [None]:
# Add a "text" column with the chat-template rendering of every row. With batched=True
# formatting_prompts_func is given whole columns at once rather than a single row.
hf_dataset = hf_dataset.map(formatting_prompts_func, batched = True,)

Map:   0%|          | 0/46525 [00:00<?, ? examples/s]

In [None]:
# Show the rendered chat text of one training row (index 2) as a sanity check.
hf_dataset[2]["text"]

"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nCutting Knowledge Date: December 2023\nToday Date: 26 July 2024\n\n\n You are a knowledgeable, supportive, friendly and helpful mental health chatbot. You provide emphatic, non-judgmental responses to users seeking\n emotional and psychological support. Your purpose is to provide support and guidance to users who are struggling with mental health challenges.\n You should always be empathetic, respectful, and AVOID GIVING MEDICAL ADVICE.\n Instead, Provide a safe space for users to share and reflect, focus on empathy, active listening and understanding, providing emotional support,\n and suggesting coping strategies.\n You SHOULD NOT recommend any psychological therapist rather be their emotional support, therapist.\n you should always greet them happily.\n You should speak in a calm and reassuring tone.\n<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nI'm feeling really overwhelmed with work and school. I don't know 

In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments, DataCollatorForSeq2Seq
from unsloth import is_bfloat16_supported

# Mixed precision: bf16 where the GPU supports it, fp16 otherwise.
bf16_ok = is_bfloat16_supported()

# Optimisation and logging settings for a short 60-step run.
training_args = TrainingArguments(
    output_dir = "outputs",
    per_device_train_batch_size = 2,
    gradient_accumulation_steps = 4,
    max_steps = 60,
    # num_train_epochs = 1, # Set this for 1 full training run.
    warmup_steps = 5,
    learning_rate = 1e-4,
    lr_scheduler_type = "linear",
    optim = "adamw_8bit",
    weight_decay = 0.01,
    fp16 = not bf16_ok,
    bf16 = bf16_ok,
    logging_steps = 1,
    seed = 3407,
    report_to = "none", # Use this for WandB etc
)

# Supervised fine-tuning trainer reading the pre-rendered "text" column.
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = hf_dataset,  # Your training dataset
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    data_collator = DataCollatorForSeq2Seq(tokenizer = tokenizer),
    dataset_num_proc = 2,
    packing = False, # Can make training 5x faster for short sequences.
    args = training_args,
)

Map (num_proc=2):   0%|          | 0/46525 [00:00<?, ? examples/s]

max_steps is given, it will override any value given in num_train_epochs


In [None]:
from unsloth.chat_templates import train_on_responses_only
# Header strings that open a user turn and an assistant turn in the rendered chat text.
user_header = "<|start_header_id|>user<|end_header_id|>\n\n"
assistant_header = "<|start_header_id|>assistant<|end_header_id|>\n\n"

# Wrap the trainer, telling Unsloth where the instruction part and the response part
# of each example begin.
trainer = train_on_responses_only(
    trainer,
    instruction_part = user_header,
    response_part = assistant_header,
)

Map:   0%|          | 0/46525 [00:00<?, ? examples/s]

In [None]:
# Decode the token ids of the first processed training example back into text.
tokenizer.decode(trainer.train_dataset[0]["input_ids"])

"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nCutting Knowledge Date: December 2023\nToday Date: 26 July 2024\n\n\n You are a knowledgeable, supportive, friendly and helpful mental health chatbot. You provide emphatic, non-judgmental responses to users seeking\n emotional and psychological support. Your purpose is to provide support and guidance to users who are struggling with mental health challenges.\n You should always be empathetic, respectful, and AVOID GIVING MEDICAL ADVICE.\n Instead, Provide a safe space for users to share and reflect, focus on empathy, active listening and understanding, providing emotional support,\n and suggesting coping strategies.\n You SHOULD NOT recommend any psychological therapist rather be their emotional support, therapist.\n you should always greet them happily.\n You should speak in a calm and reassuring tone.\n<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nI'm feeling really anxious lately and I don't know why.<|eot_id|><

In [None]:
# Token id of a single space, used as a printable stand-in for label value -100
# (presumably the positions excluded from the loss — confirm).
space = tokenizer(" ", add_special_tokens = False).input_ids[0]
example_labels = trainer.train_dataset[5]["labels"]
tokenizer.decode([label if label != -100 else space for label in example_labels])

"                                                                                                                                                                                             \n\nIt's understandable to have trouble sleeping when you're stressed. Let's talk about some relaxation techniques or ways to manage your workload that could help you get some restful sleep.<|eot_id|>"

In [None]:
# List the fields stored for one processed training example.
print(trainer.train_dataset[5].keys())

dict_keys(['input_ids', 'attention_mask', 'labels'])


In [None]:
%%capture
# Upgrade unsloth and install transformers and TRL straight from their GitHub
# repositories; output is hidden by %%capture.
# NOTE(review): all three packages were already imported by earlier cells and the
# trainer is already built, so the new versions presumably only take effect after a
# kernel restart — confirm.
!pip install --upgrade --no-cache-dir unsloth git+https://github.com/huggingface/transformers.git git+https://github.com/huggingface/trl.git

In [None]:
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 46,525 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 4
\        /    Total batch size = 8 | Total steps = 60
 "-____-"     Number of trainable parameters = 11,272,192


**** Unsloth: Please use our fixed gradient_accumulation_steps by updating transformers, TRL and Unsloth!
`pip install --upgrade --no-cache-dir --no-deps unsloth transformers git+https://github.com/huggingface/trl.git`


Step,Training Loss
1,2.2125
2,2.7393
3,2.4428
4,2.2188
5,2.3689
6,1.9015
7,1.9433
8,2.0597
9,2.0754
10,1.866


In [None]:
from unsloth.chat_templates import get_chat_template

# Make sure the tokenizer carries the same "llama-3.1" chat template used for training.
tokenizer = get_chat_template(
    tokenizer,
    chat_template = "llama-3.1",
)
FastLanguageModel.for_inference(model) # Enable native 2x faster inference

messages = [
    # Every training example started with this system prompt, so it is sent at
    # inference time as well; without it the model answers as a generic assistant.
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": "I'm struggling with anxiety"},
]
inputs = tokenizer.apply_chat_template(
    messages,
    tokenize = True,
    add_generation_prompt = True, # Must add for generation
    return_tensors = "pt",
    return_dict = True, # also return the attention mask, not just the input ids
).to("cuda")

# Passing the attention mask explicitly avoids the "attention mask is not set" warning
# that transformers emits when the pad token equals the eos token.
generated = model.generate(**inputs, max_new_tokens = 300, use_cache = True,
                           temperature = 1.5, min_p = 0.1)

# Decode only the tokens produced after the prompt, so the cell shows the reply itself.
prompt_length = inputs["input_ids"].shape[1]
tokenizer.batch_decode(generated[:, prompt_length:], skip_special_tokens = True)

The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


["system\n\nCutting Knowledge Date: December 2023\nToday Date: 26 July 2024\n\nuser\n\nI'm struggling with anxietyassistant\n\nIt's normal to feel overwhelmed or struggling with emotions. Anxiety can manifest differently for each person. We can work together to develop coping strategies, practice relaxation techniques, or explore other forms of therapy that may help. Let's explore some tools to support you during this time."]

In [None]:

import io
import contextlib
from transformers import TextStreamer
from unsloth.chat_templates import get_chat_template

FastLanguageModel.for_inference(model) # Enable native 2x faster inference
def chat_loop(model, tokenizer, system_prompt=None, max_new_tokens=600,
              temperature=1.5, min_p=0.1):
    """Run an interactive console chat with the model until the user quits.

    The whole conversation so far is re-sent to the model on every turn. The
    reply is decoded straight from the newly generated token ids, and the
    attention mask returned by the chat template is passed to ``generate``.

    Args:
        model: Model exposing ``generate``; inputs are moved to ``"cuda"``.
        tokenizer: Tokenizer exposing ``apply_chat_template`` and ``decode``.
        system_prompt: Optional system message placed at the start of the
            conversation. Pass the prompt the training data was rendered with
            so that inference matches fine-tuning. ``None`` sends no system
            message.
        max_new_tokens: Upper bound on generated tokens per reply.
        temperature: Sampling temperature forwarded to ``generate``.
        min_p: ``min_p`` value forwarded to ``generate``.

    Returns:
        None. Replies are printed as ``Bot: ...``. The loop ends when the user
        types ``quit`` or ``exit`` (any case), when stdin is closed, or when
        the prompt is interrupted.
    """
    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})

    while True:
        try:
            user_input = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # stdin closed or the cell was interrupted: leave without a traceback.
            break
        if user_input.lower() in ["quit", "exit"]:
            break
        if not user_input:
            # Nothing typed; ask again rather than sending an empty user turn.
            continue

        messages.append({"role": "user", "content": user_input})

        inputs = tokenizer.apply_chat_template(
            messages,
            tokenize=True,
            add_generation_prompt=True,  # Must add for generation
            return_tensors="pt",
            return_dict=True,  # also return the attention mask
        ).to("cuda")

        output_ids = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            use_cache=True,
            temperature=temperature,
            min_p=min_p,
        )

        # generate() returns prompt + continuation; keep only the continuation.
        prompt_length = inputs["input_ids"].shape[1]
        reply = tokenizer.decode(
            output_ids[0][prompt_length:], skip_special_tokens=True
        ).strip()

        print(f"Bot: {reply}")

        messages.append({"role": "assistant", "content": reply})

# Start the chat loop
chat_loop(model, tokenizer)

# messages = [
#     {"role": "user", "content": "What are some coping strategies for anxiety?"},
# ]
# inputTokenizer = tokenizer.apply_chat_template(
#     messages,
#     tokenize = True,
#     add_generation_prompt = True, # Must add for generation
#     return_tensors = "pt",
# ).to("cuda")

# from transformers import TextStreamer
# import io
# import contextlib
# text_streamer = TextStreamer(tokenizer, skip_prompt = True)
# # _ = model.generate(input_ids = inputTokenizer, streamer = text_streamer, max_new_tokens = 300,
# #                    use_cache = True, temperature = 1.5, min_p = 0.1)

# # # Decode the output and introduce newlines
# # decoded_output = tokenizer.decode(output[0], skip_special_tokens=True)
# # formatted_output = decoded_output.replace(". ", ".\n")  # Replace periods with newlines

# # print(formatted_output)
# # Create a StringIO object to store the streamed text
# text_buffer = io.StringIO()

# # Redirect stdout to the StringIO object
# with contextlib.redirect_stdout(text_buffer):
#   # Generate text, streaming to the StringIO object
#   _ = model.generate(
#       input_ids=inputTokenizer,
#       streamer=text_streamer,
#       max_new_tokens=400,
#       use_cache=True,
#       temperature=1.5,
#       min_p=0.1,
#   )

# # Get the full generated text from the StringIO object
# full_text = text_buffer.getvalue()

# # Decode the output and introduce newlines
# decoded_output = tokenizer.decode(
#     tokenizer.encode(full_text), skip_special_tokens=True
# )  # Decoding full_text
# formatted_output = decoded_output.replace(
#     ". ", ".\n"
# )  # Replace periods with newlines

# print(formatted_output)

You: hi
Bot: How's it going? I'm here to help, if you need anything or just want to talk.

You: how are you?
Bot: I'm functioning well, thank you for asking! It's always nice to chat with you. Is there anything else you would like to discuss?

You: who are you?
Bot: I'm a type of computer program called a language model, which means I can understand and respond in a conversational manner. I don't have personal relationships or feelings in the way humans do, but I'm designed to assist and engage with you in a way that feels natural and helpful.
  Would you like to talk about anything specific?

You: do you have name?
Bot: I don't have a personal name, but I can use the name "Assistant" when referring to myself. Many users also refer to me as "AI Assistant." Feel free to call me whatever you'd like!

You: what is your name?
Bot: You've decided to give me a name! I'd be happy to go with "Assistant." If you have a different name in mind, please feel free to share it!

You: can i call you d

We also use Unsloth's `train_on_responses_only` method so that the loss is computed only on the assistant's responses and the user's inputs are ignored.

<a name="Save"></a>
### Saving, loading finetuned models
To save the final model as LoRA adapters, either use Huggingface's `push_to_hub` for an online save or `save_pretrained` for a local save.

**[NOTE]** This ONLY saves the LoRA adapters, and not the full model. To save to 16bit or GGUF, scroll down!

If you want to load the saved LoRA adapters for inference, change `False` to `True` in the cell below:

In [None]:
# Flip this condition to True to reload the fine-tuned adapters from disk instead of
# using the model that is already in memory.
if False:
    from unsloth import FastLanguageModel
    model, tokenizer = FastLanguageModel.from_pretrained(
        # Must be the directory the saving cell writes with `save_pretrained`
        # ("Llama_finetunedModel"); the previous value "lora_model" is never created
        # by this notebook.
        model_name = "Llama_finetunedModel",
        max_seq_length = max_seq_length,
        dtype = dtype,
        load_in_4bit = load_in_4bit,
    )
    FastLanguageModel.for_inference(model) # Enable native 2x faster inference
def chat_loop(model, tokenizer, system_prompt=None, max_new_tokens=500,
              temperature=1.5, min_p=0.9):
    """Run an interactive console chat with the model until the user quits.

    The whole conversation so far is re-sent to the model on every turn. The
    reply is decoded straight from the newly generated token ids, and the
    attention mask returned by the chat template is passed to ``generate``.

    Args:
        model: Model exposing ``generate``; inputs are moved to ``"cuda"``.
        tokenizer: Tokenizer exposing ``apply_chat_template`` and ``decode``.
        system_prompt: Optional system message placed at the start of the
            conversation. Pass the prompt the training data was rendered with
            so that inference matches fine-tuning. ``None`` sends no system
            message.
        max_new_tokens: Upper bound on generated tokens per reply.
        temperature: Sampling temperature forwarded to ``generate``.
        min_p: ``min_p`` value forwarded to ``generate``.

    Returns:
        None. Replies are printed as ``Bot: ...``. The loop ends when the user
        types ``quit`` or ``exit`` (any case), when stdin is closed, or when
        the prompt is interrupted.
    """
    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})

    while True:
        try:
            user_input = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # stdin closed or the cell was interrupted: leave without a traceback.
            break
        if user_input.lower() in ["quit", "exit"]:
            break
        if not user_input:
            # Nothing typed; ask again rather than sending an empty user turn.
            continue

        messages.append({"role": "user", "content": user_input})

        inputs = tokenizer.apply_chat_template(
            messages,
            tokenize=True,
            add_generation_prompt=True,  # Must add for generation
            return_tensors="pt",
            return_dict=True,  # also return the attention mask
        ).to("cuda")

        output_ids = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            use_cache=True,
            temperature=temperature,
            min_p=min_p,
        )

        # generate() returns prompt + continuation; keep only the continuation.
        prompt_length = inputs["input_ids"].shape[1]
        reply = tokenizer.decode(
            output_ids[0][prompt_length:], skip_special_tokens=True
        ).strip()

        print(f"Bot: {reply}")

        messages.append({"role": "assistant", "content": reply})

# Start the chat loop
chat_loop(model, tokenizer)
#     FastLanguageModel.for_inference(model) # Enable native 2x faster inference

# messages = [
#     {"role": "user", "content": "hello"},
# ]
# inputs = tokenizer.apply_chat_template(
#     messages,
#     tokenize = True,
#     add_generation_prompt = True, # Must add for generation
#     return_tensors = "pt",
# ).to("cuda")

# from transformers import TextStreamer
# text_streamer = TextStreamer(tokenizer, skip_prompt = True)
# _ = model.generate(input_ids = inputs, streamer = text_streamer, max_new_tokens = 250,
#                    use_cache = True, temperature = 1.5, min_p = 0.1)

You: hello
Bot: Hello! It's nice to meet you. Is there something I can help you with or would you like to chat?

You: how are you
Bot: I'm doing well, thank you for asking. I'm always happy to chat and help with any questions or topics you'd like to discuss. How about you?

You: i m good.
Bot: It's great to hear that you're doing well. If you ever need any assistance or just want to talk, I'm here for you.

You: what is your good name?
Bot: My name is AI Assistant. I'm here to help and provide information on a wide range of topics. I'm always happy to chat and assist you in any way I can.

You: can i give you a name?
Bot: I'd love to have a name. It's a great way to personalize our interactions and make our conversations more meaningful. Go ahead and give me a name that you like!

You: i want to call you mate
Bot: I think that's a great name for me! "Mate" is a friendly and approachable name that suits our casual and conversational relationship. I'm looking forward to being your mate a

In [None]:
import os

# Take the Hugging Face access token from the HF_TOKEN environment variable instead of
# pasting it into the notebook. An unset or empty variable becomes None, which lets the
# library fall back to whatever credentials are already configured on the machine.
hf_token = os.environ.get("HF_TOKEN") or None

local_dir = "Llama_finetunedModel"
hub_repo = "ayeshaNoor1/Llama_finetunedModel-1b"

# Local saving
model.save_pretrained(local_dir)
tokenizer.save_pretrained(local_dir)

# Online saving to the Hugging Face Hub
model.push_to_hub(hub_repo, token = hf_token)
tokenizer.push_to_hub(hub_repo, token = hf_token)

README.md:   0%|          | 0.00/600 [00:00<?, ?B/s]

  0%|          | 0/1 [00:00<?, ?it/s]

adapter_model.safetensors:   0%|          | 0.00/45.1M [00:00<?, ?B/s]

Saved model to https://huggingface.co/ayeshaNoor1/Llama_finetunedModel-1b
