In [None]:
!pip install -q -U torch==2.5.1+cu124 transformers==4.48.3 peft==0.14.0 bitsandbytes==0.45.2 datasets==3.3.0 accelerate==1.3.0 trl==0.15.0 # python 3.11.11

In [2]:
# Import standard and deep learning libraries
import re
import os
import torch
from bitsandbytes.nn import modules  # Optimized layers for low-bit training

# Import Hugging Face Transformer utilities
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig,
    StoppingCriteria,
    StoppingCriteriaList
)

# Import PEFT (Parameter-Efficient Fine-Tuning) modules
from peft import (
    PeftModel,
    PeftConfig
)

In [None]:
# Authenticate with Hugging Face Hub
from huggingface_hub import notebook_login
notebook_login()

In [3]:
# Retrieve Hugging Face authentication token from environment variables
auth_token = os.getenv("HF_TOKEN")

# Define the base model
base_model = "mistralai/Mistral-7B-Instruct-v0.2"

In [None]:
# Load tokenizer with appropriate settings
tokenizer = AutoTokenizer.from_pretrained(
    base_model,
    token=auth_token,
    padding_side="right",
    add_eos_token=True,
)
tokenizer.pad_token = tokenizer.eos_token  # Set pad token to eos token

In [None]:
# Define fine-tuned model and load PEFT configuration
text_gen_model = "Mesutby/mistral-7B-wikitext-finetuned"
config = PeftConfig.from_pretrained(text_gen_model)

# Base model reference
base_model_name = "mistralai/Mistral-7B-Instruct-v0.2"

In [None]:
# Configure 4-bit quantization with float16 precision
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16
)

# Load the model with quantization and automatic device mapping
model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    use_auth_token=auth_token,
    return_dict=True,
    device_map='auto',
    quantization_config=quantization_config
)

In [7]:
# Custom stopping criteria for text generation
class StopAtPunctuationAfterMargin(StoppingCriteria):
    def __init__(self, tokenizer, max_new_tokens, margin, stop_tokens=[".", "!", "?"]):
        self.tokenizer = tokenizer
        self.max_new_tokens = max_new_tokens
        self.margin = margin
        # Convert stop tokens to token IDs
        self.stop_token_ids = [tokenizer.convert_tokens_to_ids(tok) for tok in stop_tokens]
        self.initial_length = None  # Store initial token count

    def __call__(self, input_ids: torch.LongTensor, scores, **kwargs) -> bool:
        if self.initial_length is None:
            self.initial_length = input_ids.shape[1]
        # Calculate the number of generated tokens
        generated_tokens = input_ids.shape[1] - self.initial_length
        # Stop if margin threshold is reached and last token is a stop token
        if generated_tokens >= self.max_new_tokens - self.margin:
            last_token = input_ids[0, -1].item()
            if last_token in self.stop_token_ids:
                return True
        return False

In [None]:
# Load and merge fine-tuned model using PEFT
fine_tuned_model = PeftModel.from_pretrained(model, text_gen_model)
merged_model = fine_tuned_model.merge_and_unload()
merged_model.config.pad_token_id = tokenizer.eos_token_id

In [9]:
def chatbotStart():
    # Ask for text length option
    print("Choose text length:")
    print("1: Short (max_new_tokens = 100)")
    print("2: Medium (max_new_tokens = 250)")
    print("3: Long (max_new_tokens = 500)")
    print("4: Very Long (max_new_tokens = 20000) - Recommended only for code generation\n")

    while True:
        choice = input("Your choice (1/2/3/4): ").strip()
        if choice in ['1', '2', '3', '4']:
            break
        print("Enter a valid option (1, 2, 3, or 4).")

    max_new_tokens = 100 if choice == '1' else 250 if choice == '2' else 500 if choice == '3' else 20000
    margin = max_new_tokens // 5

    print("Press exit, quit, or q to finish the conversation.")

    while True:
        prompt = input("Me: ")
        if prompt.lower() in ['exit', 'quit', 'q']:
            print("Thanks for the conversation")
            break

        # Tokenize the input prompt and prepare model inputs with attention mask
        inputs = tokenizer(prompt, return_tensors="pt", padding=True).to(merged_model.device)
        # Set stopping criteria to end generation when punctuation is reached near the token limit
        stopping_criteria = StoppingCriteriaList([StopAtPunctuationAfterMargin(tokenizer, max_new_tokens, margin)])

        # Generate output tokens from the model using the prepared inputs and stopping criteria
        output_ids = merged_model.generate(
            inputs.input_ids,
            attention_mask=inputs.attention_mask,
            max_new_tokens=max_new_tokens,
            stopping_criteria=stopping_criteria,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.eos_token_id,
        )

        generated = tokenizer.decode(output_ids[0], skip_special_tokens=True)

        # Remove repeated prompt from the output
        if generated.lower().startswith(prompt.lower()):
            generated = generated[len(prompt):].strip()

        # Remove numbered lists like "2. ", "3. "
        cleaned_generated = re.sub(r'\b\d+\.\s*', '', generated)

        print("\nUser Prompt:", prompt)
        print("Bot:", cleaned_generated, "\n")


In [11]:
chatbotStart()

Choose text length:
1: Short (max_new_tokens = 100)
2: Medium (max_new_tokens = 500)
3: Long (max_new_tokens = 1000)
4: Very Long (max_new_tokens = 20000) - Recommended only for code generation

Your choice (1/2/3/4): 1
Press exit, quit, or q to finish the conversation.
Me: Hello! Can you introduce yourself?

User Prompt: Hello! Can you introduce yourself?
Bot: Hello! I'm an assistant designed to help answer questions and provide information. I don't have the ability to introduce myself in the same way a human would, but I can tell you that I'm here to help answer any questions you might have, and I'm programmed to provide accurate and reliable information. I'm also able to understand and process natural language, so feel free to ask me anything in a conversational manner. 

Me: Who is Albert Enistein?

User Prompt: Who is Albert Enistein?
Bot: Albert Einstein was a theoretical physicist who developed the theory of general relativity, one of the two pillars of modern physics (alongside

In [12]:
chatbotStart()

Choose text length:
1: Short (max_new_tokens = 100)
2: Medium (max_new_tokens = 500)
3: Long (max_new_tokens = 1000)
4: Very Long (max_new_tokens = 20000) - Recommended only for code generation

Your choice (1/2/3/4): 2
Press exit, quit, or q to finish the conversation.
Me: How does the immune system fight off infections?

User Prompt: How does the immune system fight off infections?
Bot: Recognition: The immune system recognizes foreign substances, such as bacteria or viruses, through various receptors on the surface of immune cells. These receptors can identify specific molecular patterns that are unique to these microbes.

Activation: Once a foreign substance is recognized, the immune system responds by activating various immune cells, such as neutrophils, macrophages, and T cells. These cells then begin to engulf or destroy the microbe.

Phagocytosis: Neutrophils and macrophages use a process called phagocytosis to engulf and destroy microbes. The cell surrounds the microbe with it

In [13]:
chatbotStart()

Choose text length:
1: Short (max_new_tokens = 100)
2: Medium (max_new_tokens = 500)
3: Long (max_new_tokens = 1000)
4: Very Long (max_new_tokens = 20000) - Recommended only for code generation

Your choice (1/2/3/4): 3
Press exit, quit, or q to finish the conversation.
Me: Provide a well-structured explanation of the Big Bang theory.

User Prompt: Provide a well-structured explanation of the Big Bang theory.
Bot: The Big Bang theory is the prevailing cosmological model that explains the origin and evolution of the universe. According to this theory, the universe began as an infinitely dense and hot point, often referred to as a singularity, around 8 billion years ago. This singularity then underwent a massive explosion, expanding rapidly and cooling down as it expanded.

As the universe expanded and cooled, various subatomic particles began to form. Around 10-15 minutes after the Big Bang, the first subatomic particles, such as quarks and electrons, came into existence. These particle

In [14]:
chatbotStart()

Choose text length:
1: Short (max_new_tokens = 100)
2: Medium (max_new_tokens = 500)
3: Long (max_new_tokens = 1000)
4: Very Long (max_new_tokens = 20000) - Recommended only for code generation

Your choice (1/2/3/4): 4
Press exit, quit, or q to finish the conversation.
Me: Can you generate a simple PyTorch script to train a CNN?

User Prompt: Can you generate a simple PyTorch script to train a CNN?
Bot: I'd be happy to help you get started with a simple PyTorch script for training a Convolutional Neural Network (CNN). Here's an example of a basic CNN architecture and how to train it using PyTorch:

```python
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

# Set device for GPU if available, otherwise use CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Load and normalize the CIFAR10 dataset
transform = transforms.Compose(
    [transforms.RandomHorizontalFlip(),
     transforms.R