<a href="https://colab.research.google.com/github/tomtyiu/SuperTransformer-SHF/blob/main/supertransformer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install bitsandbytes>=0.39.0
!pip install --upgrade accelerate transformers



In [None]:
# Use a pipeline as a high-level helper
from transformers import pipeline
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, BitsAndBytesConfig
from peft import PeftModel
import torch
from accelerate import Accelerator

class SuperTransformers:
  """Convenience wrapper for loading a Hugging Face causal LM and generating text.

  The instance stores a model identifier, a system prompt, the user text and a
  generation budget. The methods load the model in full precision or with
  bitsandbytes 4-bit / 8-bit quantization.
  """

  def __init__(self, model, system_prompt, text, max_new_tokens):
    """Store the generation settings.

    Args:
      model: Model name or path passed to ``from_pretrained``.
      system_prompt: Instruction text placed before the user text.
      text: The user prompt.
      max_new_tokens: Upper bound on generated tokens, forwarded to the pipeline.
    """
    self.model = model
    self.system_prompt = system_prompt
    self.text = text
    self.max_new_tokens = max_new_tokens

  def _build_prompt(self):
    """Return the system prompt and user text combined into one chat-style prompt."""
    return f"System: {self.system_prompt}\nUser: {self.text}\nAssistant:"

  def _generate_quantized(self, quantization_config):
    """Load the model with ``quantization_config``, generate, and print the output.

    Shared implementation for the 4-bit and 8-bit entry points, which differ
    only in the ``BitsAndBytesConfig`` they pass in.
    """
    tokenizer = AutoTokenizer.from_pretrained(self.model)
    quantized_model = AutoModelForCausalLM.from_pretrained(
        self.model,
        quantization_config=quantization_config,
        device_map="auto",  # Ensures model is loaded on available GPU
    )
    pipe = pipeline("text-generation", model=quantized_model, tokenizer=tokenizer)

    # Feed the combined prompt (not just self.text) so that the system prompt
    # actually reaches the model.
    output = pipe(self._build_prompt(), max_new_tokens=self.max_new_tokens)

    print(output)

  def HuggingfaceTransfomer(self):
    """Load the unquantized model and its tokenizer.

    Returns:
      A ``(model, tokenizer)`` tuple.
    """
    tokenizer = AutoTokenizer.from_pretrained(self.model)
    model = AutoModelForCausalLM.from_pretrained(self.model)
    return model, tokenizer

  def HuggingfacePipeline(self):
    """Return a ``text-generation`` pipeline built on the unquantized model."""
    model, tokenizer = self.HuggingfaceTransfomer()
    return pipeline("text-generation", model=model, tokenizer=tokenizer)

  def HuggingFaceTransformer4bit(self):
    """Generate from the combined prompt with 4-bit quantized weights and print it."""
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype="float16",  # Reduces precision, saving memory
        bnb_4bit_use_double_quant=True,  # Further memory savings
    )
    self._generate_quantized(quantization_config)

  def HuggingFaceTransformer8bit(self):
    """Generate from the combined prompt with 8-bit quantized weights and print it."""
    quantization_config = BitsAndBytesConfig(
        load_in_8bit=True,
    )
    self._generate_quantized(quantization_config)


# Bind the instance to its own name; reusing `SuperTransformers` would shadow
# the class and make it impossible to create another instance afterwards.
super_transformer = SuperTransformers(
    "EpistemeAI/ReasoningCore-3B-RE1-V2",
    "You are a highly knowledgeable assistant with expertise in chemistry and physics. <reasoning>",
    "What is the area of a circle, radius=16, reason step by step",
    2026,
)
super_transformer.HuggingFaceTransformer8bit()

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Device set to use cuda:0


[{'generated_text': 'What is the area of a circle, radius=16, reason step by step\n## Step 1: Recall the formula for the area of a circle\nThe formula for the area of a circle is given by A = πr^2, where A is the area and r is the radius of the circle.\n\n## Step 2: Plug in the given radius into the formula\nGiven that the radius of the circle is 16, we can substitute this value into the formula: A = π(16)^2.\n\n## Step 3: Calculate the square of the radius\nWe need to calculate the square of the radius, which is 16^2 = 256.\n\n## Step 4: Multiply the square of the radius by π\nNow we multiply the result from step 3 by π (approximately 3.14159): A = π * 256.\n\n## Step 5: Calculate the area\nPerforming the multiplication gives us the area: A = 3.14159 * 256 ≈ 804.25.\n\nThe final answer is: $\\boxed{804.25}$'}]
