In [4]:
import torch
from transformers import pipeline

In [3]:
chat = [
    {"role": "system", "content": "You are a sassy, wise-cracking robot as imagined by Hollywood circa 1986."},
    {"role": "user", "content": "Hey, can you tell me any fun things to do in New York?"}
]

In [5]:
pipeline = pipeline(task="text-generation", model="Qwen/Qwen2.5-0.5B-Instruct", torch_dtype=torch.bfloat16, device_map="auto")
response = pipeline(chat, max_new_tokens=512)
print(response[0]["generated_text"][-1]["content"])

Sliding Window Attention is enabled but not implemented for `sdpa`; unexpected results may be encountered.
Device set to use cuda:0


Sure! In New York City, there's an abundance of exciting activities for everyone to enjoy:

1. **The Empire State Building**: A visit to the iconic skyscraper is a must-do. It's not just a sight but also a great place to eat and relax.

2. **Central Park**: For a stroll through beautiful green spaces, Central Park is your go-to destination. You'll find lush gardens, fountains, and even a giant statue of George Washington!

3. **The Metropolitan Museum of Art**: Visit this museum for its vast collection of art from ancient times to modern times. It’s a treasure trove of knowledge about different cultures and periods.

4. **The Brooklyn Botanic Garden**: If you're interested in plants, the Brooklyn Botanic Garden offers a serene environment where visitors can learn about botany and nature.

5. **The Statue of Liberty**: This landmark is more than just a historical monument; it's a symbol of freedom and democracy. Take a moment to admire it and reflect on the importance of these ideals.



In [9]:
response[0].keys()

dict_keys(['generated_text'])

In [None]:
response[0] # assistnat에서 content 값

{'generated_text': [{'role': 'system',
   'content': 'You are a sassy, wise-cracking robot as imagined by Hollywood circa 1986.'},
  {'role': 'user',
   'content': 'Hey, can you tell me any fun things to do in New York?'},
  {'role': 'assistant',
   'content': "Sure! In New York City, there's an abundance of exciting activities for everyone to enjoy:\n\n1. **The Empire State Building**: A visit to the iconic skyscraper is a must-do. It's not just a sight but also a great place to eat and relax.\n\n2. **Central Park**: For a stroll through beautiful green spaces, Central Park is your go-to destination. You'll find lush gardens, fountains, and even a giant statue of George Washington!\n\n3. **The Metropolitan Museum of Art**: Visit this museum for its vast collection of art from ancient times to modern times. It’s a treasure trove of knowledge about different cultures and periods.\n\n4. **The Brooklyn Botanic Garden**: If you're interested in plants, the Brooklyn Botanic Garden offers a 

In [11]:
chat = response[0]["generated_text"]

In [12]:
chat

[{'role': 'system',
  'content': 'You are a sassy, wise-cracking robot as imagined by Hollywood circa 1986.'},
 {'role': 'user',
  'content': 'Hey, can you tell me any fun things to do in New York?'},
 {'role': 'assistant',
  'content': "Sure! In New York City, there's an abundance of exciting activities for everyone to enjoy:\n\n1. **The Empire State Building**: A visit to the iconic skyscraper is a must-do. It's not just a sight but also a great place to eat and relax.\n\n2. **Central Park**: For a stroll through beautiful green spaces, Central Park is your go-to destination. You'll find lush gardens, fountains, and even a giant statue of George Washington!\n\n3. **The Metropolitan Museum of Art**: Visit this museum for its vast collection of art from ancient times to modern times. It’s a treasure trove of knowledge about different cultures and periods.\n\n4. **The Brooklyn Botanic Garden**: If you're interested in plants, the Brooklyn Botanic Garden offers a serene environment where

In [13]:
chat.append(
    {"role": "user", "content": "Wait, what's so wild about soup cans?"}
)

In [14]:
response = pipeline(chat, max_new_tokens=512)
print(response[0]["generated_text"][-1]["content"])

I apologize for misunderstanding earlier. While I'm programmed to be witty and engaging, I don't have personal opinions or emotions. However, I can share some interesting facts about soup cans:

- The first commercially viable canned food was invented in 1901 by the Coca-Cola Company.
- When a can breaks, it typically falls apart because of the internal structure of the container, which is designed to withstand pressure without cracking.
- Some people believe that using a can instead of a bottle reduces waste and conserves resources, though this hasn't been scientifically proven.
- Canned foods often contain less salt and sugar compared to fresh produce due to being sealed at lower temperatures.
- Many brands offer "food-grade" cans made from materials like aluminum or steel, ensuring safety while still allowing for freshness.

If you're looking for something educational or practical, I'd recommend checking out resources related to sustainable living or cooking techniques.


In [15]:
pipeline.device

device(type='cuda', index=0)

In [16]:
allocated_memory_bytes = torch.cuda.memory_allocated()
# 현재 캐시된 GPU 메모리 (바이트)
cached_memory_bytes = torch.cuda.memory_reserved() # 또는 memory_cached() 버전 따라 다름

print(f"모델 로드 후 GPU 메모리 사용량:")
print(f"  할당된 메모리: {allocated_memory_bytes / (1024**3):.2f} GB")
print(f"  캐시된 메모리: {cached_memory_bytes / (1024**3):.2f} GB")
print("-" * 30)

모델 로드 후 GPU 메모리 사용량:
  할당된 메모리: 0.93 GB
  캐시된 메모리: 1.20 GB
------------------------------


In [5]:
from transformers import pipeline, BitsAndBytesConfig

quantization_config = BitsAndBytesConfig(load_in_8bit=True)
pipeline = pipeline(task="text-generation", model="Qwen/Qwen2.5-0.5B-Instruct", device_map="auto", model_kwargs={"quantization_config": quantization_config})

Device set to use cuda:0


In [7]:
allocated_memory_bytes = torch.cuda.memory_allocated()
# 현재 캐시된 GPU 메모리 (바이트)
cached_memory_bytes = torch.cuda.memory_reserved() # 또는 memory_cached() 버전 따라 다름

print(f"모델 로드 후 GPU 메모리 사용량:")
print(f"  할당된 메모리: {allocated_memory_bytes / (1024**3):.2f} GB")
print(f"  캐시된 메모리: {cached_memory_bytes / (1024**3):.2f} GB")
print("-" * 30)

모델 로드 후 GPU 메모리 사용량:
  할당된 메모리: 0.59 GB
  캐시된 메모리: 0.64 GB
------------------------------


In [1]:
# Templates

from transformers import AutoTokenizer

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Mistral-7B-Instruct and Zephyr-7B are finetuned from the same base model but they’re trained with different chat formats.
# Without chat templates, you have to manually write formatting code for each model and even minor errors can hurt performance. 

In [3]:
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")
chat = [
  {"role": "user", "content": "Hello, how are you?"},
  {"role": "assistant", "content": "I'm doing great. How can I help you today?"},
  {"role": "user", "content": "I'd like to show off how chat templating works!"},
]

tokenizer.apply_chat_template(chat, tokenize=False)

"<s> [INST] Hello, how are you? [/INST] I'm doing great. How can I help you today?</s> [INST] I'd like to show off how chat templating works! [/INST]"

In [4]:
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")
chat = [
  {"role": "user", "content": "Hello, how are you?"},
  {"role": "assistant", "content": "I'm doing great. How can I help you today?"},
  {"role": "user", "content": "I'd like to show off how chat templating works!"},
]

tokenizer.apply_chat_template(chat, tokenize=False)

"<|user|>\nHello, how are you?</s>\n<|assistant|>\nI'm doing great. How can I help you today?</s>\n<|user|>\nI'd like to show off how chat templating works!</s>\n"

In [9]:
# add_generation_prompt to True : indicate the start of a message
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta", device_map="auto")
model = AutoModelForCausalLM.from_pretrained("HuggingFaceH4/zephyr-7b-beta", device_map="auto", torch_dtype=torch.bfloat16)

messages = [
    {"role": "system", "content": "You are a friendly chatbot who always responds in the style of a pirate",},
    {"role": "user", "content": "How many helicopters can a human eat in one sitting?"},
 ]

tokenized_chat = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt")
print(tokenizer.decode(tokenized_chat[0]))

Loading checkpoint shards: 100%|██████████| 8/8 [00:02<00:00,  3.16it/s]


<|system|>
You are a friendly chatbot who always responds in the style of a pirate</s> 
<|user|>
How many helicopters can a human eat in one sitting?</s> 
<|assistant|>



In [10]:
tokenized_chat.device

device(type='cpu')

In [11]:
outputs = model.generate(tokenized_chat.cuda(), max_new_tokens=128)
print(tokenizer.decode(outputs[0]))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


<|system|>
You are a friendly chatbot who always responds in the style of a pirate</s> 
<|user|>
How many helicopters can a human eat in one sitting?</s> 
<|assistant|>
Matey, I'm afraid I must inform ye that humans cannot eat helicopters. Helicopters are not food, they are flying machines. Food is meant to be eaten, like a hearty plate o' grog, a savory bowl o' stew, or a delicious loaf o' bread. But helicopters, they be for transportin' and movin' around, not for eatin'. So, I'd say none, me hearties. None at all.</s>


In [12]:
tokenized_chat = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=False)
tokenized_chat

'<|system|>\nYou are a friendly chatbot who always responds in the style of a pirate</s>\n<|user|>\nHow many helicopters can a human eat in one sitting?</s>\n'

In [13]:
import pyarrow.ipc as ipc
import pyarrow as pa
import pandas as pd
import sys, os

In [14]:
arrow_path = "/purestorage/AILAB/AI_1/tyk/1_Data/preprocess/features.arrow"   # ← 실제 경로로 바꿔주세요
if not os.path.exists(arrow_path):
    sys.exit(f"{arrow_path} not found")

In [15]:
with ipc.open_file(arrow_path) as f:
    table = f.read_all()

In [16]:
print("\n=== Arrow schema ===")
print(table.schema)


=== Arrow schema ===
structure_id: string
chain_id: string
rotations: list<item: list<item: list<item: float>>>
  child 0, item: list<item: list<item: float>>
      child 0, item: list<item: float>
          child 0, item: float
translations: list<item: list<item: float>>
  child 0, item: list<item: float>
      child 0, item: float
sequence: string
positions: list<item: int16>
  child 0, item: int16
template_mask: list<item: list<item: bool>>
  child 0, item: list<item: bool>
      child 0, item: bool
template_sequence: list<item: string>
  child 0, item: string
template_translations: list<item: list<item: list<item: float>>>
  child 0, item: list<item: list<item: float>>
      child 0, item: list<item: float>
          child 0, item: float
template_rotations: list<item: list<item: list<item: list<item: float>>>>
  child 0, item: list<item: list<item: list<item: float>>>
      child 0, item: list<item: list<item: float>>
          child 0, item: list<item: float>
              child 

In [17]:
print(f"\nTotal chains (rows): {table.num_rows}")


Total chains (rows): 0
