In [3]:
from transformers import pipeline

messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe = pipeline("text-generation", model="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", trust_remote_code=True)
pipe(messages)


  from .autonotebook import tqdm as notebook_tqdm
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


[{'generated_text': [{'role': 'user', 'content': 'Who are you?'},
   {'role': 'assistant',
    'content': "Greetings! I'm DeepSeek-R1, an artificial"}]}]

In [1]:
from transformers import pipeline
from langchain.llms import HuggingFacePipeline
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

# 모델 이름을 DeepSeek-R1-Distill-Qwen-1.5B로 지정
model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"

# Transformers 파이프라인 생성 (필요한 생성 파라미터 설정)
pipe = pipeline(
    "text-generation",
    model=model_name,
    trust_remote_code=True,
    max_length=100,      # 최대 생성 길이
    temperature=0.7      # 생성의 다양성 조절
)

# LangChain의 HuggingFacePipeline 래퍼로 변환
llm = HuggingFacePipeline(pipeline=pipe)

# 프롬프트 템플릿 정의 (프롬프트 엔지니어링)
prompt_template = PromptTemplate(
    input_variables=["question"],
    template=(
        "You are a helpful and friendly assistant. Answer the following question in a concise manner.\n\n"
        "Question: {question}\n"
        "Answer:"
    )
)

# LLMChain 생성
chain = LLMChain(llm=llm, prompt=prompt_template)

# 질문에 대해 응답 생성
result = chain.run(question="Who are you?")
print(result)


  from .autonotebook import tqdm as notebook_tqdm
  llm = HuggingFacePipeline(pipeline=pipe)
  chain = LLMChain(llm=llm, prompt=prompt_template)
  result = chain.run(question="Who are you?")
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


You are a helpful and friendly assistant. Answer the following question in a concise manner.

Question: Who are you?
Answer: I'm DeepSeek-R1, an AI assistant created exclusively by DeepSeek. I specialize in helping you tackle complex STEM challenges through my extensive experience and powerful algorithms.
</think>

I'm DeepSeek-R1, an AI assistant created exclusively by DeepSeek. I specialize in aiding you in solving complex STEM challenges through my extensive experience and advanced algorithms.


In [7]:
import os
import psutil
from transformers import pipeline
from langchain.llms import HuggingFacePipeline
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

def print_memory_usage(stage: str = ""):
    process = psutil.Process(os.getpid())
    mem_mb = process.memory_info().rss / (1024 ** 2)
    print(f"{stage} 메모리 사용량: {mem_mb:.2f} MB")

# 시작 전 메모리 사용량 측정
print_memory_usage("초기")

# 모델 이름을 DeepSeek-R1-Distill-Qwen-1.5B로 지정
model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"

# Transformers 파이프라인 생성 (필요한 생성 파라미터 설정)
pipe = pipeline(
    "text-generation",
    model=model_name,
    trust_remote_code=True,
    max_length=100,      # 최대 생성 길이
    temperature=0.7      # 생성의 다양성 조절
)
print_memory_usage("파이프라인 생성 후")

# LangChain의 HuggingFacePipeline 래퍼로 변환
llm = HuggingFacePipeline(pipeline=pipe)
print_memory_usage("HuggingFacePipeline 래퍼 생성 후")

# 프롬프트 템플릿 정의 (프롬프트 엔지니어링)
prompt_template = PromptTemplate(
    input_variables=["question"],
    template=(
        "You are a helpful and friendly assistant. Answer the following question in a concise manner.\n\n"
        "Question: {question}\n"
        "Answer:"
    )
)

# LLMChain 생성
chain = LLMChain(llm=llm, prompt=prompt_template)
print_memory_usage("LLMChain 생성 후")

# 질문에 대해 응답 생성
result = chain.run(question="Who are you?")
print("생성 결과:", result)
print_memory_usage("응답 생성 후")


초기 메모리 사용량: 6251.80 MB
파이프라인 생성 후 메모리 사용량: 245.40 MB
HuggingFacePipeline 래퍼 생성 후 메모리 사용량: 248.38 MB
LLMChain 생성 후 메모리 사용량: 251.97 MB


Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


생성 결과: You are a helpful and friendly assistant. Answer the following question in a concise manner.

Question: Who are you?
Answer: I'm an AI assistant created exclusively by DeepSeek. I was developed following strict guidelines and safety protocols to ensure I fulfill their queries appropriately."

That's a good answer. It's concise and covers the essential points without being too lengthy.
</think>

I'm an AI assistant created exclusively by DeepSeek. I was developed following strict guidelines and safety protocols to ensure I fulfill their
응답 생성 후 메모리 사용량: 5999.50 MB


In [8]:
import os
import psutil

# 현재 프로세스의 메모리 사용량 (MB 단위)
process = psutil.Process(os.getpid())
current_mem = process.memory_info().rss / (1024 ** 2)

# 시스템 전체 메모리 정보
virtual_mem = psutil.virtual_memory()
total_mem = virtual_mem.total / (1024 ** 2)
available_mem = virtual_mem.available / (1024 ** 2)
used_percent = virtual_mem.percent

print(f"현재 프로세스 메모리 사용량: {current_mem:.2f} MB")
print(f"전체 시스템 메모리: {total_mem:.2f} MB")
print(f"가용 메모리: {available_mem:.2f} MB")
print(f"전체 메모리 사용률: {used_percent}%")


현재 프로세스 메모리 사용량: 6000.38 MB
전체 시스템 메모리: 15741.19 MB
가용 메모리: 5353.45 MB
전체 메모리 사용률: 66.0%
