In [2]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch


model_name='Qwen/Qwen2.5-0.5B-Instruct'

device=torch.device('cpu')

model=AutoModelForCausalLM.from_pretrained(
  model_name,
  device_map={"": device},
  # torch_dtype='auto',
  torch_dtype=torch.float16,
  # trust_remote_code=True,
  low_cpu_mem_usage=True
)

tokenizer=AutoTokenizer.from_pretrained(model_name)

# print(model)

prompt='写一篇小红书文案, 关于一人公司<|assistant|>'

# tokenize input prompt
input_ids=tokenizer(prompt, return_tensors='pt').input_ids.to(device)

# gen text
gen_output=model.generate(
  input_ids=input_ids,
  max_new_tokens=100
)

# print(gen_output)

print(tokenizer.decode(gen_output[0]))

写一篇小红书文案, 关于一人公司<|assistant|>的求职攻略

标题：🌈✨ 一职难求，但一个人公司的就业之路！🔥💼

正文：
亲爱的小伙伴们，今天我来给大家分享一个我在工作中发现的一个有趣的小秘密——一个人公司。你可能不知道，一个人公司不仅能让你找到工作，还能让你在职场上更加自由和自在。

首先，我想说的是，一个人公司的优势远不止于此。它让你有更多的时间去享受生活，因为没有团队压力，你可以更专注于自己的事情。而且


In [13]:
from transformers import AutoTokenizer
import torch

# device=torch.device('mps')
device=torch.device('cpu')
tokenizer=AutoTokenizer.from_pretrained('Qwen/Qwen2.5-0.5B-Instruct')

prompt='hello, who are you'

input_ids=tokenizer(prompt, return_tensors='pt').input_ids.to(device)
print(input_ids)

tensor([[14990,    11,   879,   525,   498]])


In [3]:
from transformers import AutoTokenizer, AutoModel
import torch
import os

# Set proxy
os.environ['HTTP_PROXY'] = 'http://127.0.0.1:1087'
os.environ['HTTPS_PROXY'] = 'http://127.0.0.1:1087'

embed_model='nomic-ai/nomic-embed-text-v1.5'

tokenizer=AutoTokenizer.from_pretrained(embed_model)

# prompt='写一篇小红书文案, 关于一人公司<|assistant|>'
prompt='hello world'
input_ids=tokenizer(prompt, return_tensors='pt').input_ids.to('mps')
# print(input_ids)

# see each token
for id in input_ids[0]:
  print(id, tokenizer.decode(id))


tensor(101, device='mps:0') [CLS]
tensor(7592, device='mps:0') hello
tensor(2088, device='mps:0') world
tensor(102, device='mps:0') [SEP]


In [6]:
# contextual word embedding

from transformers import AutoModel, AutoTokenizer

embed_model='nomic-ai/nomic-embed-text-v1.5'
model_name='Qwen/Qwen2.5-0.5B-Instruct'

tokenizer=AutoTokenizer.from_pretrained(embed_model)

model=AutoModel.from_pretrained(model_name)

# tokenize sentence
tokens=tokenizer('hello', return_tensors='pt')

output=model(**tokens)[0]

print(output)

print('tokens: ', tokens)

tensor([[[-1.0028, -6.5665,  1.8038,  ...,  5.9021, -2.5729,  5.6168],
         [-0.3730, -6.3617, -3.1002,  ...,  4.3737, -2.5950,  7.9185],
         [ 1.3865, -1.9655, -1.4372,  ...,  2.7270,  2.2920,  2.4780]]],
       grad_fn=<MulBackward0>)
tokens:  {'input_ids': tensor([[ 101, 7592,  102]]), 'token_type_ids': tensor([[0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1]])}


In [17]:
# use microsoft/deberta model
from transformers import AutoModel, AutoTokenizer

embed_model='microsoft/deberta-base'
model_name='microsoft/deberta-v3-xsmall'

tokenizer=AutoTokenizer.from_pretrained(embed_model)

model=AutoModel.from_pretrained(model_name)

# tokenize sentence
tokens=tokenizer("hello, what's your name?", return_tensors='pt')

output=model(**tokens)[0]

print(output)

print('output shape: ', output.shape)

tensor([[[-3.4600, -0.0252, -0.0795,  ..., -0.1862, -0.3723, -0.0297],
         [-0.2925,  0.7294,  0.5701,  ..., -0.2953, -0.2545, -0.2009],
         [-1.0567,  0.5812, -0.4387,  ...,  0.3261, -0.8292,  0.8170],
         ...,
         [ 0.5983,  0.2496,  0.0620,  ...,  1.0276, -0.7775, -0.7397],
         [-1.1675, -0.1209,  0.0104,  ...,  0.6969, -0.1156, -0.5755],
         [-3.2867, -0.0255,  0.0604,  ..., -0.2898, -0.1752, -0.1026]]],
       grad_fn=<NativeLayerNormBackward0>)
output shape:  torch.Size([1, 9, 384])


In [8]:
# sentence embedding
from sentence_transformers import SentenceTransformer

model=SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

vec=model.encode('best movie ever')

print(vec, '\n', vec.shape)

[-1.01118244e-01  6.21917145e-03 -5.31980842e-02 -3.73656233e-03
 -4.93359007e-02  7.06774965e-02  9.81120765e-03 -6.60492200e-03
  1.02338508e-01 -3.96541581e-02 -4.90543470e-02  6.84917942e-02
  1.52453668e-02  6.61561489e-02 -4.10934500e-02  7.48507529e-02
  3.59498858e-02 -4.57872041e-02  4.98377299e-03 -1.02802239e-01
 -2.28543431e-02  2.00467352e-02  1.60432365e-02  2.29911208e-02
 -5.77333309e-02  6.32299259e-02 -7.45138386e-03  1.58397670e-04
 -1.20708048e-01 -1.80868208e-02  2.48436797e-02  3.47675243e-03
 -5.60263135e-02  6.32715470e-04 -2.15360988e-02 -5.32185426e-03
 -1.17753586e-02 -5.05840965e-02 -2.54806317e-02 -2.82878876e-02
 -1.00786716e-01 -5.71530648e-02  2.63541080e-02 -1.95630044e-02
  9.32950005e-02 -6.16598912e-02 -8.21805838e-03 -1.23697044e-02
  6.45528510e-02  5.62597141e-02 -5.23218066e-02  5.20572923e-02
  1.29822977e-02  3.79677191e-02  6.79668561e-02  2.24372605e-03
 -7.05436617e-02 -9.18390602e-02  9.04516596e-03 -3.44583616e-02
 -4.47851196e-02  5.45186