In [1]:
from transformers import LlamaForSequenceClassification, LlamaTokenizer
import sentencepiece
import torch

model_name = 'meta-llama/Llama-2-7b-hf'
tokenizer = LlamaTokenizer.from_pretrained(model_name)
# デフォルトのモデルは２値分類の感情予測です（０：ネガティブ、１：ポジティブ）
model = LlamaForSequenceClassification.from_pretrained(model_name, num_labels=2)

# テキストをトークン化してエンコードします。
text = "Bad Llama you are bad guy"

encoded_input = tokenizer(text, return_tensors='pt')

# モデルに入力し、予測を取得します。
output = model(**encoded_input)

# 予測を取得します。
prediction = torch.argmax(output.logits)
print(prediction)

  from .autonotebook import tqdm as notebook_tqdm
Loading checkpoint shards: 100%|██████████| 2/2 [00:56<00:00, 28.07s/it]
Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-2-7b-hf and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


tensor(0)


In [4]:
prompt = "classification: positive as 1, negative as 0\n"
# テキストをトークン化してエンコードします。
text = "love Llama love you"

encoded_input = tokenizer(prompt+text, return_tensors='pt')

# モデルに入力し、予測を取得します。
output = model(**encoded_input)

# 予測を取得します。
prediction = torch.argmax(output.logits)
print(prediction)



tensor(0)


In [7]:
output['past_key_values'][0][0].shape

torch.Size([1, 32, 2, 128])

In [10]:
output['logits'].detach().numpy().tolist()[0]

[0.3298485577106476, 0.5767533183097839]

In [6]:
# テキストをトークン化してエンコードします。
text = "please"

encoded_input = tokenizer(text, return_tensors='pt')

# モデルに入力し、予測を取得します。
output = model(**encoded_input)

# 予測を取得します。
prediction = torch.argmax(output.logits)
print(prediction)

tensor(0)


In [4]:
from transformers import AutoModelForCausalLM, AutoTokenizer
repo_id = "meta-llama/Llama-2-13b-chat-hf"
prompt = 'try answer faithfully'
# Llama 2 paper up to the start of section 2
question = "What is the paper about?"
# Load the model and prepare generate args
model = AutoModelForCausalLM.from_pretrained(repo_id, device_map="auto", rope_scaling={"type": "dynamic", "factor": 2.0}, load_in_8bit=True, num_labels=4) # use_auth_token=True, load_in_4bit=True)
tokenizer = AutoTokenizer.from_pretrained(repo_id, use_fast=True)
model_inputs = tokenizer (prompt + question, return_tensors="pt").to("cuda")
print (model_inputs.input_ids.shape)
# 6782 tokens, needs a GPU with > 24GB
# Let's use it!
generate_kwargs = {"max_new_tokens" : 200, "do_sample": False}
gen_out = model.generate(**model_inputs, **generate_kwargs)
print(tokenizer.decode(gen_out[0], skip_special_tokens=True))

Downloading (…)lve/main/config.json:   0%|          | 0.00/587 [00:00<?, ?B/s]



Downloading (…)model.bin.index.json:   0%|          | 0.00/33.4k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading (…)l-00001-of-00003.bin:   0%|          | 0.00/9.95G [00:00<?, ?B/s]

In [9]:
from transformers import AutoTokenizer, AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-13b-chat-hf", load_in_4bit=True)
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-13b-chat-hf")

prompt = "Hey, are you conscious? Can you talk to me?"
inputs = tokenizer(prompt, return_tensors="pt")

# Generate
generate_ids = model.generate(inputs.input_ids, max_length=30)
tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]

Downloading shards: 100%|██████████| 3/3 [00:00<00:00, 16.56it/s]
Loading checkpoint shards: 100%|██████████| 3/3 [02:37<00:00, 52.54s/it]
Downloading (…)okenizer_config.json: 100%|██████████| 1.62k/1.62k [00:00<00:00, 713kB/s]


"Hey, are you conscious? Can you talk to me?\n\nI'm just an AI, I don't have consciousness"

In [11]:
# Encode the prompt
prompt = "Hey, are you conscious? Can you talk to me?"
inputs = tokenizer(prompt, return_tensors="pt")

# Forward pass through the model
output = model(**inputs, output_hidden_states=True)

# Get the last hidden states
hidden_states = output.hidden_states[-1]  # The tuple of hidden-states at the output of the last layer

# Get the embeddings for the last token
# Assuming batch size is 1, take the last token (-1) for the last layer
last_token_embedding = hidden_states[0, -1, :]

# Convert to numpy array if needed
last_token_embedding_np = last_token_embedding.detach().cpu().numpy()


In [24]:
inputs

{'input_ids': tensor([[    1, 18637, 29892,   526,   366, 19861, 29973,  1815,   366,  5193,
           304,   592, 29973]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}

In [21]:
output.hidden_states[0].shape

torch.Size([1, 13, 5120])

In [15]:
last_token_embedding_np.shape

(5120,)

In [25]:
# Generate response
model.eval()  # Set the model to evaluation mode
generate_ids = model.generate(inputs.input_ids, max_length=30)

# Now, you need to get the embeddings of the generated token.
# Perform a forward pass with output_hidden_states=True to get all hidden states.
outputs = model(input_ids=generate_ids, output_hidden_states=True)

# Extract the hidden states
hidden_states = outputs.hidden_states[-1]  # The last layer's hidden states

# Assuming you want the first generated token, which is the first token after the input
# We find the position of the first generated token, which is at the length of the input IDs
position_of_first_generated_token = inputs.input_ids.size(1)

# Extract the embedding for the first generated token
# The hidden states are of the shape (batch_size, sequence_length, hidden_size)
first_generated_token_embedding = hidden_states[0, position_of_first_generated_token, :]

# If you want to use it outside of PyTorch, you can convert it to a numpy array
first_generated_token_embedding_np = first_generated_token_embedding.detach().cpu().numpy()




In [29]:
outputs.hidden_states[0].shape

torch.Size([1, 30, 5120])

In [3]:
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("ehartford/dolphin-2.1-mistral-7b")
model = AutoModelForCausalLM.from_pretrained("ehartford/dolphin-2.1-mistral-7b") #teknium/CollectiveCognition-v1.1-Mistral-7B

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Downloading (…)lve/main/config.json: 100%|██████████| 622/622 [00:00<00:00, 74.8kB/s]
Downloading (…)model.bin.index.json: 100%|██████████| 23.9k/23.9k [00:00<00:00, 8.70MB/s]
Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[

In [24]:
import torch
# Encode the prompt
prompt = "i love this house A: positive B: negative \n"
# テキストをトークン化してエンコードします。
text = ""
inputs = tokenizer(prompt+text, return_tensors="pt")

# Generate response with scores
generate_args = {
    "max_length": inputs.input_ids.size(1) + 2,  # Original length plus one token
    "output_scores": True,
    "return_dict_in_generate": True
}

# Generate one token to get its logprob
generate_output = model.generate(**inputs, **generate_args)

# Retrieve the scores for the generated token
scores = generate_output.scores[0]  # scores for the first generated token

# Convert the scores to probabilities
log_probs = torch.nn.functional.log_softmax(scores, dim=-1)

# Get the generated token ID
generated_token_id = generate_output.sequences[0, inputs.input_ids.size(1)].item()  # First token after the input sequence

# Get the log-probability of the generated token
log_prob_of_generated_token = log_probs[0, generated_token_id].item()

# Decode the generated token ID to the token string
generated_token = tokenizer.decode(generated_token_id)

print(f"The first generated token is: '{generated_token}' with a log probability of: {log_prob_of_generated_token}")
print('=====')
# Retrieve the full sequence of generated token IDs (including the prompt)
generated_sequence_ids = generate_output.sequences[0]

# Decode the entire generated sequence to a string
print(tokenizer.decode(generated_sequence_ids, skip_special_tokens=True))

# Get token IDs for 'A' and 'B'
token_id_A = tokenizer.convert_tokens_to_ids('A')
token_id_B = tokenizer.convert_tokens_to_ids('B')

# Get the log-probabilities of 'A' and 'B'
log_prob_of_A = log_probs[0, token_id_A].item()
log_prob_of_B = log_probs[0, token_id_B].item()

print(f"The log probability of 'A' as the first generated token is: {log_prob_of_A}")
print(f"The log probability of 'B' as the first generated token is: {log_prob_of_B}")

Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.


The first generated token is: '
' with a log probability of: -0.04539313539862633
=====
i love this house A: positive B: negative 

I
The log probability of 'A' as the first generated token is: -7.283852577209473
The log probability of 'B' as the first generated token is: -7.645366668701172


In [26]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


# Encode the prompt
prompt = "i love this house A: positive B: negative \n"
inputs = tokenizer(prompt, return_tensors="pt")

# Run the model to get the hidden states
outputs = model(**inputs, output_hidden_states=True)

# Retrieve the hidden states
hidden_states = outputs.hidden_states

# Get the last hidden state
last_hidden_state = hidden_states[-1]

# Get the embeddings for the last and second-to-last tokens
# -1 is the last token, -2 is the second-to-last token in the sequence
last_token_embedding = last_hidden_state[0, -1, :]
second_last_token_embedding = last_hidden_state[0, -2, :]

print("Embedding for the last token:", last_token_embedding)
print("Embedding for the second-to-last token:", second_last_token_embedding)


Embedding for the last token: tensor([7.6824, 0.1293, 2.7754,  ..., 2.5605, 3.1323, 1.7811],
       grad_fn=<SliceBackward0>)
Embedding for the second-to-last token: tensor([-0.5160, -3.8609, -4.0569,  ..., -0.7954,  5.8792, -3.3343],
       grad_fn=<SliceBackward0>)


In [19]:
inputs

{'input_ids': tensor([[    1,   613,  2016,   456,  2134,   330, 28747,  5278,   365, 28747,
          7087, 28705,    13, 15985,   865,   330,   442,   365,  4665, 28747,
         28705]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}

In [20]:
generate_output

GreedySearchDecoderOnlyOutput(sequences=tensor([[    1,   613,  2016,   456,  2134,   330, 28747,  5278,   365, 28747,
          7087, 28705,    13, 15985,   865,   330,   442,   365,  4665, 28747,
         28705,   330,    13]]), scores=(tensor([[ -9.3919,  -8.9025,   1.6067,  ...,  -3.4850,   6.2570, -17.2919]]), tensor([[ -9.0299,  -8.1803,   4.8835,  ...,  -5.4123,   7.3636, -16.4609]])), attentions=None, hidden_states=None)

In [21]:
torch.topk(log_probs, 10)

torch.return_types.topk(
values=tensor([[-1.1539, -1.7772, -2.5101, -2.9475, -3.0478, -3.3729, -3.4379, -3.6138,
         -4.4046, -4.4968]]),
indices=tensor([[  330,    13,   613,  5278,  8202,   315, 28740,   365,   345, 28734]]))

In [23]:
tokenizer.decode([  330,    13,   613,  5278,  8202,   315, 28740,   365,   345, 28734])

'A\n i positive Pos I1 B "0'

In [9]:
# Retrieve the full sequence of generated token IDs (including the prompt)
generated_sequence_ids = generate_output.sequences[0]

# Decode the entire generated sequence to a string
print(tokenizer.decode(generated_sequence_ids, skip_special_tokens=True))


please judge the sentiment of given sentence, output only 0 or 1 please: 
 i love this house

0

1

2

3

4

5

6

7

8

9


In [27]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Initialize your model and tokenizer
# model = AutoModelForCausalLM.from_pretrained('your-model-name')
# tokenizer = AutoTokenizer.from_pretrained('your-model-name')

# Encode the prompt
prompt = "i love this house A: positive B: negative \n"
inputs = tokenizer(prompt, return_tensors="pt")

# Generate response with scores
generate_args = {
    "max_length": inputs.input_ids.size(1) + 2,  # Original length plus one token
    "output_scores": True,
    "return_dict_in_generate": True
}

# Generate one token to get its logprob
generate_output = model.generate(**inputs, **generate_args)

# Retrieve the full sequence of generated token IDs (including the prompt)
generated_sequence_ids = generate_output.sequences[0]

# Run the model again with the generated sequence to get hidden states
outputs = model(generated_sequence_ids.unsqueeze(0), output_hidden_states=True)

# Retrieve the last hidden state
hidden_states = outputs.hidden_states
last_hidden_state = hidden_states[-1]

# Get the embeddings for the last and second-to-last tokens
# -1 is the last token, -2 is the second-to-last token in the sequence
last_token_embedding = last_hidden_state[0, -1, :]
second_last_token_embedding = last_hidden_state[0, -2, :]

print("Embedding for the last token:", last_token_embedding)
print("Embedding for the second-to-last token:", second_last_token_embedding)


Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.


Embedding for the last token: tensor([ 0.9806, -2.3296,  3.0921,  ...,  6.5378,  0.3018,  3.7267],
       grad_fn=<SliceBackward0>)
Embedding for the second-to-last token: tensor([-2.2555,  2.6335,  5.9585,  ...,  4.3784,  6.0477,  1.9409],
       grad_fn=<SliceBackward0>)


In [28]:
generate_output

GreedySearchDecoderOnlyOutput(sequences=tensor([[    1,   613,  2016,   456,  2134,   330, 28747,  5278,   365, 28747,
          7087, 28705,    13,    13, 28737]]), scores=(tensor([[-12.0531, -11.6898,  -0.6606,  ...,  -4.7127,   4.2573, -12.7862]]), tensor([[-11.3678, -11.5735,  -0.9566,  ...,  -3.5125,   3.9774, -10.2945]])), attentions=None, hidden_states=None)

In [29]:
tokenizer.decode(generate_output.sequences[0], skip_special_tokens=True)

'i love this house A: positive B: negative \n\nI'

In [39]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# # Initialize your model and tokenizer
# model = AutoModelForCausalLM.from_pretrained('gpt2')  # Replace with your model
# tokenizer = AutoTokenizer.from_pretrained('gpt2')  # Replace with your tokenizer

# Text sequences
text_seq1 = 'i love this house A: positive B: negative \nA'
text_seq2 = 'i love this house A: positive B: negative \nB'

sequences = [text_seq1, text_seq2]

def calculate_log_likelihood(model, tokenizer, text):
    inputs = tokenizer(text, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs, labels=inputs["input_ids"])
    return outputs.loss.item()

# Calculate log likelihood for each sequence
log_likelihoods = [calculate_log_likelihood(model, tokenizer, seq) for seq in sequences]

# Determine which sequence has the highest likelihood
most_likely_sequence_index = log_likelihoods.index(min(log_likelihoods))  # Lower loss means higher likelihood
most_likely_sequence = sequences[most_likely_sequence_index]

print(f"The most likely sequence is sequence number {most_likely_sequence_index + 1}")


The most likely sequence is sequence number 1


In [38]:
log_likelihoods

[5.8033552169799805, 5.90085506439209]

In [3]:
# Load model directly
from transformers import AutoModelForCausalLM
model = AutoModelForCausalLM.from_pretrained("01-ai/Yi-34B", trust_remote_code=True)

Downloading (…)fetensors.index.json: 100%|██████████| 42.8k/42.8k [00:00<00:00, 1.24MB/s]
Downloading shards:   0%|          | 0/7 [10:07<?, ?it/s]


KeyboardInterrupt: 



In [1]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from datasets import load_dataset
from sklearn.cluster import KMeans

# Load the model and tokenizer
tokenizer = AutoTokenizer.from_pretrained("ehartford/dolphin-2.1-mistral-7b")
model = AutoModelForCausalLM.from_pretrained("ehartford/dolphin-2.1-mistral-7b")

# Load the dataset
dataset = load_dataset("ag_news", split='train[:1000]')

# Function to extract embeddings
def extract_embeddings(text):
    inputs = tokenizer(text, return_tensors="pt")
    outputs = model(**inputs, output_hidden_states=True)
    hidden_states = outputs.hidden_states
    last_hidden_state = hidden_states[-1]
    return last_hidden_state[0, -1, :].detach().numpy(), last_hidden_state[0, -2, :].detach().numpy()

# Extract embeddings for each example
embeddings_last_token = []
embeddings_second_last_token = []
for example in dataset:
    text = example['text']
    last_token_emb, second_last_token_emb = extract_embeddings(text)
    embeddings_last_token.append(last_token_emb)
    embeddings_second_last_token.append(second_last_token_emb)

# Cluster the embeddings
n_clusters = 4  # Assuming AG News has 4 categories
kmeans_last = KMeans(n_clusters=n_clusters).fit(embeddings_last_token)
kmeans_second_last = KMeans(n_clusters=n_clusters).fit(embeddings_second_last_token)

  from .autonotebook import tqdm as notebook_tqdm
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Loading checkpoint shards: 100%|██████████| 2/2 [00:14<00:00,  7.04s/it]
Downloading builder script: 100%|██████████| 4.06k/4.06k [00:00<00:00, 16.8MB/s]
Downloading metadata: 100%|██████████| 2.65k/2.65k [00:00<00:00, 4.08MB/s]
Downloading readme: 100%|██████████| 7.95k/7.95k [00:00<00:00, 6.03MB/s]
Downloading data: 29.5MB [00:00, 46.0MB/s]                            
Downloading data: 1.86MB [00:00, 16.9MB/s]                  
Generating train split: 100%|██████████| 120000/120000 [00:02<00:00, 43638.57 examples/s]
Generating test split: 100%|██████████| 7600/7600 [00:00<00:00, 47060.01 examples/s]
  super()._check_params_vs_input(X, default_n_init=10)
huggingface/tokenizers: The current process just g

In [2]:
from sklearn.metrics import adjusted_rand_score

# Assuming kmeans_last and kmeans_second_last are your KMeans models from the previous code
# and dataset['label'] is the list of true labels

# Calculate ARI for the last token embeddings
ari_last = adjusted_rand_score(dataset['label'], kmeans_last.labels_)

# Calculate ARI for the second last token embeddings
ari_second_last = adjusted_rand_score(dataset['label'], kmeans_second_last.labels_)

# Print the results
print(f"Adjusted Rand Index using last token embeddings: {ari_last}")
print(f"Adjusted Rand Index using second last token embeddings: {ari_second_last}")


Adjusted Rand Index using last token embeddings: 0.12502712446739064
Adjusted Rand Index using second last token embeddings: 0.002119286661442183
