In [5]:
!pip install transformers torch datasets bitsandbytes



In [6]:
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
import torch
from datasets import load_dataset

In [7]:
!pip install huggingface_hub
!huggingface-cli login

# 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


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    A token is already saved on your machine. Run `huggingface-cli whoami` to get more information or `huggingface-cli logout` if you want to log out.
    Setting a new token will erase the existing one.
    To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
Add token as git credential? (Y/n) y
Token is valid (permission: read).

In [8]:
model_name = 'meta-llama/Meta-Llama-3-8B'

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    load_in_4bit=True,
    device_map='auto'
)
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00002-of-00004.safetensors:  68%|######7   | 3.38G/5.00G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/177 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/50.6k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/73.0 [00:00<?, ?B/s]

In [9]:
dbpedia = load_dataset("dbpedia_14")
print(dbpedia)

Downloading readme:   0%|          | 0.00/7.64k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/106M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/13.3M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/560000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/70000 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['label', 'title', 'content'],
        num_rows: 560000
    })
    test: Dataset({
        features: ['label', 'title', 'content'],
        num_rows: 70000
    })
})


In [14]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
import numpy as np
import random

random.seed(42)

# Label encoding for the target variable (categorical labels)
label_encoder = LabelEncoder()
dbpedia_labels = label_encoder.fit_transform(dbpedia['train']['label'])

def create_prompt(entity):
    return f"Question: Provide detailed information about the entity '{entity}'?\nAnswer:"

def extract_all_layer_embeddings(prompt, model, tokenizer):
    inputs = tokenizer(prompt, return_tensors="pt", padding=True)
    input_ids = inputs["input_ids"].to('cuda')

    with torch.no_grad():
        outputs = model(input_ids, output_hidden_states=True)
        hidden_states = outputs.hidden_states

    first_layer_embedding = hidden_states[0][:, -1, :]  
    middle_index = len(hidden_states) // 2
    middle_layer_embedding = hidden_states[middle_index][:, -1, :]  
    final_layer_embedding = hidden_states[-1][:, -1, :]  

    return first_layer_embedding.cpu().numpy(), middle_layer_embedding.cpu().numpy(), final_layer_embedding.cpu().numpy()

num_samples = 500
total_samples = len(dbpedia['train']['title'])
random_indices = random.sample(range(total_samples), num_samples)

first_layer_embeddings = []
middle_layer_embeddings = []
final_layer_embeddings = []
sample_entities = [dbpedia['train']['title'][i] for i in random_indices]
sample_labels = [dbpedia_labels[i] for i in random_indices]

for entity in sample_entities:
    prompt = create_prompt(entity)
    first_emb, middle_emb, final_emb = extract_all_layer_embeddings(prompt, model, tokenizer)

    first_layer_embeddings.append(first_emb)
    middle_layer_embeddings.append(middle_emb)
    final_layer_embeddings.append(final_emb)

first_layer_embeddings = np.vstack(first_layer_embeddings)
middle_layer_embeddings = np.vstack(middle_layer_embeddings)
final_layer_embeddings = np.vstack(final_layer_embeddings)

def train_classifier(embeddings, sample_labels, layer_name):

    X_train, X_test, y_train, y_test = train_test_split(embeddings, sample_labels, test_size=0.2, random_state=42)
    classifier = LogisticRegression(max_iter=1000)
    classifier.fit(X_train, y_train)

    y_pred = classifier.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy using {layer_name} layer embeddings: {accuracy * 100:.2f}%")
    return accuracy



first_layer_accuracy = train_classifier(first_layer_embeddings, sample_labels, "first")
middle_layer_accuracy = train_classifier(middle_layer_embeddings, sample_labels, "middle")
final_layer_accuracy = train_classifier(final_layer_embeddings, sample_labels, "final")

print("\nComparison of Classifier Performance:")
print(f"First Layer Accuracy: {first_layer_accuracy * 100:.2f}%")
print(f"Middle Layer Accuracy: {middle_layer_accuracy * 100:.2f}%")
print(f"Final Layer Accuracy: {final_layer_accuracy * 100:.2f}%")

Accuracy using first layer embeddings: 9.00%
Accuracy using middle layer embeddings: 66.00%
Accuracy using final layer embeddings: 70.00%

Comparison of Classifier Performance:
First Layer Accuracy: 9.00%
Middle Layer Accuracy: 66.00%
Final Layer Accuracy: 70.00%
