In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM

from datasets import DatasetDict, Features, Sequence, Value, load_dataset

import transformers

import torch

import os
import sys
sys.path.append(os.getcwd()+"/../..")
from src import paths

import tqdm

In [None]:
# # Download model
# checkpoint = "meta-llama/Llama-2-7b-hf"
# model = AutoModelForCausalLM.from_pretrained(checkpoint)
# tokenizer = AutoTokenizer.from_pretrained(model)

# # Save model
# model.save_pretrained(paths.MODEL_PATH/'llama2')

# # Save tokenizer
# tokenizer.save_pretrained(paths.MODEL_PATH/'llama2')

In [2]:
# Load model and tokenizer
tokenizer = AutoTokenizer.from_pretrained(paths.MODEL_PATH/'llama2', padding_side='left')
model = AutoModelForCausalLM.from_pretrained(paths.MODEL_PATH/'llama2', device_map="auto", load_in_4bit=True)

Loading checkpoint shards:   0%|          | 0/6 [00:00<?, ?it/s]

In [3]:
# Check device allocation
for name, param in model.named_parameters():
    print(f"Device of {name}: ", param.device)

Device of model.embed_tokens.weight:  cuda:0
Device of model.layers.0.self_attn.q_proj.weight:  cuda:0
Device of model.layers.0.self_attn.k_proj.weight:  cuda:0
Device of model.layers.0.self_attn.v_proj.weight:  cuda:0
Device of model.layers.0.self_attn.o_proj.weight:  cuda:0
Device of model.layers.0.mlp.gate_proj.weight:  cuda:0
Device of model.layers.0.mlp.up_proj.weight:  cuda:0
Device of model.layers.0.mlp.down_proj.weight:  cuda:0
Device of model.layers.0.input_layernorm.weight:  cuda:0
Device of model.layers.0.post_attention_layernorm.weight:  cuda:0
Device of model.layers.1.self_attn.q_proj.weight:  cuda:0
Device of model.layers.1.self_attn.k_proj.weight:  cuda:0
Device of model.layers.1.self_attn.v_proj.weight:  cuda:0
Device of model.layers.1.self_attn.o_proj.weight:  cuda:0
Device of model.layers.1.mlp.gate_proj.weight:  cuda:0
Device of model.layers.1.mlp.up_proj.weight:  cuda:0
Device of model.layers.1.mlp.down_proj.weight:  cuda:0
Device of model.layers.1.input_layernorm.w

In [21]:
# Load data
data_files = {"train": "ms-diag_clean_train.csv", "validation": "ms-diag_clean_val.csv", "test": "ms-diag_clean_test.csv"}
df = load_dataset(os.path.join(paths.DATA_PATH_PREPROCESSED,'ms-diag'), data_files = data_files)

In [47]:
# Load data
data_files = {"train": "ms-diag_clean_train.csv", "validation": "ms-diag_clean_val.csv", "test": "ms-diag_clean_test.csv"}
df = load_dataset(os.path.join(paths.DATA_PATH_PREPROCESSED,'ms-diag'), data_files = data_files)
# Prompt
prompt = """###Classify the following text as 'primary_progressive_multiple_sclerosis', 'secondary_progressive_multiple_sclerosis', or'relapsing_remitting_multiple_sclerosis'.###
Text: 

"""

# Prompt construction function
def construct_prompt(text):
    return prompt + text + '\n Classification: '

def preprocess(examples):
    examples['text'] = construct_prompt(examples['text'])
    return examples
df = df.map(preprocess)
model_inputs = tokenizer(df["train"]["text"][0], return_tensors="pt").to("cuda")
model_inputs = {k: v.to(torch.int64).to("cuda") for k, v in model_inputs.items()}
generated_ids = model.generate(**model_inputs, max_new_tokens=20, num_beams=4, do_sample=True, num_return_sequences = 1)
tokenizer.batch_decode(generated_ids, skip_special_tokens=True)

Map:   0%|          | 0/143 [00:00<?, ? examples/s]

Map:   0%|          | 0/16 [00:00<?, ? examples/s]

Map:   0%|          | 0/40 [00:00<?, ? examples/s]

["###Classify the following text as 'primary_progressive_multiple_sclerosis', 'secondary_progressive_multiple_sclerosis', or'relapsing_remitting_multiple_sclerosis'.###\nText: \n\nErstmanifestation einer schubförmig-remittierenden Multiple Sklerose\r\n··anamnestisch: vor 2 Wochen Beginn mit Übelkeit und Erbrechen, langsam aufgetretener Schwindel ohne Richtungskomponente, Verstärkung bei körperlicher und psychischer Anstrengung, im Verlauf zusätzlich beinbetonte Schwäche des gesamten Körpers, Kribbelparästhesien am Rumpf, Gesichtsfeldeinfschränkung und Visusminderung des linken Auges, Extremitätenataxie bds., Babinski bds positiv\r\n·klinisch: Visusminderung linkes Auge, fingerperimetrisch eingeschränktes Gesichtsfeld des linken Auges insbesondere temporal, verlangsamte direkte und konsensuelle Lichtreaktion links, skandierende Sprache, distalbetonte Hemiparese links ohne faziale Beteiligung, Dysdiadochokinese bds und Extremitätenataxie, Reflexsprung C6-C7, Muskeleigenreflexe an den Bei

In [25]:
print("Length of class labels: ")
[len(label) for label in tokenizer(list(set(df["train"]["labels"])))["input_ids"]]

Length of class labels: 


[12, 14, 12]

In [26]:
# Tokenize dataset
def tokenize(examples):
    return tokenizer(examples['text'], truncation=True, return_tensors = "pt", padding = True, max_length = 1024)

df = df.map(tokenize, batched=True, batch_size=512)

Map:   0%|          | 0/143 [00:00<?, ? examples/s]

Map:   0%|          | 0/16 [00:00<?, ? examples/s]

Map:   0%|          | 0/40 [00:00<?, ? examples/s]

In [11]:
test_sample = {"input_ids":df["train"]["input_ids"][:4],
                "attention_mask":df["train"]["attention_mask"][:4]}
test_sample = {k: v.to(torch.int32).to("cuda") for k, v in test_sample.items()}

AttributeError: 'list' object has no attribute 'to'

In [35]:
import subprocess as sp
import os

def get_gpu_memory():
    command = "nvidia-smi --query-gpu=memory.free --format=csv"
    memory_free_info = sp.check_output(command.split()).decode('ascii').split('\n')[:-1][1:]
    memory_free_values = [int(x.split()[0]) for i, x in enumerate(memory_free_info)]
    return memory_free_values

get_gpu_memory()

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


[173, 183]