# Llama 3 8B Without QLORA and PEFT | Dataset A

In [14]:
import json
import wandb
import torch
import pandas as pd
import numpy as np
from sklearn.metrics import (accuracy_score,
                             classification_report,
                             confusion_matrix)
from transformers import pipeline, BitsAndBytesConfig, AutoTokenizer, AutoModelForCausalLM

pytorch version 2.4.0+cu124


In [9]:
config_data = json.load(open('../config.json'))
HF_TOKEN = config_data['HF_TOKEN']
WANDB_TOKEN = config_data['WANDB_TOKEN']

wandb.login(key=WANDB_TOKEN)

run = wandb.init(
    project='wz_llama_zeroshot',
    config={"model_name": "Meta-Llama-3-8B"}
)

FileNotFoundError: [Errno 2] No such file or directory: 'config.json'

In [10]:
id_to_label_mapping = {0: 'A1', 1: 'A2', 2: 'B1', 3: 'B2', 4: 'C1', 5: 'C2'}
label_to_id_mapping = {'A1': 0, 'A2': 1, 'B1': 2, 'B2': 3, 'C1': 4, 'C2': 5}
cefr_levels = ["A1", "A2", "B1", "B2", "C1", "C2"]

In [11]:
train = pd.read_csv('../datasets/train.csv')
test = pd.read_csv('../datasets/test.csv')
validation = pd.read_csv('../datasets/validation.csv')

In [17]:
quantization_config = BitsAndBytesConfig(
    load_in_8bit=True
)

In [18]:
model_name = "meta-llama/Meta-Llama-3-8B"
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    resume_download=None,
    device_map=None,
    quantization_config=quantization_config,
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True
)
model.config.use_cache = False
model.config.pretraining_tp = 1

tokenizer = AutoTokenizer.from_pretrained(model_name, resume_download=None)

tokenizer.pad_token_id = tokenizer.eos_token_id

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [20]:
# log_thread = threading.Thread(target=log_system_metrics, args=(10,), daemon=True)
# log_thread.start()

def predict(test_data, prediction_model, prediction_tokenizer):
    predictions = []
    global cefr_levels
    
    for index, row in test_data[:5].iterrows():
        prompt = f"""Classify the text below into one of six CEFR levels (A1, A2, B1, B2, C1, C2). Select between these 6 levels: A1, A2, B1, B2, C1, C2.
Text: {row['text']}
CEFR level: """
        pipe = pipeline(
            "text-generation",
            model=prediction_model,
            tokenizer=prediction_tokenizer,
            max_new_tokens=2,
            temperature=0.1,
        )

        result = pipe(prompt)
        print(result)
        answer = result[0]['generated_text'].split("level:")[-1].strip()
        print(answer)
        # Determine the predicted category
        for level in cefr_levels:
            if level.lower() in answer.lower():
                predictions.append(level)
                break
        else:
            predictions.append("none")

    return predictions
y_pred = predict(test, model, tokenizer)

[{'generated_text': 'Classify the text below into one of six CEFR levels (A1, A2, B1, B2, C1, C2). Select between these 6 levels: A1, A2, B1, B2, C1, C2.\nText: After four single - season stints at Ohio Wesleyan , Nebraska , Kansas , and Stanford , Yost served as the head football coach for the Michigan Wolverines football team from 1901 through 1923 , and again in 1925 and 1926 .\nCEFR level: 1\n'}]
1
[{'generated_text': 'Classify the text below into one of six CEFR levels (A1, A2, B1, B2, C1, C2). Select between these 6 levels: A1, A2, B1, B2, C1, C2.\nText:  I am a Christian and do nt want to serve a Muslim army ,  he had written , adding that he had been attending church since 1998 .\nCEFR level:  B2'}]
B2
[{'generated_text': 'Classify the text below into one of six CEFR levels (A1, A2, B1, B2, C1, C2). Select between these 6 levels: A1, A2, B1, B2, C1, C2.\nText: Anari , from Greek ( αναρή ) is a crumbly fresh whey cheese , similar to ricotta , made from goat or sheep milk ; usual

In [17]:
from matplotlib import pyplot as plt
from sklearn.metrics import ConfusionMatrixDisplay

print(y_pred)

y_true = test['level']

def map_func(x):
    return label_to_id_mapping.get(x, -1)

y_true_mapped = np.vectorize(map_func)(y_true)
y_pred_mapped = np.vectorize(map_func)(y_pred)

# Calculate accuracy
accuracy = accuracy_score(y_true=y_true_mapped, y_pred=y_pred_mapped)
print(f'Accuracy: {accuracy:.3f}')

# Generate accuracy report
unique_labels = set(y_true_mapped)  # Get unique labels

for label in unique_labels:
    label_indices = [i for i in range(len(y_true_mapped)) if y_true_mapped[i] == label]
    label_y_true = [y_true_mapped[i] for i in label_indices]
    label_y_pred = [y_pred_mapped[i] for i in label_indices]
    label_accuracy = accuracy_score(label_y_true, label_y_pred)
    print(f'Accuracy for label {cefr_levels[label]}: {label_accuracy:.3f}')

class_report = classification_report(y_true=y_true_mapped, y_pred=y_pred_mapped, target_names=cefr_levels, labels=list(range(len(cefr_levels))))
class_report_dict = classification_report(y_true=y_true_mapped, y_pred=y_pred_mapped, target_names=cefr_levels, labels=list(range(len(cefr_levels))), output_dict=True)
print('\nClassification Report:')
print(class_report)



# Generate confusion matrix
conf_matrix = confusion_matrix(y_true=y_true_mapped, y_pred=y_pred_mapped, labels=list(range(len(cefr_levels))))
disp = ConfusionMatrixDisplay(confusion_matrix=conf_matrix,
                              display_labels=cefr_levels)
disp.plot()
print('\nConfusion Matrix:')
print(conf_matrix)
# wandb.log({
#     "Class Proportions": wandb.sklearn.plot_class_proportions(train['level'], test['test'], cefr_levels),
#     "Confusion Matrix Plot": wandb.sklearn.plot_confusion_matrix(y_true, y_pred, cefr_levels),
#     "Confusion Matrix": wandb.plot.confusion_matrix(y_true=y_true, preds=y_pred, class_names=cefr_levels),
#     "Confusion Matrix Disp": plt,
#     "Confusion Matrix Disp 2": disp.plot(),
#     "Classification Report": class_report_dict,
#     "Classification Report DF": class_report_dict,
# })

['none', 'none', 'none', 'none', 'none']


ValueError: Found input variables with inconsistent numbers of samples: [1000, 5]

In [None]:
plt.show()

In [39]:
wandb.finish()