In [1]:
!pip install -qqq -U torch=='2.0.0'

!pip install -qqq -U accelerate=='0.25.0' peft=='0.7.1' bitsandbytes=='0.41.3.post2' transformers=='4.36.1' trl=='0.7.4'

In [2]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [3]:
import warnings
warnings.filterwarnings("ignore")

In [4]:
import numpy as np
import pandas as pd
import os
from tqdm import tqdm
import bitsandbytes as bnb
import torch
import torch.nn as nn
import transformers
from datasets import Dataset
from peft import LoraConfig, PeftConfig
from trl import SFTTrainer
from transformers import (AutoModelForCausalLM, 
                          AutoTokenizer, 
                          BitsAndBytesConfig, 
                          TrainingArguments, 
                          pipeline, 
                          logging)
from sklearn.metrics import (accuracy_score, 
                             classification_report, 
                             confusion_matrix)
from sklearn.model_selection import train_test_split

2024-04-22 01:29:12.885246: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-22 01:29:12.885389: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-22 01:29:13.043252: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [5]:
filename = '/kaggle/input/ind-proj/decisions.csv'

df = pd.read_csv(filename, encoding="utf-8", encoding_errors="replace")

df = df[['text', 'decision']]

X_train = list()
X_test = list()
for decision in ["yes", "no"]:
    train, test  = train_test_split(df[df.decision==decision], 
                                    train_size=.8,
                                    test_size=.2, 
                                    random_state=42)
    X_train.append(train)
    X_test.append(test)

X_train = pd.concat(X_train).sample(frac=1, random_state=10)
X_test = pd.concat(X_test)

eval_idx = [idx for idx in df.index if idx not in list(train.index) + list(test.index)]
X_eval = df[df.index.isin(eval_idx)]
X_eval = (X_eval
          .groupby('decision', group_keys=False)
          .apply(lambda x: x.sample(n=50, random_state=10, replace=True)))
X_train = X_train.reset_index(drop=True)

def generate_prompt(data_point):
    return f"""### Instruction:
            Classify whether the given chunk involves a decision that will effect the story or not.
            A decision is defined as when the character goes about making a choice between two or more options. 
            The decision should be significant enough to affect the story in a major way.
            It doesn't really involve emotions, feelings or thoughts, but what the character does, or what happens to them.
            This involes interactions between characters, or the character and the environment.
            What isn't a decision is chunks describing the setting, or the character's thoughts or feelings.
            Return the answer as the corresponding decision label "yes" or "no"
            
            ### Text:
            {data_point["text"]}
            
            ### Decision:
            {data_point["decision"]}
            """

def generate_test_prompt(data_point):
    return f"""### Instruction:
            Classify whether the given chunk involves a decision that will effect the story or not.
            A decision is defined as when the character goes about making a choice between two or more options. 
            The decision should be significant enough to affect the story in a major way.
            It doesn't really involve emotions, feelings or thoughts, but what the character does, or what happens to them.
            This involes interactions between characters, or the character and the environment.
            What isn't a decision is chunks describing the setting, or the character's thoughts or feelings.
            Return the answer as the corresponding decision label "yes" or "no"
            
            ### Text:
            {data_point["text"]}
            
            ### Decision:
            """

X_train = pd.DataFrame(X_train.apply(generate_prompt, axis=1), 
                       columns=["text"])
X_eval = pd.DataFrame(X_eval.apply(generate_prompt, axis=1), 
                      columns=["text"])

y_true = X_test.decision
X_test = pd.DataFrame(X_test.apply(generate_test_prompt, axis=1), columns=["text"])

train_data = Dataset.from_pandas(X_train)
eval_data = Dataset.from_pandas(X_eval)

In [6]:
def evaluate(y_true, y_pred):
    labels = ['yes', 'no', 'none']
    mapping = {"yes": 1, "no": 0, 'none':2}
    def map_func(x):
        return mapping.get(x, 1)
    
    y_true = np.vectorize(map_func)(y_true)
    y_pred = np.vectorize(map_func)(y_pred)
    
    # Calculate accuracy
    accuracy = accuracy_score(y_true=y_true, y_pred=y_pred)
    print(f'Accuracy: {accuracy:.3f}')
    
    # Generate accuracy report
    unique_labels = set(y_true)  # Get unique labels
    
    for label in unique_labels:
        label_indices = [i for i in range(len(y_true)) 
                         if y_true[i] == label]
        label_y_true = [y_true[i] for i in label_indices]
        label_y_pred = [y_pred[i] for i in label_indices]
        accuracy = accuracy_score(label_y_true, label_y_pred)
        print(f'Accuracy for label {label}: {accuracy:.3f}')
        
    # Generate classification report
    class_report = classification_report(y_true=y_true, y_pred=y_pred)
    print('\nClassification Report:')
    print(class_report)
    
    # Generate confusion matrix
    conf_matrix = confusion_matrix(y_true=y_true, y_pred=y_pred, labels=[0, 1, 2])
    print('\nConfusion Matrix:')
    print(conf_matrix)

In [7]:
from transformers import AutoTokenizer, FalconForCausalLM

model_name = "Rocketknight1/falcon-rw-1b"

compute_dtype = getattr(torch, "float16")

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=False,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=compute_dtype,
)

model = FalconForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=bnb_config, 
)

model.config.use_cache = False
model.config.pretraining_tp = 1

tokenizer = AutoTokenizer.from_pretrained(model_name,
                                          trust_remote_code=True,
                                          padding_side="left",
                                          add_bos_token=True,
                                          add_eos_token=True,
                                         )

tokenizer.pad_token = tokenizer.eos_token

config.json:   0%|          | 0.00/675 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/2.62G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/234 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/99.0 [00:00<?, ?B/s]

In [8]:
def predict(X_test, model, tokenizer):
    y_pred = []
    for i in tqdm(range(len(X_test))):
        prompt = X_test.iloc[i]["text"]
        pipe = pipeline(task="text-generation", 
                        model=model, 
                        tokenizer=tokenizer,
                        max_new_tokens = 1, 
                        temperature = 0.0,
                       )
        result = pipe(prompt, pad_token_id=pipe.tokenizer.eos_token_id)
        answer = result[0]['generated_text'].split("=")[-1].lower()
        if "yes" in answer:
            y_pred.append("yes")
        elif "no" in answer:
            y_pred.append("no")
        else:
            y_pred.append("none")
    return y_pred

In [9]:
y_pred = predict(X_test, model, tokenizer)

100%|██████████| 157/157 [00:15<00:00, 10.21it/s]


In [10]:
evaluate(y_true, y_pred)

Accuracy: 0.217
Accuracy for label 0: 0.000
Accuracy for label 1: 1.000

Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       123
           1       0.22      1.00      0.36        34

    accuracy                           0.22       157
   macro avg       0.11      0.50      0.18       157
weighted avg       0.05      0.22      0.08       157


Confusion Matrix:
[[  0 123   0]
 [  0  34   0]
 [  0   0   0]]


In [13]:
OUTPUT_DIR = "falcon-clf"

peft_config = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=64,
    bias="none",
    task_type="CAUSAL_LM",
)

training_arguments = TrainingArguments(
    output_dir=OUTPUT_DIR,
    num_train_epochs=20,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8, # 4
    optim="paged_adamw_32bit",
    save_steps=0,
    logging_steps=10,
    learning_rate=2e-4,
    weight_decay=0.001,
    fp16=True,
    bf16=False,
    max_grad_norm=0.3,
    max_steps=-1,
    warmup_ratio=0.03,
    group_by_length=True,
    lr_scheduler_type="cosine",
    report_to="tensorboard",
    evaluation_strategy="epoch"
)

trainer = SFTTrainer(
    model=model,
    train_dataset=train_data,
    eval_dataset=eval_data,
    peft_config=peft_config,
    dataset_text_field="text",
    tokenizer=tokenizer,
    args=training_arguments,
    packing=False,
    max_seq_length=1024,
)

Map:   0%|          | 0/623 [00:00<?, ? examples/s]

Map:   0%|          | 0/50 [00:00<?, ? examples/s]

In [14]:
trainer.train()

Epoch,Training Loss,Validation Loss
0,0.7984,0.716728
1,0.7175,0.69545
2,0.6092,0.668957
3,0.5426,0.654854
4,0.7676,0.641559
5,0.6552,0.621609
6,0.5989,0.603875
8,0.4591,0.561493
9,0.5415,0.54286
10,0.4794,0.518745


TrainOutput(global_step=1540, training_loss=0.5443193525462956, metrics={'train_runtime': 3500.0788, 'train_samples_per_second': 3.56, 'train_steps_per_second': 0.44, 'total_flos': 3.3736586408976384e+16, 'train_loss': 0.5443193525462956, 'epoch': 19.78})

In [15]:
trainer.model.save_pretrained("falcon-clf")

In [16]:
y_pred = predict(X_test, model, tokenizer)
evaluate(y_true, y_pred)

100%|██████████| 157/157 [00:21<00:00,  7.16it/s]

Accuracy: 0.217
Accuracy for label 0: 0.000
Accuracy for label 1: 1.000

Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       123
           1       0.22      1.00      0.36        34

    accuracy                           0.22       157
   macro avg       0.11      0.50      0.18       157
weighted avg       0.05      0.22      0.08       157


Confusion Matrix:
[[  0 123   0]
 [  0  34   0]
 [  0   0   0]]





In [17]:
from kaggle_secrets import UserSecretsClient

import wandb

user_secrets = UserSecretsClient()
secret_value_0 = user_secrets.get_secret("text-summarizer")
secret_value_1 = user_secrets.get_secret("wandb")

In [18]:
import huggingface_hub

wandb.login(key = secret_value_1)

huggingface_hub.login(token=secret_value_0)

[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Token has not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [19]:
trainer.push_to_hub()

events.out.tfevents.1713749439.e46554ad8bbb.34.1:   0%|          | 0.00/34.8k [00:00<?, ?B/s]

events.out.tfevents.1713749420.e46554ad8bbb.34.0:   0%|          | 0.00/4.85k [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/50.3M [00:00<?, ?B/s]

training_args.bin:   0%|          | 0.00/4.22k [00:00<?, ?B/s]

Upload 4 LFS files:   0%|          | 0/4 [00:00<?, ?it/s]

CommitInfo(commit_url='https://huggingface.co/suneeln-duke/falcon-clf/commit/73bbf245a04ff831cb2d6222c5d75a0e29c29366', commit_message='End of training', commit_description='', oid='73bbf245a04ff831cb2d6222c5d75a0e29c29366', pr_url=None, pr_revision=None, pr_num=None)

In [20]:
from transformers import pipeline

In [21]:
classifier = pipeline(model=f"suneeln-duke/falcon-clf", device_map="auto")

adapter_config.json:   0%|          | 0.00/573 [00:00<?, ?B/s]

In [23]:
text = generate_test_prompt({
    'text': "Ram is trying to decide between buying a burgrer or a pizza for dinner"
})

In [24]:
%%time

# classifier(text)

result = classifier(text, pad_token_id=classifier.tokenizer.eos_token_id)
answer = result[0]['generated_text'].split("=")[-1].lower()

CPU times: user 489 ms, sys: 15.9 ms, total: 505 ms
Wall time: 503 ms


In [25]:
answer

'### instruction:\n            classify whether the given chunk involves a decision that will effect the story or not.\n            a decision is defined as when the character goes about making a choice between two or more options. \n            the decision should be significant enough to affect the story in a major way.\n            it doesn\'t really involve emotions, feelings or thoughts, but what the character does, or what happens to them.\n            this involes interactions between characters, or the character and the environment.\n            what isn\'t a decision is chunks describing the setting, or the character\'s thoughts or feelings.\n            return the answer as the corresponding decision label "yes" or "no"\n            \n            ### text:\n            ram is trying to decide between buying a burgrer or a pizza for dinner\n            \n            ### decision:\n             '