### Imports

In [26]:
import numpy as np
import pandas as pd
import os
from tqdm import tqdm
import bitsandbytes as bnb
import torch
import torch.nn as nn
import transformers
from datasets import Dataset
from peft import LoraConfig, PeftConfig
from trl import SFTTrainer
from trl import setup_chat_format
from transformers import (AutoModelForCausalLM, 
                          LlamaForSequenceClassification,
                          AutoTokenizer, 
                          BitsAndBytesConfig, 
                          TrainingArguments, 
                          pipeline, 
                          logging)
from sklearn.metrics import (accuracy_score, 
                             classification_report, 
                             confusion_matrix)
from sklearn.model_selection import train_test_split

### Loading and processing the dataset

In [51]:
df = pd.read_csv("datasets/md-data-statement.csv")
df.head()

Unnamed: 0,Statement,NoShow
0,Patient Information:\n- Gender: Female\n- Age:...,No
1,Patient Information:\n- Gender: Male\n- Age: 5...,No
2,Patient Information:\n- Gender: Female\n- Age:...,No
3,Patient Information:\n- Gender: Female\n- Age:...,No
4,Patient Information:\n- Gender: Female\n- Age:...,No


In [52]:
yes_df = df[df['NoShow'] == 'Yes']
no_df = df[df['NoShow'] == 'No']

min_count = min(len(yes_df), len(no_df), 2000)

yes_sample = yes_df.sample(n=min_count, random_state=42)
no_sample = no_df.sample(n=min_count, random_state=42)

balanced_df = pd.concat([yes_sample, no_sample]).reset_index(drop=True)

df = balanced_df.sample(frac=1, random_state=42).reset_index(drop=True)

In [53]:
# Split the DataFrame
train_size = 0.7
eval_size = 0.2

# Calculate sizes
train_end = int(train_size * len(df))
eval_end = train_end + int(eval_size * len(df))

# Split the data
X_train = df[:train_end]
X_eval = df[train_end:eval_end]
X_test = df[eval_end:]

In [54]:
def generate_prompt(data_point):
    return f"""
    Patient Information: Gender, Age, Appointment Date, Scheduled For, Medical Conditions, Hypertension, Diabetes, Alcoholism, Handicap, SMS Reminder Sent.
    
    Based on the information provided above, determine whether the patient will show up for their appointment.
    
    Important Notes:
    - Consider the all of the patient's inforamtion, including reminders received.
    - Focus on any patterns that might indicate a no-show, such as chronic conditions or missed reminders such as SMS as well as the difference in Appointment Date and Scheduled For.

    Your response should be in the following format:
    - Label: "Yes" if the patient will not show up, or "No" if they will show up.

    Example Output:
    - text: {data_point["Statement"]}
    - label: {data_point["NoShow"]} """.strip()

def generate_test_prompt(data_point):
    return f""" 
    Patient Information: Gender, Age, Appointment Date, Scheduled For, Medical Conditions, Hypertension, Diabetes, Alcoholism, Handicap, SMS Reminder Sent.
    
    Based on the information provided above, determine whether the patient will show up for their appointment.
    
    **Important Notes**:
    - Consider the patient's medical conditions and reminders received.
    - Focus on any patterns that might indicate a no-show, such as chronic conditions or missed reminders.

    Your response should be in the following format:
    - **Label**: "Yes" if the patient will not show up, or "No" if they will show up.

    Example Output:
    - text: {data_point["Statement"]}
    - label: """.strip()

# Generate prompts for training and evaluation data
X_train.loc[:,'text'] = X_train.apply(generate_prompt, axis=1)
X_eval.loc[:,'text'] = X_eval.apply(generate_prompt, axis=1)

# Generate test prompts and extract true labels
y_true = X_test.loc[:,'NoShow']
X_test = pd.DataFrame(X_test.apply(generate_test_prompt, axis=1), columns=["text"])


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = value


In [55]:
X_train

Unnamed: 0,Statement,NoShow,text
0,Patient Information:\n- Gender: Male\n- Age: 1...,Yes,"Patient Information: Gender, Age, Appointment ..."
1,Patient Information:\n- Gender: Male\n- Age: 6...,No,"Patient Information: Gender, Age, Appointment ..."
2,Patient Information:\n- Gender: Female\n- Age:...,Yes,"Patient Information: Gender, Age, Appointment ..."
3,Patient Information:\n- Gender: Male\n- Age: 9...,No,"Patient Information: Gender, Age, Appointment ..."
4,Patient Information:\n- Gender: Female\n- Age:...,No,"Patient Information: Gender, Age, Appointment ..."
...,...,...,...
2795,Patient Information:\n- Gender: Male\n- Age: 4...,No,"Patient Information: Gender, Age, Appointment ..."
2796,Patient Information:\n- Gender: Female\n- Age:...,Yes,"Patient Information: Gender, Age, Appointment ..."
2797,Patient Information:\n- Gender: Male\n- Age: 3...,Yes,"Patient Information: Gender, Age, Appointment ..."
2798,Patient Information:\n- Gender: Male\n- Age: 5...,Yes,"Patient Information: Gender, Age, Appointment ..."


In [56]:
X_train

Unnamed: 0,Statement,NoShow,text
0,Patient Information:\n- Gender: Male\n- Age: 1...,Yes,"Patient Information: Gender, Age, Appointment ..."
1,Patient Information:\n- Gender: Male\n- Age: 6...,No,"Patient Information: Gender, Age, Appointment ..."
2,Patient Information:\n- Gender: Female\n- Age:...,Yes,"Patient Information: Gender, Age, Appointment ..."
3,Patient Information:\n- Gender: Male\n- Age: 9...,No,"Patient Information: Gender, Age, Appointment ..."
4,Patient Information:\n- Gender: Female\n- Age:...,No,"Patient Information: Gender, Age, Appointment ..."
...,...,...,...
2795,Patient Information:\n- Gender: Male\n- Age: 4...,No,"Patient Information: Gender, Age, Appointment ..."
2796,Patient Information:\n- Gender: Female\n- Age:...,Yes,"Patient Information: Gender, Age, Appointment ..."
2797,Patient Information:\n- Gender: Male\n- Age: 3...,Yes,"Patient Information: Gender, Age, Appointment ..."
2798,Patient Information:\n- Gender: Male\n- Age: 5...,Yes,"Patient Information: Gender, Age, Appointment ..."


In [57]:
X_train.NoShow.value_counts()

Yes    1407
No     1393
Name: NoShow, dtype: int64

In [58]:
y_true.value_counts()

No     207
Yes    193
Name: NoShow, dtype: int64

### Convert to datasets

In [59]:
train_data = Dataset.from_pandas(X_train[['text']])
eval_data = Dataset.from_pandas(X_eval[['text']])

In [60]:
train_data['text'][1]

'Patient Information: Gender, Age, Appointment Date, Scheduled For, Medical Conditions, Hypertension, Diabetes, Alcoholism, Handicap, SMS Reminder Sent.\n    \n    Based on the information provided above, determine whether the patient will show up for their appointment.\n    \n    Important Notes:\n    - Consider the all of the patient\'s inforamtion, including reminders received.\n    - Focus on any patterns that might indicate a no-show, such as chronic conditions or missed reminders such as SMS as well as the difference in Appointment Date and Scheduled For.\n\n    Your response should be in the following format:\n    - Label: "Yes" if the patient will not show up, or "No" if they will show up.\n\n    Example Output:\n    - text: Patient Information:\n- Gender: Male\n- Age: 62\n- Appointment Date: 2016-04-28T07:27:12Z\n- Scheduled For: 2016-06-08T00:00:00Z\n- Medical Conditions:\n  * Hypertension: NO\n  * Diabetes: NO\n  * Alcoholism: NO\n- Handicap: NO\n- SMS Reminder Sent: YES\n  

### Loading the model and tokenizer

In [61]:
base_model_name = "meta-llama/Meta-Llama-3.1-8B-instruct"

In [62]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=False,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype="float16",
)


model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    device_map="auto",
    torch_dtype="float16",
    quantization_config=bnb_config, 
)

model.config.use_cache = False
model.config.pretraining_tp = 1

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [63]:
tokenizer = AutoTokenizer.from_pretrained(base_model_name)

tokenizer.pad_token_id = tokenizer.eos_token_id


In [64]:
pipe = pipeline(task="text-generation", 
                        model=model, 
                        tokenizer=tokenizer, 
                        max_new_tokens=2, 
                        temperature=0.1)

### Model evalution before fine-tuning

In [65]:
def predict(test, model, tokenizer, pipe):
    y_pred = []
    categories = ["Yes", "No"]
    
    for i in tqdm(range(len(test))):
        prompt = test.iloc[i]['text']
        
        result = pipe(prompt)
        answer = result[0]['generated_text'].split("label:")[-1].strip()
        
        # Determine the predicted category
        for category in categories:
            if category.lower() in answer.lower():
                y_pred.append(category)
                break
        else:
            y_pred.append("none")
    
    return y_pred

In [66]:
y_pred = predict(X_test, model, tokenizer, pipe)

100%|██████████| 400/400 [00:46<00:00,  8.55it/s]


In [67]:
def evaluate(y_true, y_pred):
    labels = ["Yes","No"]
    mapping = {label: idx for idx, label in enumerate(labels)}
    
    def map_func(x):
        return mapping.get(x, -1)  # Map to -1 if not found, but should not occur with correct data
    
    y_true_mapped = np.vectorize(map_func)(y_true)
    y_pred_mapped = np.vectorize(map_func)(y_pred)
    
    # Calculate accuracy
    accuracy = accuracy_score(y_true=y_true_mapped, y_pred=y_pred_mapped)
    print(f'Accuracy: {accuracy:.3f}')
    
    # Generate accuracy report
    unique_labels = set(y_true_mapped)  # Get unique labels
    
    for label in unique_labels:
        label_indices = [i for i in range(len(y_true_mapped)) if y_true_mapped[i] == label]
        label_y_true = [y_true_mapped[i] for i in label_indices]
        label_y_pred = [y_pred_mapped[i] for i in label_indices]
        label_accuracy = accuracy_score(label_y_true, label_y_pred)
        print(f'Accuracy for label {labels[label]}: {label_accuracy:.3f}')
        
    # Generate classification report
    class_report = classification_report(y_true=y_true_mapped, y_pred=y_pred_mapped, target_names=labels, labels=list(range(len(labels))))
    print('\nClassification Report:')
    print(class_report)
    
    # Generate confusion matrix
    conf_matrix = confusion_matrix(y_true=y_true_mapped, y_pred=y_pred_mapped, labels=list(range(len(labels))))
    print('\nConfusion Matrix:')
    print(conf_matrix)

In [68]:
evaluate(y_true, y_pred)

Accuracy: 0.480
Accuracy for label Yes: 0.404
Accuracy for label No: 0.551

Classification Report:
              precision    recall  f1-score   support

         Yes       0.46      0.40      0.43       193
          No       0.50      0.55      0.52       207

    accuracy                           0.48       400
   macro avg       0.48      0.48      0.48       400
weighted avg       0.48      0.48      0.48       400


Confusion Matrix:
[[ 78 115]
 [ 93 114]]


### Extracting the linear modules names

In [69]:
import bitsandbytes as bnb

def find_all_linear_names(model):
    cls = bnb.nn.Linear4bit
    lora_module_names = set()
    for name, module in model.named_modules():
        if isinstance(module, cls):
            names = name.split('.')
            lora_module_names.add(names[0] if len(names) == 1 else names[-1])
    if 'lm_head' in lora_module_names:  # needed for 16 bit
        lora_module_names.remove('lm_head')
    return list(lora_module_names)

In [70]:
modules = find_all_linear_names(model)
modules

['o_proj', 'k_proj', 'q_proj', 'gate_proj', 'down_proj', 'v_proj', 'up_proj']

### Setting up the model

In [71]:
output_dir="llama-3.1-fine-tuned-model"

peft_config = LoraConfig(
    lora_alpha=16,
    lora_dropout=0,
    r=64,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=modules,
)

training_arguments = TrainingArguments(
    output_dir=output_dir,                    # directory to save and repository id
    num_train_epochs=1,                       # number of training epochs
    per_device_train_batch_size=1,            # batch size per device during training
    gradient_accumulation_steps=8,            # number of steps before performing a backward/update pass
    gradient_checkpointing=True,              # use gradient checkpointing to save memory
    optim="paged_adamw_32bit",
    logging_steps=1,                         
    learning_rate=2e-4,                       # learning rate, based on QLoRA paper
    weight_decay=0.001,
    fp16=True,
    bf16=False,
    max_grad_norm=0.3,                        # max gradient norm based on QLoRA paper
    max_steps=-1,
    warmup_ratio=0.03,                        # warmup ratio based on QLoRA paper
    group_by_length=False,
    lr_scheduler_type="cosine",               # use cosine learning rate scheduler
    eval_strategy="steps",              # save checkpoint every epoch
    eval_steps = 0.2
)

trainer = SFTTrainer(
    model=model,
    args=training_arguments,
    train_dataset=train_data,
    eval_dataset=eval_data,
    peft_config=peft_config,
    dataset_text_field="text",
    tokenizer=tokenizer,
    max_seq_length=512,
    packing=False,
    dataset_kwargs={
    "add_special_tokens": False,
    "append_concat_token": False,
    }
)


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Map:   0%|          | 0/2800 [00:00<?, ? examples/s]

Map:   0%|          | 0/800 [00:00<?, ? examples/s]

### Model Training

In [72]:
# Train model
trainer.train()

Step,Training Loss,Validation Loss
70,0.1034,0.100343
140,0.0971,0.097218
210,0.0964,0.096649
280,0.1,0.096111
350,0.0999,0.096


TrainOutput(global_step=350, training_loss=0.14932155753884996, metrics={'train_runtime': 1180.7343, 'train_samples_per_second': 2.371, 'train_steps_per_second': 0.296, 'total_flos': 3.19675240808448e+16, 'train_loss': 0.14932155753884996, 'epoch': 1.0})

### Saving the model and tokenizer

In [73]:
# Save trained model and tokenizer
trainer.save_model(output_dir)
tokenizer.save_pretrained(output_dir)

('llama-3.1-fine-tuned-model/tokenizer_config.json',
 'llama-3.1-fine-tuned-model/special_tokens_map.json',
 'llama-3.1-fine-tuned-model/tokenizer.json')

### Testing model after fine-tuning 

In [74]:
y_pred = predict(X_test, model, tokenizer, pipe)
evaluate(y_true, y_pred)

100%|██████████| 400/400 [01:18<00:00,  5.12it/s]


Accuracy: 0.730
Accuracy for label Yes: 0.829
Accuracy for label No: 0.638

Classification Report:
              precision    recall  f1-score   support

         Yes       0.68      0.83      0.75       193
          No       0.80      0.64      0.71       207

    accuracy                           0.73       400
   macro avg       0.74      0.73      0.73       400
weighted avg       0.74      0.73      0.73       400


Confusion Matrix:
[[160  33]
 [ 75 132]]
