In [1]:
import torch
from datasets import load_dataset, Dataset, DatasetDict
from transformers import (
    AutoTokenizer, AutoModelForQuestionAnswering,GPT2LMHeadModel,
    TrainingArguments,
    pipeline,
    logging,
    Trainer,
    DataCollatorForLanguageModeling
)

#from sklearn.model_selection import train_test_split

import pandas as pd

In [2]:
dataset = load_dataset("team-bay/data-science-qa",split=['train'])[0]

In [3]:
dataset=dataset.remove_columns(['type'])

In [4]:
full_train_dataset=dataset.train_test_split(test_size=0.2)

In [5]:
test_dataset=full_train_dataset['test']

In [6]:
train_dataset=full_train_dataset['train'].train_test_split(test_size=0.2)

In [7]:
val_dataset=train_dataset['test']

In [8]:
train_dataset.pop('test')

Dataset({
    features: ['question', 'answer'],
    num_rows: 76
})

In [9]:
train_dataset=train_dataset['train']

In [10]:
test_dataset

Dataset({
    features: ['question', 'answer'],
    num_rows: 95
})

In [11]:
val_dataset

Dataset({
    features: ['question', 'answer'],
    num_rows: 76
})

In [12]:
health_dataset_dict = DatasetDict({
    'train': train_dataset,
    'validation': val_dataset,
    'test': test_dataset
})

In [13]:
MODEL_NAME = 'gpt2'
model = GPT2LMHeadModel.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
tokenizer.pad_token = tokenizer.eos_token

The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored.


In [14]:
def preprocess_function(examples):
    inputs = [q + " [SEP] " + a for q, a in zip(examples["question"], examples["answer"])]
   # The "inputs" are the tokenized answer:
#    inputs = [doc for doc in examples["question"] + " [SEP] " + doc for doc in examples["answer"]]
    
    model_inputs = tokenizer(inputs, max_length=200, truncation=True, padding=True, return_tensors="pt")
  
   # The "labels" are the tokenized outputs:
    return model_inputs

In [15]:
tokenized_dataset = health_dataset_dict.map(preprocess_function, batched=True)

Map:   0%|          | 0/302 [00:00<?, ? examples/s]

Map:   0%|          | 0/76 [00:00<?, ? examples/s]

Map:   0%|          | 0/95 [00:00<?, ? examples/s]

In [16]:
tokenized_dataset

DatasetDict({
    train: Dataset({
        features: ['question', 'answer', 'input_ids', 'attention_mask'],
        num_rows: 302
    })
    validation: Dataset({
        features: ['question', 'answer', 'input_ids', 'attention_mask'],
        num_rows: 76
    })
    test: Dataset({
        features: ['question', 'answer', 'input_ids', 'attention_mask'],
        num_rows: 95
    })
})

In [17]:
# Global Parameters
L_RATE = 3e-4
BATCH_SIZE = 8
PER_DEVICE_EVAL_BATCH = 8
WEIGHT_DECAY = 0.01
SAVE_TOTAL_LIM = 3
NUM_EPOCHS = 3

In [18]:
# Set up training arguments
training_args = TrainingArguments(
   output_dir="./results",
   evaluation_strategy="epoch",
   learning_rate=L_RATE,
   report_to=None,
   logging_steps=10,
   per_device_train_batch_size=BATCH_SIZE,
   per_device_eval_batch_size=PER_DEVICE_EVAL_BATCH,
   weight_decay=WEIGHT_DECAY,
   save_total_limit=SAVE_TOTAL_LIM,
   num_train_epochs=NUM_EPOCHS,
   push_to_hub=False
)



In [19]:
trainer = Trainer(
    model=model,
    train_dataset=tokenized_dataset['train'],
    eval_dataset=tokenized_dataset['validation'],
    tokenizer=tokenizer,
    args=training_args,
    data_collator= DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False),
)

[2024-12-22 23:04:27,553] [INFO] [real_accelerator.py:203:get_accelerator] Setting ds_accelerator to cuda (auto detect)


  trainer = Trainer(




/ibex/ai/home/shaima0d/KSL_Trainings/rts-tutorials/install/compiler_compat/ld: cannot find -laio: No such file or directory
collect2: error: ld returned 1 exit status
  def forward(ctx, input, weight, bias=None):
  def backward(ctx, grad_output):


In [20]:
model.config.use_cache = False
trainer.train()

Epoch,Training Loss,Validation Loss
1,2.5818,2.224871
2,1.6312,2.11626
3,1.108,2.175313


TrainOutput(global_step=114, training_loss=1.8529781165875887, metrics={'train_runtime': 11.8675, 'train_samples_per_second': 76.343, 'train_steps_per_second': 9.606, 'total_flos': 45311712768000.0, 'train_loss': 1.8529781165875887, 'epoch': 3.0})

In [22]:
def load_model(model_path):
    model = GPT2LMHeadModel.from_pretrained(model_path)
    return model

In [23]:
def load_tokenizer(tokenizer_path):
    tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
    return tokenizer

In [24]:
def generate_text(model_path, sequence, max_length):
    
    model = load_model(model_path)
    tokenizer = load_tokenizer(model_path)
    ids = tokenizer.encode(f'{sequence}', return_tensors='pt')
    final_outputs = model.generate(
        ids,
        do_sample=True,
        max_length=max_length,
        pad_token_id=model.config.eos_token_id,
        top_k=50,
        top_p=0.95,
    )
    print(tokenizer.decode(final_outputs[0], skip_special_tokens=True))

In [29]:
model2_path = "./results/checkpoint-114/"
sequence2 = "[Q] How do I do reinforcement learning?"
max_len = 150
generate_text(model2_path, sequence2, max_len) 

[Q] How do I do reinforcement learning? [SEP] reinforcement learning is a machine learning paradigm where an agent learns to make decisions based on actions made by other agents in the environment, often leveraging pretext actions or pretext tasks to learn useful representations without explicit training. The goal of reinforcement learning is to maximize cumulative rewards and minimize cumulative losses, enabling agents to make better decisions by interacting with each other more often. Expectation-based models are an example of reinforcement learning where an agent learns to maximize cumulative rewards while evaluating actions across multiple scenarios. Expectation-based models are applications such as gaming and social media analytics, where an agent must decide which objective to focus on to maximize cumulative rewards. Expectation-based models are examples of objective-based


In [31]:
from transformers import pipeline, set_seed
generate = pipeline("text-generation", model="openai-community/gpt2",
                    clean_up_tokenization_spaces=True,
                    device='cuda')

Device set to use cuda


In [33]:
set_seed(42)
generate("[Q] How do I do reinforcement learning?", max_length=30, num_return_sequences=3) 

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[{'generated_text': '[Q] How do I do reinforcement learning? What you learn that makes it better, and how do you improve it? How are you going to'},
 {'generated_text': '[Q] How do I do reinforcement learning?[/Q] [A] [B]How do I do all of this?[/B]'},
 {'generated_text': "[Q] How do I do reinforcement learning?[/q]\n\nIf I'm interested in training a character for different characters, I go through"}]