In [3]:
%%capture
%pip install -U bitsandbytes
%pip install -U transformers
%pip install -U peft
%pip install -U accelerate
%pip install -U trl
%pip install -U wandb
%pip install -U evaluate
%pip install -U datasets
%pip install -U nltk rouge

In [4]:
import os
import shutil

# specify your directory
dir_path = '/kaggle/working/'

# remove all files in the directory
for filename in os.listdir(dir_path):
    file_path = os.path.join(dir_path, filename)
    try:
        if os.path.isfile(file_path) or os.path.islink(file_path):
            os.unlink(file_path)
        elif os.path.isdir(file_path):
            shutil.rmtree(file_path)
    except Exception as e:
        print(f'Failed to delete {file_path}. Reason: {e}')

In [1]:
%%writefile run.py

from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig,HfArgumentParser,TrainingArguments,pipeline, logging, DataCollatorForLanguageModeling
from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training, get_peft_model
import os,torch, wandb
from datasets import load_dataset
from trl import SFTTrainer

import numpy as np
import evaluate

import pandas as pd
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from nltk.translate.meteor_score import meteor_score
from rouge import Rouge

base_model="mistralai/Mistral-7B-Instruct-v0.2"
finetuned_model="/kaggle/input/mistral-aes-v2/pytorch/mistralv2-414/1"

train_dataset_name="/kaggle/input/essay-train-v3"
validate_dataset_name="/kaggle/input/essay-validate"
test_dataset_name="/kaggle/input/test-essay"

new_model = "/kaggle/input/mistral_epoch1-2/pytorch/aes/9"

#Importing the dataset
train_data = load_dataset(train_dataset_name, split="train")
validate_data =load_dataset(validate_dataset_name, split="train")
test_data =load_dataset(test_dataset_name, split="test")

# Load base model(Mistral 7B)
bnb_config = BitsAndBytesConfig(  
    load_in_4bit= True,
    bnb_4bit_quant_type= "nf4",
    bnb_4bit_compute_dtype= torch.bfloat16,
    bnb_4bit_use_double_quant= False,
)

model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    trust_remote_code=True,
    return_dict=True,
)

model.config.use_cache = True # silence the warnings. Please re-enable for inference!

# this is not used for inference, but it does solve the error low storage
peft_config = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=64,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj","gate_proj"]
)
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, peft_config)

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
tokenizer.padding_side = 'right'
tokenizer.pad_token = tokenizer.eos_token
tokenizer.add_eos_token = True
tokenizer.add_bos_token, tokenizer.add_eos_token

ft_model = PeftModel.from_pretrained(model, finetuned_model)
ft_model.eval()


def generate_prompt(data_point):
    """Gen. input text based on a prompt, task instruction, (context info.), and answer

    :param data_point: dict: Data point
    :return: dict: tokenized prompt
    """
    #prefix_text = 'Below is an instruction that describes a task. Write a response that ' \
    #           'appropriately completes the request.\n\n'
    # Samples with additional context into.
    if data_point['input']:
        text = f"""{data_point["instruction"]}\n{data_point["input"]}"""
    # Without
    else:
        text = f"""<s>[INST]{data_point["instruction"]} [/INST]{data_point["output"]} </s>"""
    return text

#add the "prompt" column in the dataset
test_text_column = [generate_prompt(data_point) for data_point in test_data]
test_data = test_data.add_column("text", test_text_column)


max_seq_length = 3300
col_to_delete = ['input', 'instruction', 'filename']

test_data = test_data.map(lambda samples: tokenizer(samples["text"], truncation=True, max_length=max_seq_length, padding="max_length"), batched=True, remove_columns=col_to_delete)
test_data = test_data.add_column("labels", test_data ['input_ids'])




#------------------------TESTING------------------------
# Suppress logging
logging.set_verbosity(logging.CRITICAL)

# Initialize the pipeline
pipe = pipeline(task="text-generation", model=ft_model, tokenizer=tokenizer, max_length=3500)

# Initialize an empty list to store the results
results = []

# Initialize Rouge for ROUGE scores
rouge = Rouge()

# Initialize the smoothing function
smoothie = SmoothingFunction().method4

# Iterate over all the data in text_data
for i in range(len(test_data)):
    # Get the text from the current row
    text = test_data['text'][i]
    
    # Pass the text to the pipeline
    result = pipe(
        f"<s>[INST] {text} [/INST]",     
        #max_new_tokens=50, 
        temperature=0.7, 
        top_k=50, 
        top_p=0.95,
        num_return_sequences=1,)
    
    # Get the generated text
    generated_text = result[0]['generated_text']
    
    # Only keep the lines after the [/INST] tag
    output = generated_text.split('[/INST]', 1)[-1].strip()
    
    # Get the actual output
    expected_output = test_data['output'][i]
    
    # Calculate BLEU score with smoothing function
    bleu_score = sentence_bleu([expected_output.split()], output.split(), smoothing_function=smoothie)
    
    # Calculate ROUGE scores
    rouge_scores = rouge.get_scores(output, expected_output)[0]['rouge-l']
    
    # Calculate METEOR score
    #meteor = meteor_score([expected_output], output)
    
    # Append the result to the results list
    results.append([text, generated_text, output, expected_output, bleu_score, rouge_scores])

# Convert the results list into a DataFrame
df = pd.DataFrame(results, columns=['Text', 'Model Output', 'Actual Output', 'Expected Output', 'BLEU Score', 'ROUGE Scores'])

df.to_csv('eval-metrics-v1-1104.csv', index=False)


Writing run.py


In [25]:
%%time
!python run.py

2024-04-09 14:03:58.060167: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-09 14:03:58.060227: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-09 14:03:58.061631: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
  return self.fget.__get__(instance, owner)()
Loading checkpoint shards: 100%|██████████████████| 2/2 [00:05<00:00,  2.69s/it]
CPU times: user 7.12 s, sys: 1.82 s, total: 8.94 s
Wall time: 9min 4s


In [2]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig,HfArgumentParser,TrainingArguments,pipeline, logging, DataCollatorForLanguageModeling
from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training, get_peft_model
import os,torch, wandb
from datasets import load_dataset
from trl import SFTTrainer

import numpy as np
import evaluate

ModuleNotFoundError: No module named 'peft'

In [3]:
base_model="/kaggle/input/mistral/pytorch/7b-instruct-v0.1-hf/1"

train_dataset_name="/kaggle/input/essay-train-v3"
validate_dataset_name="/kaggle/input/essay-validate"
test_dataset_name="/kaggle/input/test-essay"

new_model = "/kaggle/input/mistral_epoch1-2/pytorch/aes/9"

In [4]:
#Importing the dataset
train_data = load_dataset(train_dataset_name, split="train")
validate_data =load_dataset(validate_dataset_name, split="train")
test_data =load_dataset(test_dataset_name, split="test")

Generating train split: 0 examples [00:00, ? examples/s]

Generating train split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

In [5]:
# Load base model(Mistral 7B)
bnb_config = BitsAndBytesConfig(  
    load_in_4bit= True,
    bnb_4bit_quant_type= "nf4",
    bnb_4bit_compute_dtype= torch.bfloat16,
    bnb_4bit_use_double_quant= False,
)

model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    trust_remote_code=True,
)

model.config.use_cache = True # silence the warnings. Please re-enable for inference!

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
tokenizer.padding_side = 'right'
tokenizer.pad_token = tokenizer.eos_token
tokenizer.add_eos_token = True
tokenizer.add_bos_token, tokenizer.add_eos_token

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

  return self.fget.__get__(instance, owner)()


(True, True)

In [None]:
from peft import PeftModel

ft_model = PeftModel.from_pretrained(model, "/kaggle/input/mistral_epoch1-2/pytorch/aes/9")
ft_model.eval()

In [None]:
def generate_prompt(data_point):
    """Gen. input text based on a prompt, task instruction, (context info.), and answer

    :param data_point: dict: Data point
    :return: dict: tokenized prompt
    """
    #prefix_text = 'Below is an instruction that describes a task. Write a response that ' \
    #           'appropriately completes the request.\n\n'
    # Samples with additional context into.
    if data_point['input']:
        text = f"""<s>[INST]{data_point["instruction"]}\n{data_point["input"]} [/INST]{data_point["output"]}</s>"""
    # Without
    else:
        text = f"""<s>[INST]{data_point["instruction"]} [/INST]{data_point["output"]} </s>"""
    return text

#add the "prompt" column in the dataset
train_text_column = [generate_prompt(data_point) for data_point in train_data]
validate_text_column = [generate_prompt(data_point) for data_point in validate_data]

train_data = train_data.add_column("text", train_text_column)
validate_data = validate_data.add_column("text", validate_text_column)


In [None]:
def generate_prompt(data_point):
    """Gen. input text based on a prompt, task instruction, (context info.), and answer

    :param data_point: dict: Data point
    :return: dict: tokenized prompt
    """
    #prefix_text = 'Below is an instruction that describes a task. Write a response that ' \
    #           'appropriately completes the request.\n\n'
    # Samples with additional context into.
    if data_point['input']:
        text = f"""{data_point["instruction"]}\n{data_point["input"]}"""
    # Without
    else:
        text = f"""<s>[INST]{data_point["instruction"]} [/INST]{data_point["output"]} </s>"""
    return text

#add the "prompt" column in the dataset
test_text_column = [generate_prompt(data_point) for data_point in test_data]

test_data = test_data.add_column("text", test_text_column)


In [None]:
max_seq_length = 3300
col_to_delete = ['input', 'instruction', 'filename']

train_data = train_data.map(lambda samples: tokenizer(samples["text"], truncation=True, max_length=max_seq_length, padding="max_length"), batched=True, remove_columns=col_to_delete)
validate_data = validate_data.map(lambda samples: tokenizer(samples["text"], truncation=True, max_length=max_seq_length, padding="max_length"), batched=True, remove_columns=col_to_delete)
test_data = test_data.map(lambda samples: tokenizer(samples["text"], truncation=True, max_length=max_seq_length, padding="max_length"), batched=True, remove_columns=col_to_delete)


train_data = train_data.add_column("labels", train_data['input_ids'])
validate_data = validate_data.add_column("labels", validate_data ['input_ids'])
test_data = test_data.add_column("labels", test_data ['input_ids'])

In [None]:
train_data.set_format("torch")
test_data.set_format("torch")
validate_data.set_format("torch")

In [6]:
if torch.cuda.device_count() > 1: # If more than 1 GPU
    print(torch.cuda.device_count())
    model.is_parallelizable = True
    model.model_parallel = True
    #ft_model.is_parallelizable = True
    #ft_model.model_parallel = True

2


In [None]:
test_data['output'][0]

In [None]:
test1 = "I want you to act as a teacher that grades the essays of the students based on the instructions. The scores range from 1.0 to 5.0 in increments of 0.5. Your task is to give numerical score and text feedback if only needed. Text feedback must be short and one sentence only. Do not be afraid to mark perfect score. You must only reply using this format. \nScore: \nFeedback:\nInstruction:For this week's reflection, do an ACADEMIC LEARNING based on our assigned videos and/or articles. \n(Refer to our course guide and other materials in the Files Tab)\n\nYour reflection may start with the following for example:\n\n“The article/video(SPECIFY THE TITLE) have made me aware of the importance of these formal systems of organization that enable people carry out tasks in the workplace.” \n\nYour reflection must us the 3-2-1 method:\n3-2-1 (Write 3 things you've learned, 2 things you still want to learn, and 1 question about your learning.)\nEssay:Article: lo and behold Reveries of the connected world By Elon musk\nTopic: The Dark Side\nReflection:\n3 things I’ve learned\n1. Internet is a double edged sword\n2. Be mindful of what you will do in the internet\n3. Internet is a manifestation of the devil\n2 things I still want to learn\n1. The maximum capacity of what the internet can do\n2. How far can the internet go\n1 question about my learning\n1. Is the internet really a blessing or a curse to the human kind?"

In [None]:
%%capture
%pip install -U nltk rouge

In [None]:
test_data

In [7]:
logging.set_verbosity(logging.CRITICAL)
    
pipe = pipeline(
    "text-generation", 
    model=model, 
    tokenizer = tokenizer, 
    torch_dtype=torch.bfloat16, 
    device_map="auto"
)

prompt = "How become a DataCamp certified data professional"

sequences = pipe(
    f"<s>[INST] {prompt} [/INST]",
    do_sample=True,
    max_new_tokens=100, 
    temperature=0.7, 
    top_k=50, 
    top_p=0.95,
    num_return_sequences=1,
)
print(sequences[0]['generated_text'])

RuntimeError: cutlassF: no kernel found to launch!

In [None]:
import pandas as pd
from transformers import pipeline
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from nltk.translate.meteor_score import meteor_score
from rouge import Rouge

# Suppress logging
logging.set_verbosity(logging.CRITICAL)

# Initialize the pipeline
pipe = pipeline(task="text-generation", model=ft_model, tokenizer=tokenizer, max_length=3500)

# Initialize an empty list to store the results
results = []

# Initialize Rouge for ROUGE scores
rouge = Rouge()

# Initialize the smoothing function
smoothie = SmoothingFunction().method4

# Iterate over all the data in text_data
for i in range(len(test_data)):
    # Get the text from the current row
    text = test_data['text'][i]
    
    # Pass the text to the pipeline
    result = pipe(f"<s>[INST] {text} [/INST]")
    
    # Get the generated text
    generated_text = result[0]['generated_text']
    
    # Only keep the lines after the [/INST] tag
    output = generated_text.split('[/INST]', 1)[-1].strip()
    
    # Get the actual output
    expected_output = test_data['output'][i]
    
    # Calculate BLEU score with smoothing function
    bleu_score = sentence_bleu([expected_output.split()], output.split(), smoothing_function=smoothie)
    
    # Calculate ROUGE scores
    rouge_scores = rouge.get_scores(output, expected_output)[0]['rouge-l']
    
    # Calculate METEOR score
    #meteor = meteor_score([expected_output], output)
    
    # Append the result to the results list
    results.append([text, generated_text, output, expected_output, bleu_score, rouge_scores])

# Convert the results list into a DataFrame
df = pd.DataFrame(results, columns=['Text', 'Model Output', 'Actual Output', 'Expected Output', 'BLEU Score', 'ROUGE Scores'])

df.to_csv('eval-metrics-v1-1104.csv', index=False)

# Print the DataFrame
df


In [None]:
df.to_csv('output.csv', index=False)

In [None]:
if torch.cuda.device_count() > 1: # If more than 1 GPU
    print(torch.cuda.device_count())
    model.is_parallelizable = True
    model.model_parallel = True

In [None]:
#Adding the adapters in the layers

peft_config = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=64,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj","gate_proj"]
)
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, peft_config)

In [None]:
training_arguments=TrainingArguments(
    output_dir = "Mistral_AES_v2",
    warmup_steps=1,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=1,
    gradient_checkpointing=True,
    #max_steps=500,
    num_train_epochs=8, # changed to 3 
    weight_decay=0.001,
    learning_rate=2.5e-5,                # Want a small lr for finetuning
    fp16=False,                          #might need to set this to true
    bf16=False,
    optim="paged_adamw_32bit",
    logging_steps=50,                    # When to start reporting loss
    logging_dir="/kaggle/working/logs",  # Directory for storing logs
    save_strategy="epoch",               # Save the model checkpoint every step
    #save_steps=287, #287                # Save checkpoints every 96 steps 1/3 each epoch
    evaluation_strategy="epoch",         # Evaluate the model every logging step
    eval_steps=50,                       # Evaluate and save checkpoints every 287 steps
    do_eval=True,                        # Perform evaluation at the end of training
    report_to="wandb",                   # Comment this out if you don't want to use weights & baises        # Name of the W&B run (optional)
    run_name="IT_Era_Run_Epoch",                # Name of the W&B run (optional)
    lr_scheduler_type="constant",
    load_best_model_at_end=True,
    save_total_limit=8,
    do_predict=True,
)

In [None]:
collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)

max_seq_length = 3300

trainer = SFTTrainer(
  model=model,
  peft_config=peft_config,
  max_seq_length=max_seq_length,
  tokenizer=tokenizer,
  #packing=False,
  args=training_arguments,
  dataset_text_field="text",
  train_dataset=train_data,
  eval_dataset=validate_data,
  data_collator=collator,
  #compute_metrics=compute_metrics,
  #preprocess_logits_for_metrics=preprocess_logits_for_metrics
)

In [None]:
#trainer.train(resume_from_checkpoint="/kaggle/working/Mistral_AES_v2/checkpoint-574") 

In [None]:
trainer.train() 

In [None]:
wandb.finish()

In [None]:
# Save the fine-tuned model
trainer.model.save_pretrained(new_model)
wandb.finish()
model.config.use_cache = True

In [None]:
best_model_checkpoint = trainer.state.best_model_checkpoint
best_model_checkpoint 

In [None]:
#try:
#    trainer.model.push_to_hub(new_model, use_temp_dir=False)
#except:
#    print("An exception occurred")

In [None]:
#!1eval_results = trainer.evaluate()

# Print the accuracy
#print(eval_results)