In [4]:
import os
os.environ["WANDB_PROJECT"]="mistral_finetuning"

from enum import Enum
from functools import partial

import numpy as np
import pandas as pd
from tqdm import tqdm
import bitsandbytes as bnb
import torch
import torch.nn as nn
import transformers
from datasets import Dataset
from peft import get_peft_model, PeftModel, LoraConfig, TaskType
from trl import SFTTrainer
from transformers import (AutoModelForCausalLM, 
                          AutoTokenizer, 
                          BitsAndBytesConfig, 
                          TrainingArguments, 
                          pipeline, 
                          logging,
                          set_seed)
from sklearn.metrics import (accuracy_score, 
                             classification_report, 
                             confusion_matrix)
from sklearn.model_selection import train_test_split

seed = 42
set_seed(seed)

In [53]:
model_name="mistralai/Mistral-7B-Instruct-v0.2"
dataset_name = "FinGPT/fingpt-sentiment-train"
dataset = load_dataset(dataset_name)


In [54]:
print(dataset)

DatasetDict({
    train: Dataset({
        features: ['input', 'output', 'instruction'],
        num_rows: 76772
    })
})


In [55]:
# Convert dataset to pandas DataFrame
df = dataset['train'].to_pandas()

# Display the DataFrame
print(df.head())


                                               input               output  \
0  Teollisuuden Voima Oyj , the Finnish utility k...              neutral   
1  Sanofi poaches AstraZeneca scientist as new re...              neutral   
2  Starbucks says the workers violated safety pol...  moderately negative   
3                      $brcm raises revenue forecast             positive   
4  Google parent Alphabet Inc. reported revenue a...  moderately negative   

                                         instruction  
0  What is the sentiment of this news? Please cho...  
1  What is the sentiment of this news? Please cho...  
2  What is the sentiment of this news? Please cho...  
3  What is the sentiment of this tweet? Please ch...  
4  What is the sentiment of this news? Please cho...  


In [56]:
unique_outputs = set(dataset['train']['output'])

#label_mapping = {'mildly positive': 0, 'positive': 1, 'strong positive':2, 'moderately positive': 3, 'negative': 4, 'neutral': 5}  # Add more mappings as needed
label_mapping = {label: index for index, label in enumerate(unique_outputs)}

In [57]:
print(unique_outputs)
print(label_mapping)

{'strong positive', 'moderately positive', 'neutral', 'strong negative', 'positive', 'negative', 'mildly negative', 'moderately negative', 'mildly positive'}
{'strong positive': 0, 'moderately positive': 1, 'neutral': 2, 'strong negative': 3, 'positive': 4, 'negative': 5, 'mildly negative': 6, 'moderately negative': 7, 'mildly positive': 8}


In [58]:
X_train = []
X_eval = []
X_test = []
X_trail = []
for sentiment in label_mapping:
    train, test  = train_test_split(df[df.output==sentiment],
                                    train_size=120,
                                    test_size=20,
                                    random_state=42)
    X_trail.append(train)
    X_test.append(test)

X_trail = pd.concat(X_trail).sample(frac=1, random_state=10)
X_test = pd.concat(X_test)

df2=X_trail

for sentiment in label_mapping:
    train, test  = train_test_split(df2[df2.output==sentiment],
                                    train_size=100,
                                    test_size=20,
                                    random_state=42)
    X_train.append(train)
    X_eval.append(test)

X_train = pd.concat(X_train).sample(frac=1, random_state=10)
X_eval = pd.concat(X_eval)

In [59]:
print(len(X_train))
print(len(X_eval))
print(len(X_test))

900
180
180


In [60]:
X_train = X_train.reset_index(drop=True)
X_eval = X_eval.reset_index(drop=True)
X_test = X_test.reset_index(drop=True)

In [61]:
X_test = X_test.head()
y_true = X_test.output

In [62]:
print(len(X_train))
print(len(X_eval))
print(len(X_test))

900
180
5


In [63]:
def generate_prompt(data_point):
    return f"""
            [INST]{data_point["instruction"]}[/INST]

            [{data_point["input"]}] = {data_point["output"]}
            """.strip()

def generate_test_prompt(data_point):
    return f"""
            [INST]{data_point["instruction"]}[/INST]

            [{data_point["input"]}] = """.strip()

X_train = pd.DataFrame(X_train.apply(generate_prompt, axis=1), 
                       columns=["input"])
X_eval = pd.DataFrame(X_eval.apply(generate_prompt, axis=1), 
                      columns=["input"])

In [50]:
for index, row in X_train.head().iterrows():
    print(f"Row {index}:")
    for column_name, value in row.items():
        print(f"  {column_name}: {value}")

Row 0:
  input: [INST]What is the sentiment of this tweet? Please choose an answer from {negative/neutral/positive}.[/INST]

            [$CSX is up today to report.  Wall Street is expecting EPS to be at $0.37, and revenues at $2,732B.] = positive
Row 1:
  input: [INST]What is the sentiment of this news? Please choose an answer from {strong negative/moderately negative/mildly negative/neutral/mildly positive/moderately positive/strong positive}.[/INST]

            [Cristiano Amon, Qualcomm CEO, joins 'TechCheck' to discuss the company's future aspirations and work with the auto industry from the Consumer Electronics Show in Las Vegas, Nevada.] = moderately positive
Row 2:
  input: [INST]What is the sentiment of this news? Please choose an answer from {strong negative/moderately negative/mildly negative/neutral/mildly positive/moderately positive/strong positive}.[/INST]

            [There could be storm clouds ahead.] = mildly negative
Row 3:
  input: [INST]What is the sentiment of 

In [34]:
train_data = Dataset.from_pandas(X_train)
eval_data = Dataset.from_pandas(X_eval)

In [35]:
train_data

Dataset({
    features: ['input'],
    num_rows: 900
})

In [5]:
peft_config = LoraConfig(r=8,
                         lora_alpha=16,
                         lora_dropout=0.1,
                         target_modules=["gate_proj","q_proj","lm_head","o_proj","k_proj","embed_tokens","down_proj","up_proj","v_proj"],
                         task_type=TaskType.CAUSAL_LM)

In [37]:
tokenizer = AutoTokenizer.from_pretrained(model_name,
                                          trust_remote_code=True,
                                          padding_side="left",
                                          add_bos_token=True,
                                          add_eos_token=True,
                                         )

tokenizer.pad_token = tokenizer.eos_token




In [38]:
model = AutoModelForCausalLM.from_pretrained(model_name)
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

# cast non-trainable params in fp16
for p in model.parameters():
    if not p.requires_grad:
        p.data = p.to(torch.float16)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

trainable params: 21,549,056 || all params: 7,263,281,152 || trainable%: 0.2966848666468914


In [39]:
output_dir = "mistral_instruct"
per_device_train_batch_size = 1
per_device_eval_batch_size = 1
gradient_accumulation_steps = 8
logging_steps = 5
learning_rate = 5e-4
max_grad_norm = 1.0
max_steps = 250
num_train_epochs=10
warmup_ratio = 0.1
lr_scheduler_type = "cosine"
max_seq_length = 2048

training_arguments = TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=per_device_train_batch_size,
    per_device_eval_batch_size=per_device_eval_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    save_strategy="no",
    evaluation_strategy="epoch",
    logging_steps=logging_steps,
    learning_rate=learning_rate,
    max_grad_norm=max_grad_norm,
    weight_decay=0.1,
    warmup_ratio=warmup_ratio,
    lr_scheduler_type=lr_scheduler_type,
    fp16=True,
    report_to=["tensorboard", "wandb"],
    hub_private_repo=True,
    push_to_hub=True,
    num_train_epochs=num_train_epochs,
    gradient_checkpointing=True,
    gradient_checkpointing_kwargs={"use_reentrant": False}
)


In [40]:
trainer = SFTTrainer(
    model=model,
    args=training_arguments,
    train_dataset=train_data,
    eval_dataset=eval_data,
    tokenizer=tokenizer,
    packing=True,
    dataset_text_field="input",
    max_seq_length=max_seq_length,
)

Generating train split: 0 examples [00:00, ? examples/s]

Generating train split: 0 examples [00:00, ? examples/s]

Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


In [41]:
trainer.train()
trainer.save_model()

Epoch,Training Loss,Validation Loss
0,No log,2.508028
1,2.532000,1.357067
2,2.031900,1.138496
3,1.252000,1.174159
4,1.125800,1.110787
5,0.943200,1.136815
6,0.801000,1.19588
8,0.535100,1.248224




adapter_model.safetensors:   0%|          | 0.00/611M [00:00<?, ?B/s]

events.out.tfevents.1715543143.fc49bb3d6714.6731.1:   0%|          | 0.00/9.11k [00:00<?, ?B/s]

events.out.tfevents.1715542212.fc49bb3d6714.6731.0:   0%|          | 0.00/6.37k [00:00<?, ?B/s]

Upload 4 LFS files:   0%|          | 0/4 [00:00<?, ?it/s]

training_args.bin:   0%|          | 0.00/5.05k [00:00<?, ?B/s]

In [20]:
def evaluate(y_true, y_pred):
    labels = {'strong positive', 'mildly positive', 'negative', 'moderately positive', 'moderately negative', 'neutral', 'mildly negative', 'strong negative', 'positive'}
    mapping = {'strong positive': 0, 'mildly positive': 1, 'negative': 2, 'moderately positive': 3, 'moderately negative': 4, 'neutral': 5, 'none': 5,'mildly negative': 6, 'strong negative': 7, 'positive': 8}
    def map_func(x):
        return mapping.get(x, 1)
    
    y_true = np.vectorize(map_func)(y_true)
    y_pred = np.vectorize(map_func)(y_pred)
    
    # Calculate accuracy
    accuracy = accuracy_score(y_true=y_true, y_pred=y_pred)
    print(f'Accuracy: {accuracy:.3f}')
    
    # Generate accuracy report
    unique_labels = set(y_true)  # Get unique labels
    
    for label in unique_labels:
        label_indices = [i for i in range(len(y_true)) 
                         if y_true[i] == label]
        label_y_true = [y_true[i] for i in label_indices]
        label_y_pred = [y_pred[i] for i in label_indices]
        accuracy = accuracy_score(label_y_true, label_y_pred)
        print(f'Accuracy for label {label}: {accuracy:.3f}')
        
    # Generate classification report
    class_report = classification_report(y_true=y_true, y_pred=y_pred)
    print('\nClassification Report:')
    print(class_report)
    
    # Generate confusion matrix
    conf_matrix = confusion_matrix(y_true=y_true, y_pred=y_pred, labels=[0, 1, 2])
    print('\nConfusion Matrix:')
    print(conf_matrix)

In [23]:
def predict(X_test, model, tokenizer):
    y_pred = []
    for i in tqdm(range(len(X_test))):
        prompt = X_test.iloc[i]["input"]
        pipe = pipeline(task="text-generation", 
                        model=model, 
                        tokenizer=tokenizer,
                        max_new_tokens = 1, 
                        temperature = 0.0,
                       )
        result = pipe(prompt, pad_token_id=pipe.tokenizer.eos_token_id)
        answer = result[0]['generated_text'].split("=")[-1].lower()
        if "strong positive" in answer:
            y_pred.append("strong positive")
        elif "mildly positive" in answer:
            y_pred.append("mildly positive")
        elif "negative" in answer:
            y_pred.append("negative")
        elif "moderately positive" in answer:
            y_pred.append("moderately positive")
        elif "moderately negative" in answer:
            y_pred.append("moderately negative")
        elif "neutral" in answer:
            y_pred.append("neutral")
        elif "mildly negative" in answer:
            y_pred.append("mildly negative")
        elif "strong negative" in answer:
            y_pred.append("strong negative")
        elif "positive" in answer:
            y_pred.append("positive")
        else:
            y_pred.append("none")
    return y_pred

In [7]:
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer
from datasets import load_dataset
import torch


peft_model_id = "mistral_instruct"
device = "cuda"
config = PeftConfig.from_pretrained(peft_model_id)
model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path)
tokenizer = AutoTokenizer.from_pretrained(peft_model_id)
model = PeftModel.from_pretrained(model, peft_model_id)



Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [64]:
X_test

Unnamed: 0,input,output,instruction
0,"Google Maps was updated with new features, lik...",strong positive,What is the sentiment of this news? Please cho...
1,These players are market leaders -- and future...,strong positive,What is the sentiment of this news? Please cho...
2,"Divergence, an ultra-rare stock market phenome...",strong positive,What is the sentiment of this news? Please cho...
3,Most investors in Apple (NASDAQ: AAPL) would h...,strong positive,What is the sentiment of this news? Please cho...
4,James 1:27. Religion that is pure and undefile...,strong positive,What is the sentiment of this news? Please cho...


In [65]:

X_test = pd.DataFrame(X_test.apply(generate_test_prompt, axis=1), columns=["input"])
y_pred = predict(X_test, model, tokenizer)

  0%|          | 0/5 [00:00<?, ?it/s]The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CohereForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'DbrxForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FuyuForCausalLM', 'GemmaForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'JambaForCausalLM', 'LlamaForCausalLM', 'MambaForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MistralForCausalLM', 'MixtralForCausalLM', 'MptForCausalLM', 'Mu

In [66]:
evaluate(y_true, y_pred)

Accuracy: 0.000
Accuracy for label 0: 0.000

Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       5.0
           5       0.00      0.00      0.00       0.0

    accuracy                           0.00       5.0
   macro avg       0.00      0.00      0.00       5.0
weighted avg       0.00      0.00      0.00       5.0


Confusion Matrix:
[[0 0 0]
 [0 0 0]
 [0 0 0]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
