# Dependencies

In [None]:
!pip install -q rouge_score
!pip install -q datasets
!pip install -q transformers
!pip install -q evaluate
!pip install -q accelerate
!pip install -q -i https://pypi.org/simple/ bitsandbytes
!pip install -q peft
!pip install -q trl
!pip install -q tqdm
!pip install -q pd
!pip install -q huggingface_hub

  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for rouge_score (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m527.3/527.3 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m9.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m39.9/39.9 MB[0m [31m39.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m9.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.1/194.1 kB[0m [31m12.9 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
cudf-cu12 24.4.1 requires pyarrow<15.0.0a0,>=14.0.1, but you have pyarrow 17.0.0 which is incomp

In [None]:
import torch
import transformers
from datasets import load_dataset, load_metric
from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM, TrainingArguments, Trainer, pipeline, BitsAndBytesConfig, DataCollatorForLanguageModeling, GenerationConfig
from peft import LoraConfig, get_peft_model, TaskType, PeftModel, PeftConfig, prepare_model_for_kbit_training, AutoPeftModelForCausalLM
from trl import SFTTrainer
import evaluate

import pandas as pd

import time
from tqdm import tqdm

#Data Preparation/Preprocessing

In [None]:
MODEL_CHECKPOINT = 'mistralai/Mistral-7B-Instruct-v0.2'


In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
from datasets import load_dataset,DatasetDict,load_metric

# Loading custom dataset
# The CSV lives on the mounted Google Drive. With a single `data_files` path the
# result is a DatasetDict holding one 'train' split (see the cell output).
path = "/content/drive/MyDrive/finetune_sum/data/train2.csv"
dataset = load_dataset('csv', data_files= path)

Generating train split: 0 examples [00:00, ? examples/s]

In [None]:
dataset

DatasetDict({
    train: Dataset({
        features: ['id', 'summary', 'dialogue'],
        num_rows: 2992
    })
})

In [None]:

# Split the dataset into train (80%) and test+validation (20%) sets.
# A fixed seed makes the shuffle deterministic: re-running this cell, or
# repeating the same split on the same CSV with the same seed, always puts the
# same rows in train / validation / test. Without it every run draws a new
# random partition, so rows used for training in one session can end up in the
# "test" split of another.
split_dataset = dataset['train'].train_test_split(test_size=0.2, seed=42)
# Further split the test+validation set in half: validation and test sets.
test_valid_split = split_dataset['test'].train_test_split(test_size=0.5, seed=42)

# Combine splits into a single DatasetDict
data = DatasetDict({
    'train': split_dataset['train'],
    'validation': test_valid_split['train'],
    'test': test_valid_split['test']
})

# Example output
print(data)

DatasetDict({
    train: Dataset({
        features: ['id', 'summary', 'dialogue'],
        num_rows: 2393
    })
    validation: Dataset({
        features: ['id', 'summary', 'dialogue'],
        num_rows: 299
    })
    test: Dataset({
        features: ['id', 'summary', 'dialogue'],
        num_rows: 300
    })
})


In [None]:
print(data["train"][2]["dialogue"])



Sidratul Sadia: Hello?
Sidratul Sadia: Good morning.
Jane: Hi, Dina. It's Jane. How are you going?
Sidratul Sadia: Yeah. Fine. Thank you for asking.
Jane: Sorry. I was at work. I'm at work. I was on the phone.Sorry. 
Sidratul Sadia: Uh, no. No worries.
Sidratul Sadia: No worries. So it's regarding the installation date. Give me one second. Let
Sidratul Sadia: me check my schedule one more time. Okay. So would you be
Sidratul Sadia: available, uh, tomorrow to take the installation?
Jane: Yes, please. Yep. The sooner I get this in, the better.
Sidratul Sadia: Alright. So I just need to reconfirm few more things with you. Your house is
Sidratul Sadia: a single storey house or it's a double storey house?
Jane: Yes. It is. Single single story.
Sidratul Sadia: Okay. And the address is 6 Summercourt, Harness, Victoria 3337?
Jane: Yep.
Sidratul Sadia: Alright.
Sidratul Sadia: And, uh, k. So so for 300 liter SLA, your quoted amount is
Sidratul Sadia: $2,615, and I can see that your finance hav

In [None]:
print(data["train"][2]["summary"])


Call Summary:
1. The address for installation is 6 Summercourt, Harness, Victoria 3337.
2. The house is a single-storey or double-storey.
3. The sooner the installation is done, the better.
4. Additional wiring beyond 10 meters will cost $3 per meter, payable upfront to Sidratul Sadia's team.
5. The hot water service and electrical box are less than 5 meters apart.




##Formatting data in mistral format & HF login

*   `<s>[INST] What is your favourite condiment? [/INST] Response.</s>`



In [None]:
def preprocess_data(example):
  """Build one training sample in the Mistral instruction format.

  Reads the "dialogue" and "summary" fields of `example` and returns a dict
  with a single "text" key: the instruction plus dialogue wrapped in
  <s>[INST] ... [/INST], followed by the reference summary and the closing
  </s> marker, each on its own line.
  """
  instruction = "<s>[INST] You are a helpful assistant. Your task is to generate following dialogue summarization:"
  pieces = [
      instruction,
      f'{example["dialogue"]}[/INST]',
      f'{example["summary"]}',
      "</s>",
  ]
  return {"text": "\n".join(pieces)}

In [None]:
data_preprocessed = data.map(preprocess_data,remove_columns=["id", "dialogue", "summary"])


In [None]:
data_preprocessed

DatasetDict({
    train: Dataset({
        features: ['text'],
        num_rows: 2393
    })
    validation: Dataset({
        features: ['text'],
        num_rows: 299
    })
    test: Dataset({
        features: ['text'],
        num_rows: 300
    })
})

In [None]:
"""
merged both dialogue and label summaries into a training ready prompt-response format.
"""
print(data_preprocessed["train"][2]["text"])


<s>[INST] You are a helpful assistant. Your task is to generate following dialogue summarization:

Sidratul Sadia: Hello?
Sidratul Sadia: Good morning.
Jane: Hi, Dina. It's Jane. How are you going?
Sidratul Sadia: Yeah. Fine. Thank you for asking.
Jane: Sorry. I was at work. I'm at work. I was on the phone.Sorry. 
Sidratul Sadia: Uh, no. No worries.
Sidratul Sadia: No worries. So it's regarding the installation date. Give me one second. Let
Sidratul Sadia: me check my schedule one more time. Okay. So would you be
Sidratul Sadia: available, uh, tomorrow to take the installation?
Jane: Yes, please. Yep. The sooner I get this in, the better.
Sidratul Sadia: Alright. So I just need to reconfirm few more things with you. Your house is
Sidratul Sadia: a single storey house or it's a double storey house?
Jane: Yes. It is. Single single story.
Sidratul Sadia: Okay. And the address is 6 Summercourt, Harness, Victoria 3337?
Jane: Yep.
Sidratul Sadia: Alright.
Sidratul Sadia: And, uh, k. So so fo

In [None]:
data_preprocessed.set_format(type="torch")


In [None]:
MODEL_CHECKPOINT


'mistralai/Mistral-7B-Instruct-v0.2'

In [None]:
# huggingface login
from huggingface_hub import notebook_login

notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
# Load the tokenizer that belongs to the base checkpoint.
tokenizer = AutoTokenizer.from_pretrained(MODEL_CHECKPOINT)
# Reuse the unknown token as the padding token
# (presumably because this tokenizer has no dedicated pad token -- TODO confirm).
tokenizer.pad_token = tokenizer.unk_token
# Pad on the right-hand side of each sequence.
tokenizer.padding_side = 'right'

Error while fetching `HF_TOKEN` secret value from your vault: 'Requesting secret HF_TOKEN timed out. Secrets can only be fetched when running from the Colab UI.'.
You are not authenticated with the Hugging Face Hub in this notebook.
If the error persists, please let us know by opening an issue on GitHub (https://github.com/huggingface/huggingface_hub/issues/new).


tokenizer_config.json:   0%|          | 0.00/2.10k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

#Model finetuning

##Preparing base model in 4bit quantization

In [None]:
# Settings for loading the base model with 4-bit quantization:
# the on/off switch and the dtype used for computation.
use_4bit = True
compute_dtype = torch.float16

In [None]:
# bitsandbytes quantization settings; passed as `quantization_config` when the
# base model is loaded.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,  # driven by the `use_4bit` flag set earlier
    bnb_4bit_quant_type="nf4",  # quantization type: "nf4"
    bnb_4bit_compute_dtype=compute_dtype,  # dtype held in `compute_dtype`
    bnb_4bit_use_double_quant=False,  # double quantization switched off
)

In [None]:
#load our Mistral 7B base model as AutoModelForCausalLM
# "auto" leaves the placement of the weights on the available devices to the loader.
device_map = "auto"
model = AutoModelForCausalLM.from_pretrained(
    MODEL_CHECKPOINT,
    quantization_config=bnb_config,  # loading in 4-bit quantization
    device_map=device_map,
)

config.json:   0%|          | 0.00/596 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

In [None]:
"""
updating model with the tokenizer pad_token_id as
defined earlier in order for both tokenizer and model stay synchronized in terms of special tokens they both use.
"""

# Configure the pad token in the base Mistral model
model.config.pad_token_id = tokenizer.pad_token_id

##Prepare the fine-tuning model LoRA config

In [None]:
"""
From research papers : We find that the most critical LoRA hyperparameter is
how many LoRA adapters are used in total and that LoRA on all linear transformer block layers are required to match full finetuning performance".

"""
print(model)


MistralForCausalLM(
  (model): MistralModel(
    (embed_tokens): Embedding(32000, 4096)
    (layers): ModuleList(
      (0-31): 32 x MistralDecoderLayer(
        (self_attn): MistralSdpaAttention(
          (q_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): MistralRotaryEmbedding()
        )
        (mlp): MistralMLP(
          (gate_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear4bit(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): MistralRMSNorm()
        (post_attention_layernorm): MistralRMSNorm()
      )
    )

In [None]:
##So for Mistral 7B we select all the linear layers weights of the model for fine-tuning:

"""
q_proj,
k_proj,
v_proj,
o_proj,
gate_proj,
up_proj,
down_proj,
lm_head

"""

'\nq_proj,\nk_proj,\nv_proj,\no_proj,\ngate_proj,\nup_proj,\ndown_proj,\nlm_head\n\n'

In [None]:
# Final LoRA configuration.
#
# r is the rank of the LoRA update matrices. A common rule of thumb is
# lora_alpha = 2 * r. The notes this notebook follows cite the original LoRA
# paper for picking r from 8, 16, 32, 64, 128, 256 or 512.
#
# Current choice: r=16 with lora_alpha=32. An earlier run used r=32 /
# lora_alpha=64; the rank was lowered (and dropout raised) because a
# smaller-capacity adapter may generalize better on this limited dataset.
peft_config = LoraConfig(
    r=16,  # earlier 32
    lora_alpha=32,  # 2 * r
    lora_dropout=0.3,  # earlier 0.1; raised for stronger regularization
    bias="none",
    task_type="CAUSAL_LM",
    # Every linear projection of the decoder blocks plus the output head.
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
        "lm_head",
    ],
)

##Model training

In [None]:
# If memory rules out a larger per-device batch, gradient accumulation can
# simulate one without extra memory: gradients of several small batches are
# accumulated before each optimizer step. For example, an effective batch of 8
# with only 2 samples fitting in memory needs gradient_accumulation_steps=4.

from transformers import TrainingArguments
import time

run_name = "peft-dialogue-summary-training"
path = '/content/drive/MyDrive/finetune_sum/training_output'
# The epoch-seconds suffix gives every run its own checkpoint folder.
output_dir = f'{path}/peft-dialogue-summary-training-{int(time.time())}_train2'

training_arguments = TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=8,  # 4 x 8 = 32 samples per optimizer step per device (earlier 4)
    gradient_checkpointing=True,  # trade compute for lower activation memory
    optim="paged_adamw_32bit",  # memory-efficient optimizer
    save_steps=40,  # checkpoint every 40 optimizer steps
    logging_steps=30,  # log the training loss every 30 optimizer steps
    learning_rate=5e-5,  # lowered from 2e-4 for more stable training
    fp16=True,  # mixed-precision training
    # Evaluate once at the end of every epoch. `eval_steps` is only consulted
    # when the strategy is "steps", so it is deliberately not set here.
    eval_strategy="epoch",
    max_grad_norm=0.3,
    num_train_epochs=6,  # earlier 5
    weight_decay=0.1,  # earlier 0.01; stronger regularization against overfitting
    warmup_steps=10,  # earlier 20; shorter warmup because the run has few steps
    lr_scheduler_type="linear",
    run_name=run_name
)


In [None]:
import os
# Allocator setting for PyTorch's CUDA memory allocator.
# NOTE(review): this is set after the model was already loaded with
# device_map="auto" earlier in the notebook, so it may come too late to take
# effect -- confirm, and consider setting it before the first CUDA use.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
# Supervised fine-tuning trainer: wires together the quantized base model, the
# LoRA config, the preprocessed "text" datasets and the training arguments.
# NOTE(review): the captured output of this cell warns that some arguments
# passed here are deprecated on SFTTrainer in favour of SFTConfig.
trainer = SFTTrainer(
    model=model,
    train_dataset=data_preprocessed["train"],
    eval_dataset=data_preprocessed["validation"], # remove if you have low VRAM and are getting OOM errors
    peft_config=peft_config,
    dataset_text_field="text",
    max_seq_length=4096,
    tokenizer=tokenizer,
    args=training_arguments,
    packing=False,
)


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Map:   0%|          | 0/2393 [00:00<?, ? examples/s]

Map:   0%|          | 0/299 [00:00<?, ? examples/s]

  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


##Start Training and save the adapter

  
> Always monitor the validation loss and generalization gap (difference between training and validation loss) to guide your choice.
Generalization Gap = Validation Loss - Training Loss




* Small Generalization Gap: Indicates that the model is performing similarly on both the training and validation datasets, which generally suggests good generalization to unseen data.

* Large Generalization Gap: Indicates that the model performs much better on the training data than on the validation data, suggesting overfitting. This means the model has learned patterns specific to the training data that don't generalize well to new data.




In [None]:
#Always monitor the validation loss and generalization gap (difference between training and validation loss) to guide your choice.
trainer.train()


`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


Epoch,Training Loss,Validation Loss
0,1.6977,1.624017
1,1.6068,1.57621
2,1.5396,1.562598
3,1.5016,1.557966
4,1.4655,1.55884


  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu

##### Save the final fine-tuned model (adapter) checkpoint for later use.


In [None]:
from time import strftime, localtime
# Drive folder that collects the saved adapters.
path = '/content/drive/MyDrive/finetune_sum/final_adapter'
# `time` (the module) is not imported in this cell; it relies on an earlier `import time`.
t = int(time.time()) ##in epoch
a = strftime('%Y-%m-%d %H:%M:%S', localtime(t)) #in datetime format
# NOTE(review): the timestamp puts a space and colons into the directory name;
# such paths need quoting in shells and are not valid on every filesystem.
peft_model_path=f"{path}/peft-dialogue-summary-mistral-checkpoint-local-{a}_train2"

# Store the trained model held by the trainer and the tokenizer in the same folder.
trainer.model.save_pretrained(peft_model_path)
tokenizer.save_pretrained(peft_model_path)

##Models merging and saving


In [None]:
# The fine-tuned model is now trained but not yet ready to use for inference:
# what was saved is only an adapter model, which must be merged with the raw base model (Mistral 7B).


In [None]:
MODEL_CHECKPOINT

'mistralai/Mistral-7B-Instruct-v0.2'

###merging of adapter and base model and saving it

In [None]:
# Reload the base checkpoint. No quantization_config or torch_dtype is passed
# here, unlike the 4-bit load used for training.
model = AutoModelForCausalLM.from_pretrained(MODEL_CHECKPOINT)

# Attach the adapter saved at `peft_model_path` to the base model.
model = PeftModel.from_pretrained(model, peft_model_path)

model = model.merge_and_unload() ##merging of adapter and base model v0.2

# t = int(time.time()) ##in epoch
# a = strftime('%Y-%m-%d %H:%M:%S', localtime(t)) #in datetime format
path = '/content/drive/MyDrive/finetune_sum/merged_model'
model_dir = f"{path}/models/merged-peft-dialogue-summary-mistral_train2/"
# Write the merged model (safetensors serialization) and the tokenizer to the same folder.
model.save_pretrained(model_dir, safe_serialization=True)
tokenizer.save_pretrained(model_dir)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

('/content/drive/MyDrive/finetune_sum/merged_model/models/merged-peft-dialogue-summary-mistral-2024-09-04 05:12:48_train2/tokenizer_config.json',
 '/content/drive/MyDrive/finetune_sum/merged_model/models/merged-peft-dialogue-summary-mistral-2024-09-04 05:12:48_train2/special_tokens_map.json',
 '/content/drive/MyDrive/finetune_sum/merged_model/models/merged-peft-dialogue-summary-mistral-2024-09-04 05:12:48_train2/tokenizer.model',
 '/content/drive/MyDrive/finetune_sum/merged_model/models/merged-peft-dialogue-summary-mistral-2024-09-04 05:12:48_train2/added_tokens.json',
 '/content/drive/MyDrive/finetune_sum/merged_model/models/merged-peft-dialogue-summary-mistral-2024-09-04 05:12:48_train2/tokenizer.json')

##Load the final fine tuned model and tokenizer from the drive


In [None]:
MODEL_CHECKPOINT = 'mistralai/Mistral-7B-Instruct-v0.2'

# from time import strftime, localtime
path = '/content/drive/MyDrive/finetune_sum/final_adapter'
# t = int(time.time()) ##in epoch
# a = strftime('%Y-%m-%d %H:%M:%S', localtime(t)) #in datetime format
# Hard-coded adapter folder (timestamped name); `path` above is not used to build it.
peft_model_path="/content/drive/MyDrive/finetune_sum/final_adapter/peft-dialogue-summary-mistral-checkpoint-local-2024-09-04 01:00:42_train2"

#pick the base model
# (no quantization_config or torch_dtype is passed for this load)
model = AutoModelForCausalLM.from_pretrained(MODEL_CHECKPOINT)

#associate the base model with final adapter
model = PeftModel.from_pretrained(model, peft_model_path)

#merge both base model and adapter
model = model.merge_and_unload() ##merging of adapter and base model v0.2

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [None]:
from transformers import AutoTokenizer

# Path to the directory where the tokenizer is saved
tokenizer_dir = "/content/drive/MyDrive/finetune_sum/merged_model/models/merged-peft-dialogue-summary-mistral-2024-09-04 05:12:48_train2"

# Load the tokenizer from the saved directory
tokenizer = AutoTokenizer.from_pretrained(tokenizer_dir)

##Pushing model to hugging face hub

In [None]:
#And now finally the fine-tuned model is both trained and ready to use for inference.
!huggingface-cli login




    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    A token is already saved on your machine. Run `huggingface-cli whoami` to get more information or `huggingface-cli logout` if you want to log out.
    Setting a new token will erase the existing one.
    To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
Add token as git credential? (Y/n) Y
Token is valid (permission: write)

In [None]:
model.push_to_hub("Mistral-7B-Instruct-v0.2-SAASLABS-SFTT-v3", private=True)


model-00001-of-00006.safetensors:   0%|          | 0.00/4.99G [00:00<?, ?B/s]

model-00003-of-00006.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00005-of-00006.safetensors:   0%|          | 0.00/4.83G [00:00<?, ?B/s]

Upload 6 LFS files:   0%|          | 0/6 [00:00<?, ?it/s]

model-00004-of-00006.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00006-of-00006.safetensors:   0%|          | 0.00/4.25G [00:00<?, ?B/s]

model-00002-of-00006.safetensors:   0%|          | 0.00/4.90G [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/mlteam-justcall/Mistral-7B-Instruct-v0.2-SAASLABS-SFTT-v3/commit/bed0324ebc490239c8154faeccbf13a26a78b9cf', commit_message='Upload MistralForCausalLM', commit_description='', oid='bed0324ebc490239c8154faeccbf13a26a78b9cf', pr_url=None, pr_revision=None, pr_num=None)

In [None]:
tokenizer.push_to_hub("Mistral-7B-Instruct-v0.2-SAASLABS-SFTT-v3",private=True)


README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/mlteam-justcall/Mistral-7B-Instruct-v0.2-SAASLABS-SFTT-v3/commit/1ea42078553b754e699718acaf17f8217ed802b4', commit_message='Upload tokenizer', commit_description='', oid='1ea42078553b754e699718acaf17f8217ed802b4', pr_url=None, pr_revision=None, pr_num=None)

#Evaluation

In [None]:
from huggingface_hub import notebook_login

notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
## Evaluate our fine-tuned dialogue summarization model against the original raw Mistral 7B model
#ROUGE metric (metric dedicated for summarization task)
!huggingface-cli login



    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    A token is already saved on your machine. Run `huggingface-cli whoami` to get more information or `huggingface-cli logout` if you want to log out.
    Setting a new token will erase the existing one.
    To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
Add token as git credential? (Y/n) Y
Token is valid (permission: write)

##Data preprocessing and inference

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:


from datasets import load_dataset,DatasetDict,load_metric

# Loading custom dataset
path = "/content/drive/MyDrive/finetune_sum/data/train2.csv"
dataset = load_dataset('csv', data_files= path)
# Split the dataset into train (80%) and test+validation (20%) sets.
# The seed is fixed so this evaluation-time split is deterministic. An unseeded
# split draws a new random partition on every run, which means the "test" rows
# selected here could be rows the model was fine-tuned on. The seed must match
# the one used for the split the model was trained with.
split_dataset = dataset['train'].train_test_split(test_size=0.2, seed=42)
# Further split the test+validation set in half: validation and test sets.
test_valid_split = split_dataset['test'].train_test_split(test_size=0.5, seed=42)

# Combine splits into a single DatasetDict
data = DatasetDict({
    'train': split_dataset['train'],
    'validation': test_valid_split['train'],
    'test': test_valid_split['test']
})

# Example output
print(data)

Generating train split: 0 examples [00:00, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['id', 'summary', 'dialogue'],
        num_rows: 2393
    })
    validation: Dataset({
        features: ['id', 'summary', 'dialogue'],
        num_rows: 299
    })
    test: Dataset({
        features: ['id', 'summary', 'dialogue'],
        num_rows: 300
    })
})


In [None]:
data = data['test']

In [None]:
# data = data[:10]

In [None]:
data

Dataset({
    features: ['id', 'summary', 'dialogue'],
    num_rows: 300
})

In [None]:
# Select the first 15 data points from the dataset
selected_data = data.select(range(15))

# Print the selected data points
print(selected_data)

Dataset({
    features: ['id', 'summary', 'dialogue'],
    num_rows: 15
})


In [None]:
print(selected_data[0]["dialogue"])



System: CCO Pet Nutrition. How can I help you today?
New JustCall: Hi there. I wonder if I could cancel, um, my subscription. I got one tubit, and I actually it didn't agree with my doggies. 
System: Okay. Yeah. Of course. Uh, let's have a look for you.
New JustCall: SoI just realized that still I would have a subscription to come. 
System: Yeah. Of course. Let's have a look.
New JustCall: I'm trying it, like, everysecond day or something, but they were quite sick. So 
System: Oh, bless them. Of course. Now okay. Let's have a look.
System: Um, are you okay just to confirm your, um, address and postcode for me first, please?
New JustCall: Yes. It's Moercroft, Old Fiskie Road,Ochterhaus Dundee. Delta delta 3 0 Romeo delta. 
System: That's great. Thank you. And can I just take your full name as well, please?
New JustCall: It's on Gula York.
System: That's great. Yeah. So you've got one, um,
System: one top on the subscription. Um, so you just wanted to cancel that one.
New JustCall: Yeah

In [None]:
print(selected_data[0]["summary"])


Call Summary:
1. The customer wants to cancel their subscription because the product didn't agree with their dogs.
2. The sales agent confirms the customer's address and name.
3. The customer asks about the dosage for their dogs and the sales agent provides the recommended dosage based on the dog's weight.
4. The sales agent suggests lowering the dosage and splitting it into two meals if necessary.
5. The customer agrees to try lowering the dosage and splitting it into two meals.
6. The sales agent cancels the customer's subscription and reminds them of the money-back guarantee.




In [None]:
len(selected_data)

15

In [None]:
def preprocess_data(example):
  """Wrap a raw dialogue in the Mistral instruction prompt used for inference.

  Reads example["dialogue"] and returns {"dialogue": prompt}, replacing the
  raw dialogue with the prompt text. The prompt is exactly the prefix of a
  training sample up to the point where the summary starts: instruction and
  dialogue inside <s>[INST] ... [/INST], then a newline.

  The end-of-sequence marker </s> is intentionally NOT appended: it closes a
  finished answer, so putting it at the end of the prompt would ask the model
  to continue a sequence that is already terminated.
  """
  dialogue = example["dialogue"]
  prompt = f"""<s>[INST] You are a helpful assistant. Your task is to generate following dialogue summarization:
{dialogue}[/INST]
"""
  return {"dialogue": prompt}

In [None]:
data_preprocessed = selected_data.map(preprocess_data, batched=False, remove_columns=["id"])


Map:   0%|          | 0/15 [00:00<?, ? examples/s]

In [None]:
data_preprocessed


Dataset({
    features: ['summary', 'dialogue'],
    num_rows: 15
})

In [None]:
print(data_preprocessed[3]["dialogue"])


<s>[INST] You are a helpful assistant. Your task is to generate following dialogue summarization:

Amy: It's Bobby Arch. This is Ayesha speaking.
Adam: Hi there. My name is Adam.Um, I'm just curious when your guys are coming back next. 
Amy: Okay.
Amy: Sorry. Was this for
Adam: So
Amy: um, Adam?
Adam: yes. That's the one. Yes. I've got the the water leak in my wall.
Amy: Okay.
Amy: Okay. So
Amy: one of our technicians went out on the 6th for you.
Adam: Yes. That's correct.
Amy: Okay.
Amy: Sorry. Sorry. Can you just explain to me what's, um,
Amy: sorry.
Adam: Sure. Sure. Um, so I had you guys come out, swap aaircon split system out for me, the process of doing that, you managed to there's some damage done inside the wall that's caused a water leak. That was confirmed by your guy that came out on 6th, and then he said he was gonna go back and have a discussion and then go from there. I was just I hadn't heard anything back yet, and I was curious when when we'll get this water leak fixed.

In [None]:
print(data_preprocessed[3]["summary"])


Call Summary:
1. Ayesha reported a water leak in her wall and asked when the repair will be done.
2. Amy tried to contact Jesse, but he had already left for the day.
3. Jesse will call Ayesha before 10:10 AM tomorrow to discuss the repair options.
4. Adam expressed relief that Jesse will be calling back soon.




##Load models from Hugging Face hub along with the respective tokenizers.

In [None]:
# Fine-tuned model ##RAM USED max (40GB plus)
# Tokenizer and merged model are both pulled from the same Hub repository.
tokenizer = AutoTokenizer.from_pretrained("mlteam-justcall/Mistral-7B-Instruct-v0.2-SAASLABS-SFTT-v3")
# No torch_dtype, quantization_config or device_map is given for this load.
model = AutoModelForCausalLM.from_pretrained("mlteam-justcall/Mistral-7B-Instruct-v0.2-SAASLABS-SFTT-v3")
# Use the end-of-sequence token id as the padding id during generation.
model.generation_config.pad_token_id = model.generation_config.eos_token_id

Access to the secret `HF_TOKEN` has not been granted on this notebook.
You will not be requested again.
Please restart the session if you want to be prompted again.


tokenizer_config.json:   0%|          | 0.00/2.21k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/552 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/652 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/6 [00:00<?, ?it/s]

model-00001-of-00006.safetensors:   0%|          | 0.00/4.99G [00:00<?, ?B/s]

model-00002-of-00006.safetensors:   0%|          | 0.00/4.90G [00:00<?, ?B/s]

model-00003-of-00006.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00004-of-00006.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00005-of-00006.safetensors:   0%|          | 0.00/4.83G [00:00<?, ?B/s]

model-00006-of-00006.safetensors:   0%|          | 0.00/4.25G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/6 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

In [None]:
# Base Mistral-7B-Instruct-v0.2 model
# Loaded under separate names (raw_*) so it can be compared with the fine-tuned model.
raw_tokenizer = AutoTokenizer.from_pretrained('mistralai/Mistral-7B-Instruct-v0.2')
# No torch_dtype, quantization_config or device_map is given for this load.
raw_model = AutoModelForCausalLM.from_pretrained('mistralai/Mistral-7B-Instruct-v0.2')
# Use the end-of-sequence token id as the padding id during generation.
raw_model.generation_config.pad_token_id = raw_model.generation_config.eos_token_id
# Reuse the unknown token as the tokenizer's padding token.
raw_tokenizer.pad_token = raw_tokenizer.unk_token

Error while fetching `HF_TOKEN` secret value from your vault: 'Requesting secret HF_TOKEN timed out. Secrets can only be fetched when running from the Colab UI.'.
You are not authenticated with the Hugging Face Hub in this notebook.
If the error persists, please let us know by opening an issue on GitHub (https://github.com/huggingface/huggingface_hub/issues/new).


tokenizer_config.json:   0%|          | 0.00/2.10k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/596 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

##Print of one example of full dialogue, groundtruth summary and summaries generated by both models.

In [None]:
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"


### rough test

In [None]:
def summarize(tokenizer, model, dialogue):
    """Generate a summary for one prompt and return only the newly generated text.

    Args:
        tokenizer: Tokenizer used to encode ``dialogue`` and decode the output.
        model: Causal language model exposing ``generate`` and ``device``.
        dialogue: The complete prompt string (instruction wrapper included).

    Returns:
        The decoded continuation: at most 256 new tokens, with the prompt
        tokens sliced off and special tokens skipped.
    """
    # NOTE(review): the prompts used in this notebook already start with a
    # literal "<s>"; if the tokenizer also prepends BOS the model sees it
    # twice - confirm this matches the format used for fine-tuning.
    # Place the inputs on the device the model actually lives on, so the
    # function does not depend on a notebook-level global.
    inputs = tokenizer(dialogue, return_tensors="pt").to(model.device)
    inputs_length = len(inputs["input_ids"][0])
    with torch.inference_mode():
        # Greedy decoding. This is the deterministic equivalent of the former
        # "sample with temperature=0.0001" setting, without dividing the
        # logits by a near-zero temperature.
        outputs = model.generate(**inputs, max_new_tokens=256, do_sample=False)
    # generate() returns prompt + continuation; keep the continuation only.
    return tokenizer.decode(outputs[0][inputs_length:], skip_special_tokens=True)

In [None]:
# Hand-written smoke-test prompt. Note that `dialogue` is already the complete
# model input: the transcript wrapped in the "<s>[INST] ... [/INST]\n</s>"
# instruction template.
dialogue = """<s>[INST] You are a helpful assistant. Your task is to generate following dialogue summarization:
Luke: Hello. Luke from Mandeep speaking.
Alessio: Hi, Rick. How's it going? Uh, just returning this call.
Luke: Oh, yeah. Is this Alessio?
Alessio: Uh, yes. Speaking.
Luke: Hey, Alejandro. Um, yeah, mate. Luke from OnDeck here. I was just, uh, calling to touch base
Luke: mate. Um, we haven't spoken in a while.
Luke: Wanted to see how everything was going on your end.
Alessio: Yeah. No. Thanks for that. Um, yeah. Going well. Um, Idon't remember. I think we spoke a while back, but, um, did you end up having a chat with your team to see if you had any functionality for, like, a link? What's the referral a referral link?
Luke: A link.
Luke: What's lates and things?
Alessio: Yeah.
Luke: Yeah. So we we don't actually use a a link
Luke: for, like, uh, referrals or anything. We I mainly do it through email or just
Luke: uh, via
Alessio: Okay.
Luke: via calling, um, because we kinda
Luke: we we think that speeds up the process a lot rather than, uh,
Luke: actually providing a link like the other unsecured lenders.
Luke: Um, were you doing much in that space at the moment, mate?
Alessio: Yeah. I'm doing a bit in that space, and I'm looking at, um,growing a bit more on the content side on on our website for finance related resources for small businesses. Um, so it'd be something that'd be handy we can put that link in there and, um, just make it easier for people to sign up. But, um, do you have any, like, forms or anything like that that I can integrate in?
Luke: We we we can do some white label marketing for you, mate.
Luke: Um, would that would that be better?
Alessio: What what does that sort of, um, entail?
Luke: So that's like, um, it'll be like a marketing
Luke: piece, um, just promoting small businesses.
Luke: And all you need to do is just put your logo on it, and then you can share that kind of how you like.
Alessio: Okay.
Luke: Um, I can send you I can send you a draft.
Alessio: Yeah. Okay. Is it just like a landing page type thing or
Luke: Yeah. Yeah. Yeah.
Alessio: Yep. Okay. Yeah. If you can send through that, that'd be that'd be great. Yeah.
Luke: For sure, mate. For sure. Well, I'll send that through to you now. Let me know what you think.
Alessio: Alright. Thanks. Goodbye.[/INST]
</s>
"""
# `dialogue` already carries the instruction wrapper, so wrapping it a second
# time would nest one [INST] block inside another. Reuse it unchanged.
prompt = dialogue

In [None]:
dialogue

"<s>[INST] You are a helpful assistant. Your task is to generate following dialogue summarization:\nLuke: Hello. Luke from Mandeep speaking.\nAlessio: Hi, Rick. How's it going? Uh, just returning this call.\nLuke: Oh, yeah. Is this Alessio?\nAlessio: Uh, yes. Speaking.\nLuke: Hey, Alejandro. Um, yeah, mate. Luke from OnDeck here. I was just, uh, calling to touch base\nLuke: mate. Um, we haven't spoken in a while.\nLuke: Wanted to see how everything was going on your end.\nAlessio: Yeah. No. Thanks for that. Um, yeah. Going well. Um, Idon't remember. I think we spoke a while back, but, um, did you end up having a chat with your team to see if you had any functionality for, like, a link? What's the referral a referral link? \nLuke: A link.\nLuke: What's lates and things?\nAlessio: Yeah.\nLuke: Yeah. So we we don't actually use a a link\nLuke: for, like, uh, referrals or anything. We I mainly do it through email or just\nLuke: uh, via\nAlessio: Okay.\nLuke: via calling, um, because we kin

In [None]:
# Summarise the hand-written example with the fine-tuned model and display
# the result underneath the prompt.
_finetuned_summ = summarize(tokenizer, model.to(DEVICE), dialogue)
f"dialog:{dialogue}\n\nfinetuned_summ: {_finetuned_summ}"

"dialog:<s>[INST] You are a helpful assistant. Your task is to generate following dialogue summarization:\nLuke: Hello. Luke from Mandeep speaking.\nAlessio: Hi, Rick. How's it going? Uh, just returning this call.\nLuke: Oh, yeah. Is this Alessio?\nAlessio: Uh, yes. Speaking.\nLuke: Hey, Alejandro. Um, yeah, mate. Luke from OnDeck here. I was just, uh, calling to touch base\nLuke: mate. Um, we haven't spoken in a while.\nLuke: Wanted to see how everything was going on your end.\nAlessio: Yeah. No. Thanks for that. Um, yeah. Going well. Um, Idon't remember. I think we spoke a while back, but, um, did you end up having a chat with your team to see if you had any functionality for, like, a link? What's the referral a referral link? \nLuke: A link.\nLuke: What's lates and things?\nAlessio: Yeah.\nLuke: Yeah. So we we don't actually use a a link\nLuke: for, like, uh, referrals or anything. We I mainly do it through email or just\nLuke: uh, via\nAlessio: Okay.\nLuke: via calling, um, because

In [None]:
# Summarise the same hand-written example with the base model and display
# the result underneath the prompt.
_base_summ = summarize(raw_tokenizer, raw_model.to(DEVICE), dialogue)
f"dialog:{dialogue}\n\nBasemodel_summ: {_base_summ}"

"dialog:<s>[INST] You are a helpful assistant. Your task is to generate following dialogue summarization:\nLuke: Hello. Luke from Mandeep speaking.\nAlessio: Hi, Rick. How's it going? Uh, just returning this call.\nLuke: Oh, yeah. Is this Alessio?\nAlessio: Uh, yes. Speaking.\nLuke: Hey, Alejandro. Um, yeah, mate. Luke from OnDeck here. I was just, uh, calling to touch base\nLuke: mate. Um, we haven't spoken in a while.\nLuke: Wanted to see how everything was going on your end.\nAlessio: Yeah. No. Thanks for that. Um, yeah. Going well. Um, Idon't remember. I think we spoke a while back, but, um, did you end up having a chat with your team to see if you had any functionality for, like, a link? What's the referral a referral link? \nLuke: A link.\nLuke: What's lates and things?\nAlessio: Yeah.\nLuke: Yeah. So we we don't actually use a a link\nLuke: for, like, uh, referrals or anything. We I mainly do it through email or just\nLuke: uh, via\nAlessio: Okay.\nLuke: via calling, um, because

In [None]:
# Repeat the check on one row of the preprocessed dataset (index 2), using the
# fine-tuned model.
idx = 2
_sample_dialogue = data_preprocessed['dialogue'][idx]
_sample_summ = summarize(tokenizer, model.to(DEVICE), _sample_dialogue)
f"dialo: {_sample_dialogue}\n\n summ: {_sample_summ}"

"dialo: <s>[INST] You are a helpful assistant. Your task is to generate following dialogue summarization:\n\nLuke: Hello. Luke from Mandeep speaking.\nAlessio: Hi, Rick. How's it going? Uh, just returning this call.\nLuke: Oh, yeah. Is this Alessio?\nAlessio: Uh, yes. Speaking.\nLuke: Hey, Alejandro. Um, yeah, mate. Luke from OnDeck here. I was just, uh, calling to touch base\nLuke: mate. Um, we haven't spoken in a while.\nLuke: Wanted to see how everything was going on your end.\nAlessio: Yeah. No. Thanks for that. Um, yeah. Going well. Um, Idon't remember. I think we spoke a while back, but, um, did you end up having a chat with your team to see if you had any functionality for, like, a link? What's the referral a referral link? \nLuke: A link.\nLuke: What's lates and things?\nAlessio: Yeah.\nLuke: Yeah. So we we don't actually use a a link\nLuke: for, like, uh, referrals or anything. We I mainly do it through email or just\nLuke: uh, via\nAlessio: Okay.\nLuke: via calling, um, becau

### Real test

In [None]:
def summarize(tokenizer, model, dialogue):
    """Generate a summary for one prompt and return only the newly generated text.

    Args:
        tokenizer: Tokenizer used to encode ``dialogue`` and decode the output.
        model: Causal language model exposing ``generate`` and ``device``.
        dialogue: The complete prompt string (instruction wrapper included).

    Returns:
        The decoded continuation: at most 256 new tokens, with the prompt
        tokens sliced off and special tokens skipped.
    """
    # NOTE(review): the prompts used in this notebook already start with a
    # literal "<s>"; if the tokenizer also prepends BOS the model sees it
    # twice - confirm this matches the format used for fine-tuning.
    # Place the inputs on the device the model actually lives on, so the
    # function does not depend on a notebook-level global.
    inputs = tokenizer(dialogue, return_tensors="pt").to(model.device)
    inputs_length = len(inputs["input_ids"][0])
    with torch.inference_mode():
        # Greedy decoding. This is the deterministic equivalent of the former
        # "sample with temperature=0.0001" setting, without dividing the
        # logits by a near-zero temperature.
        outputs = model.generate(**inputs, max_new_tokens=256, do_sample=False)
    # generate() returns prompt + continuation; keep the continuation only.
    return tokenizer.decode(outputs[0][inputs_length:], skip_special_tokens=True)

In [None]:
import os

# Ask the CUDA caching allocator for expandable segments.
# NOTE(review): this variable is presumably only read when the allocator is
# first initialised, so setting it after CUDA has already been used in the
# session likely has no effect - consider moving it to the top of the notebook.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

# Move the model to the target device once, not on every iteration.
_finetuned_model = model.to(DEVICE)

# Generate one summary per prompt with a plain loop, one row at a time.
# (An earlier batched variant that called torch.cuda.empty_cache() after each
# batch was dead, commented-out code and has been removed.)
finetuned_generated_summaries = []
for row in data_preprocessed["dialogue"]:
    # Observed GPU usage for this loop: max 34.7GB.
    finetuned_generated_summary = summarize(tokenizer, _finetuned_model, row).strip()
    finetuned_generated_summaries.append(finetuned_generated_summary)

In [None]:
data_preprocessed["dialogue"][2]

"<s>[INST] You are a helpful assistant. Your task is to generate following dialogue summarization:\n\nLuke: Hello. Luke from Mandeep speaking.\nAlessio: Hi, Rick. How's it going? Uh, just returning this call.\nLuke: Oh, yeah. Is this Alessio?\nAlessio: Uh, yes. Speaking.\nLuke: Hey, Alejandro. Um, yeah, mate. Luke from OnDeck here. I was just, uh, calling to touch base\nLuke: mate. Um, we haven't spoken in a while.\nLuke: Wanted to see how everything was going on your end.\nAlessio: Yeah. No. Thanks for that. Um, yeah. Going well. Um, Idon't remember. I think we spoke a while back, but, um, did you end up having a chat with your team to see if you had any functionality for, like, a link? What's the referral a referral link? \nLuke: A link.\nLuke: What's lates and things?\nAlessio: Yeah.\nLuke: Yeah. So we we don't actually use a a link\nLuke: for, like, uh, referrals or anything. We I mainly do it through email or just\nLuke: uh, via\nAlessio: Okay.\nLuke: via calling, um, because we k

In [None]:
# Spot-check a single row (index 2) instead of looping over the whole dataset.
idx = 2
row = data_preprocessed["dialogue"][idx]
finetuned_generated_summary = summarize(tokenizer, model.to(DEVICE), row)
finetuned_generated_summary = finetuned_generated_summary.strip()

In [None]:
# Display the spot-checked prompt next to a freshly generated (unstripped)
# summary from the fine-tuned model.
_row_text = data_preprocessed['dialogue'][idx]
_row_summ = summarize(tokenizer, model.to(DEVICE), row)
f"dialo: {_row_text}\n\n summ: {_row_summ}"

"dialo: <s>[INST] You are a helpful assistant. Your task is to generate following dialogue summarization:\n\nLuke: Hello. Luke from Mandeep speaking.\nAlessio: Hi, Rick. How's it going? Uh, just returning this call.\nLuke: Oh, yeah. Is this Alessio?\nAlessio: Uh, yes. Speaking.\nLuke: Hey, Alejandro. Um, yeah, mate. Luke from OnDeck here. I was just, uh, calling to touch base\nLuke: mate. Um, we haven't spoken in a while.\nLuke: Wanted to see how everything was going on your end.\nAlessio: Yeah. No. Thanks for that. Um, yeah. Going well. Um, Idon't remember. I think we spoke a while back, but, um, did you end up having a chat with your team to see if you had any functionality for, like, a link? What's the referral a referral link? \nLuke: A link.\nLuke: What's lates and things?\nAlessio: Yeah.\nLuke: Yeah. So we we don't actually use a a link\nLuke: for, like, uh, referrals or anything. We I mainly do it through email or just\nLuke: uh, via\nAlessio: Okay.\nLuke: via calling, um, becau

In [None]:
finetuned_generated_summary

'INSTINST:\n\n\n'

In [None]:
# Dump every preprocessed prompt, each followed by a separator line.
for idx, row in enumerate(data_preprocessed["dialogue"]):
    print(
        f"row{idx} and the row is:{row}",
        "****************************************************************",
        sep="\n",
    )


row0 and the row is:<s>[INST] You are a helpful assistant. Your task is to generate following dialogue summarization:

System: CCO Pet Nutrition. How can I help you today?
New JustCall: Hi there. I wonder if I could cancel, um, my subscription. I got one tubit, and I actually it didn't agree with my doggies. 
System: Okay. Yeah. Of course. Uh, let's have a look for you.
New JustCall: SoI just realized that still I would have a subscription to come. 
System: Yeah. Of course. Let's have a look.
New JustCall: I'm trying it, like, everysecond day or something, but they were quite sick. So 
System: Oh, bless them. Of course. Now okay. Let's have a look.
System: Um, are you okay just to confirm your, um, address and postcode for me first, please?
New JustCall: Yes. It's Moercroft, Old Fiskie Road,Ochterhaus Dundee. Delta delta 3 0 Romeo delta. 
System: That's great. Thank you. And can I just take your full name as well, please?
New JustCall: It's on Gula York.
System: That's great. Yeah. So 

In [None]:
# def summarize(tokenizer, model, dialogue):
#     inputs = tokenizer(dialogue, return_tensors="pt").to(DEVICE)
#     inputs_length = len(inputs["input_ids"][0])
#     with torch.inference_mode():
#         outputs = model.generate(**inputs, max_new_tokens=256, do_sample=True, temperature=0.0001)
#     # Manually remove tensors from GPU to clear memory
#     del inputs
#     torch.cuda.empty_cache()  # Clear after each iteration
#     return tokenizer.decode(outputs[0][inputs_length:], skip_special_tokens=True)

# import os
# os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
# finetuned_generated_summaries = []
# for idx, row in enumerate(data_preprocessed["dialogue"]):
#     finetuned_generated_summary = summarize(tokenizer, model.to(DEVICE), row).strip() ##use of GPU (max 34.7GB )
#     finetuned_generated_summaries.append(finetuned_generated_summary)
#     torch.cuda.empty_cache()  # Ensure the GPU cache is cleared after each iteration


In [None]:
# A plain for loop frees memory after each iteration and does not cause an out-of-memory error, whereas `map` does.
# import os
# import torch

# os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
# batch_size = 10  # Adjust based on your memory constraints
# finetuned_generated_summaries = []
# for idx, row in enumerate(data_preprocessed["dialogue"]):
#   finetuned_generated_summary = summarize(tokenizer, model.to(DEVICE), row).strip() ##use of GPU (max 34.7GB )
#   finetuned_generated_summaries.append(finetuned_generated_summary)
#   # Clear GPU cache
#   torch.cuda.empty_cache()

#   # Optionally delete variables to free up memory
#   del finetuned_generated_summary

In [None]:
# Attach the fine-tuned model's summaries to the dataset as a new
# "finetuned_generated_summary" column (add_column's result is rebound to
# data_preprocessed).
data_preprocessed = data_preprocessed.add_column("finetuned_generated_summary", finetuned_generated_summaries)


In [None]:
import os

# Ask the CUDA caching allocator for expandable segments.
# NOTE(review): this variable is presumably only read when the allocator is
# first initialised, so setting it after CUDA has already been used in the
# session likely has no effect - consider moving it to the top of the notebook.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

# Move the base model to the target device once, not on every iteration.
_base_model = raw_model.to(DEVICE)

# A plain for loop is used here: per the original note, it does not cause the
# out-of-memory error that `map` does.
# (An earlier batched variant that called torch.cuda.empty_cache() after each
# batch was dead, commented-out code and has been removed.)
raw_generated_summaries = []
for row in data_preprocessed["dialogue"]:
    # Observed GPU usage for this loop: max 34.7GB.
    raw_generated_summary = summarize(raw_tokenizer, _base_model, row).strip()
    raw_generated_summaries.append(raw_generated_summary)

In [None]:
# Attach the base model's summaries to the dataset as a new
# "raw_generated_summary" column (add_column's result is rebound to
# data_preprocessed).
data_preprocessed = data_preprocessed.add_column("raw_generated_summary", raw_generated_summaries)


In [None]:
print(data_preprocessed[4]["dialogue"])


<s>[INST] You are a helpful assistant. Your task is to generate following dialogue summarization:

VIJAYA: Hello?
Mohammed Khaled: Hello? Uh, hi. Uh, good morning. Jay, Mohammed here. Mohammed,is signing. Eric does. It's a month for signing that you probably use. 
VIJAYA: Hello. Hello. Moment by. Hello?
Mohammed Khaled: Alright. Hi. I am so sorry if I missed.Meet a basically hard connection, so I was going to reconsider the stuff. So I was working Yeah. I just I just I just I 
VIJAYA: Yeah. Yeah.
Mohammed Khaled: yes. It it is. It is a beautiful How are you, bro? Tell me. America.
VIJAYA: Good.
Mohammed Khaled: Sorry. I couldn't reply to your email yesterday. I was in the office. Uh, but I got some emails.Okay. 
VIJAYA: Smoke detectors, uh, smoke alarm detectors.
Mohammed Khaled: Okay. So, um, I I just want you to go ahead with this product.
VIJAYA: Would does that unscrew replace
Mohammed Khaled: The 300. Okay.
VIJAYA: Okay. No. No worries. That's, uh,
Mohammed Khaled: Uh, so it's gon

In [None]:
#ground truth
print(data_preprocessed[4]["summary"])


Call Summary:
1. Mohammed apologizes for missing Vijaya's email and explains that he was in the office.
2. Vijaya mentions smoke detectors as a product of interest.
3. Mohammed suggests going ahead with the smoke detectors.
4. Vijaya asks about the process of unscrewing and replacing the detectors.
5. Mohammed confirms the price of the detectors as $300.
6. The call ends with both parties expressing gratitude.




In [None]:
print(data_preprocessed[4]["finetuned_generated_summary"])


INSTINST:
1. Mohammed apologizes for missing a hard connection and mentions working on something.
2. Mohammed mentions receiving emails and apologizes for not replying to Vijay's email.
3. Mohammed suggests going ahead with a product related to smoke detectors or alarms.
4. Mohammed confirms the replacement of the alarm to be short.


In [None]:
# Base-model summary for example 4, the same example whose dialogue, ground
# truth and fine-tuned summary are printed in the neighbouring cells, so that
# all four outputs describe one conversation.
print(data_preprocessed[4]["raw_generated_summary"])


mma: Hi Carrie, it's Emma from 4 Paws. I'm checking in before your photo session tomorrow. Everything okay for an early morning appointment?

Carrie: Yes, that's fine. And you?

Emma: Good, thanks. I wanted to confirm some details. Your dogs are Bolen, a partner, Pepper, and Cheddar, correct?

Carrie: Yes, that's right.

Emma: Great. You're all excited for the session, I assume?

Carrie: Yes, we are. We hope it won't rain.

Emma: Don't worry, we're excellent at working around rain, and it's actually the best weather for photos. For Pepper and Cheddar, please bring them on a collar and lead instead of a harness for better photo opportunities.

Carrie: Understood.

Emma: You have a Corgi and a German Shepherd, correct?

Carrie: Yes.

Emma: That's an interesting pair! You mentioned wanting some beautiful natural shots in a bushland setting. Still interested in that?


In [None]:
import os

import pandas as pd

# Folder on the mounted Google Drive that receives the comparison file.
path = "/content/drive/MyDrive/finetune_sum/comparison"
# Create the folder on first use so that to_csv does not fail on a missing
# directory.
os.makedirs(path, exist_ok=True)

# One row per example: the prompt, the reference summary and the fine-tuned
# model's summary. The base-model column ("raw_generated_summary") is not part
# of this export.
df = pd.DataFrame({
    'dialogue': data_preprocessed['dialogue'],
    'groundtruth_summary': data_preprocessed['summary'],
    'finetuned_generated_summary': data_preprocessed['finetuned_generated_summary'],
})

# Save the DataFrame to a CSV file; index=True writes the row index as the
# first column.
df.to_csv(f'{path}/finetune_comp_with_groundTruth.csv', index=True)

## ROUGE metric evaluation


*   ROUGE-L: Use it to evaluate single-sentence summaries, or when you care about the sequence of words within individual sentences.
*   ROUGE-Lsum: Use it for multi-sentence summaries, when you want to evaluate the overall coherence and structure of the entire summary.
*   Low scores (0.1 - 0.2): indicate poor performance, with minimal overlap and relevance.
*   Moderate scores (0.3 - 0.4): common for well-performing models on challenging tasks.
*   High scores (0.5 - 0.6 and above): indicate strong performance, though achieving such scores depends on the dataset and task complexity.


*In our case, we focus on ROUGE-Lsum.*

In [None]:
"""
0.0: No overlap between the generated summary and the reference summary.
 This indicates that the generated summary is completely dissimilar to the reference.

1.0: Perfect overlap between the generated summary and the reference summary. This would mean the generated summary is identical to the reference.


"""
rouge = evaluate.load('rouge')


Downloading builder script:   0%|          | 0.00/6.27k [00:00<?, ?B/s]

In [None]:
# Fine-tuned model ROUGE
# Predictions and references are columns of the same dataset, so they already
# line up row for row and the references need no slicing.
finetuned_rouge = rouge.compute(
    predictions=data_preprocessed["finetuned_generated_summary"],
    references=data_preprocessed["summary"],
    use_aggregator=True,  # one aggregated score per ROUGE variant
    use_stemmer=True,
)
# `model_rouge` is reassigned by the base-model cell, so the fine-tuned scores
# also live under their own name; `model_rouge` is kept for existing uses.
model_rouge = finetuned_rouge
model_rouge

{'rouge1': 0.6904313178719867,
 'rouge2': 0.5203692657790906,
 'rougeL': 0.5980214859662567,
 'rougeLsum': 0.6771554558791575}

In [None]:
# Base model ROUGE
# Predictions and references are columns of the same dataset, so they already
# line up row for row and the references need no slicing.
raw_rouge = rouge.compute(
    predictions=data_preprocessed["raw_generated_summary"],
    references=data_preprocessed["summary"],
    use_aggregator=True,  # one aggregated score per ROUGE variant
    use_stemmer=True,
)
# `model_rouge` is also the name used for the fine-tuned scores, so the base
# scores get their own name too; `model_rouge` is kept for existing uses.
model_rouge = raw_rouge
model_rouge