# Import and Logins

In [1]:
import os
from dataclasses import dataclass, field
from typing import Optional

In [2]:
import torch
from peft import LoraConfig, prepare_model_for_kbit_training
from transformers import (
AutoTokenizer,
BitsAndBytesConfig,
HfArgumentParser,
TrainingArguments,
AutoModelForCausalLM
)

In [3]:
from tqdm.notebook import tqdm

In [4]:
from trl import SFTTrainer

In [5]:
from huggingface_hub import interpreter_login

In [None]:
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

In [6]:
interpreter_login()


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    A token is already saved on your machine. Run `huggingface-cli whoami` to get more information or `huggingface-cli logout` if you want to log out.
    Setting a new token will erase the existing one.
    To log in, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .


Enter your token (input will not be visible):  ········
Add token as git credential? (Y/n)  Y


Token is valid (permission: write).
[1m[31mCannot authenticate through git-credential as no helper is defined on your machine.
You might have to re-authenticate when pushing to the Hugging Face Hub.
Run the following command in your terminal in case you want to set the 'store' credential helper as default.

git config --global credential.helper store

Read https://git-scm.com/book/en/v2/Git-Tools-Credential-Storage for more details.[0m
Token has not been saved to git credential helper.
Your token has been saved to /home/mouli/.cache/huggingface/token
Login successful


# Load Dataset

In [65]:
import pandas as pd

In [66]:
df = pd.read_csv('cleaned_data_for_finetuning.csv')
df.head()

Unnamed: 0,Question,Answer
0,What is a gastric ulcer?,"A gastric ulcer, also known as a stomach ulcer..."
1,What causes gastric ulcers?,"Gastric ulcers, also known as stomach ulcers, ..."
2,How does H. pylori infection contribute to gas...,Helicobacter pylori (H. pylori) infection cont...
3,What are the common symptoms of a gastric ulcer?,Common symptoms of a gastric ulcer include: 1...
4,How are gastric ulcers and duodenal ulcers dif...,Gastric ulcers and duodenal ulcers are both ty...


In [67]:
df.shape

(70, 2)

In [68]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 70 entries, 0 to 69
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   Question  70 non-null     object
 1   Answer    70 non-null     object
dtypes: object(2)
memory usage: 1.2+ KB


In [69]:
instruction_text = "Understand the following question and provide answer in a detailed manner"

df['Instruction'] = instruction_text

In [70]:
df.head()

Unnamed: 0,Question,Answer,Instruction
0,What is a gastric ulcer?,"A gastric ulcer, also known as a stomach ulcer...",Understand the following question and provide ...
1,What causes gastric ulcers?,"Gastric ulcers, also known as stomach ulcers, ...",Understand the following question and provide ...
2,How does H. pylori infection contribute to gas...,Helicobacter pylori (H. pylori) infection cont...,Understand the following question and provide ...
3,What are the common symptoms of a gastric ulcer?,Common symptoms of a gastric ulcer include: 1...,Understand the following question and provide ...
4,How are gastric ulcers and duodenal ulcers dif...,Gastric ulcers and duodenal ulcers are both ty...,Understand the following question and provide ...


In [14]:
df.shape

(70, 3)

In [16]:
tokenizer_model = 'microsoft/Phi-3-mini-4k-instruct'
tokenizer = AutoTokenizer.from_pretrained(tokenizer_model, use_fast=True)

In [17]:
data_prompt = """Below is an instruction that describes a task, without further context. Write a response that appropriately completes the request.
### Instruction:
{}
### Input:
{}
### Response:
{}"""

EOS_TOKEN = temp_tokenizer.eos_token  # Make sure you have a tokenizer object initialized

def formatting_prompts_func(examples):
    instructions = examples['Instruction']  # Use the instruction column
    inputs = examples['Question']  # Use the Question column as input
    outputs = examples['Answer']  # Use the Answer column as output

    # Format each row
    texts = []
    for instruction, input_text, output in zip(instructions, inputs, outputs):
        # Format the text according to the Alpaca template
        text = data_prompt.format(instruction, input_text, output) + EOS_TOKEN
        texts.append(text)
    
    return { "text": texts }

from datasets import Dataset
dataset = Dataset.from_pandas(df)
formatted_dataset = dataset.map(formatting_prompts_func, batched=True)

Map:   0%|          | 0/70 [00:00<?, ? examples/s]

In [18]:
formatted_dataset

Dataset({
    features: ['Question', 'Answer', 'Instruction', 'text'],
    num_rows: 70
})

In [19]:
print(formatted_dataset['text'][0])

Below is an instruction that describes a task, without further context. Write a response that appropriately completes the request.
### Instruction:
Understand the following question and provide answer in a detailed manner
### Input:
What is a gastric ulcer?
### Response:
A gastric ulcer, also known as a stomach ulcer, is a type of peptic ulcer that specifically occurs in the stomach lining. It is a sore or lesion that develops when the stomach's protective mucus lining is diminished, which allows the acidic digestive juices to erode the tissues that line the stomach. This can lead to pain, bleeding, and other gastrointestinal symptoms.  The most common cause of gastric ulcers is the prolonged use of nonsteroidal anti-inflammatory drugs (NSAIDs) such as ibuprofen and aspirin, and infection with the bacterium Helicobacter pylori. Other factors that can contribute to the development of gastric ulcers include excessive alcohol consumption, smoking, severe stress, and the use of certain oth

In [21]:
df['Formatted'] = pd.DataFrame(formatted_dataset['text'])

In [23]:
df['Formatted'][0]

"Below is an instruction that describes a task, without further context. Write a response that appropriately completes the request.\n### Instruction:\nUnderstand the following question and provide answer in a detailed manner\n### Input:\nWhat is a gastric ulcer?\n### Response:\nA gastric ulcer, also known as a stomach ulcer, is a type of peptic ulcer that specifically occurs in the stomach lining. It is a sore or lesion that develops when the stomach's protective mucus lining is diminished, which allows the acidic digestive juices to erode the tissues that line the stomach. This can lead to pain, bleeding, and other gastrointestinal symptoms.  The most common cause of gastric ulcers is the prolonged use of nonsteroidal anti-inflammatory drugs (NSAIDs) such as ibuprofen and aspirin, and infection with the bacterium Helicobacter pylori. Other factors that can contribute to the development of gastric ulcers include excessive alcohol consumption, smoking, severe stress, and the use of cert

In [24]:
new_df = df.rename(columns={'Formatted': 'Text'})

In [25]:
new_df

Unnamed: 0,Question,Answer,Instruction,Text
0,What is a gastric ulcer?,"A gastric ulcer, also known as a stomach ulcer...",Understand the following question and provide ...,"Below is an instruction that describes a task,..."
1,What causes gastric ulcers?,"Gastric ulcers, also known as stomach ulcers, ...",Understand the following question and provide ...,"Below is an instruction that describes a task,..."
2,How does H. pylori infection contribute to gas...,Helicobacter pylori (H. pylori) infection cont...,Understand the following question and provide ...,"Below is an instruction that describes a task,..."
3,What are the common symptoms of a gastric ulcer?,Common symptoms of a gastric ulcer include: 1...,Understand the following question and provide ...,"Below is an instruction that describes a task,..."
4,How are gastric ulcers and duodenal ulcers dif...,Gastric ulcers and duodenal ulcers are both ty...,Understand the following question and provide ...,"Below is an instruction that describes a task,..."
...,...,...,...,...
65,Can black stool indicate a gastric ulcer?,"Yes, black stool can be an indication of a gas...",Understand the following question and provide ...,"Below is an instruction that describes a task,..."
66,What is the purpose of performing a tissue bio...,A tissue biopsy in the case of gastric ulcers ...,Understand the following question and provide ...,"Below is an instruction that describes a task,..."
67,What is the Sakita classification in gastric u...,The Sakita classification is a system used to ...,Understand the following question and provide ...,"Below is an instruction that describes a task,..."
68,Could vomiting blood indicate a gastric ulcer?,"Yes, vomiting blood can indeed indicate a gast...",Understand the following question and provide ...,"Below is an instruction that describes a task,..."


In [26]:
new_df = new_df[['Text']]

In [27]:
new_df

Unnamed: 0,Text
0,"Below is an instruction that describes a task,..."
1,"Below is an instruction that describes a task,..."
2,"Below is an instruction that describes a task,..."
3,"Below is an instruction that describes a task,..."
4,"Below is an instruction that describes a task,..."
...,...
65,"Below is an instruction that describes a task,..."
66,"Below is an instruction that describes a task,..."
67,"Below is an instruction that describes a task,..."
68,"Below is an instruction that describes a task,..."


In [28]:
new_df.to_csv('formatted_data_phi3_finetuning.csv', index=False)

In [31]:
temp_df = pd.read_csv('formatted_data_phi3_finetuning.csv')

In [34]:
temp_df['Text'][0]

"Below is an instruction that describes a task, without further context. Write a response that appropriately completes the request.\n### Instruction:\nUnderstand the following question and provide answer in a detailed manner\n### Input:\nWhat is a gastric ulcer?\n### Response:\nA gastric ulcer, also known as a stomach ulcer, is a type of peptic ulcer that specifically occurs in the stomach lining. It is a sore or lesion that develops when the stomach's protective mucus lining is diminished, which allows the acidic digestive juices to erode the tissues that line the stomach. This can lead to pain, bleeding, and other gastrointestinal symptoms.  The most common cause of gastric ulcers is the prolonged use of nonsteroidal anti-inflammatory drugs (NSAIDs) such as ibuprofen and aspirin, and infection with the bacterium Helicobacter pylori. Other factors that can contribute to the development of gastric ulcers include excessive alcohol consumption, smoking, severe stress, and the use of cert

# Create Training dataset

In [7]:
from datasets import load_dataset

In [8]:
training_dataset = load_dataset('csv', data_files='formatted_data_phi3_finetuning.csv', split='train')

In [9]:
training_dataset

Dataset({
    features: ['Text'],
    num_rows: 70
})

In [10]:
training_dataset[0]

{'Text': "Below is an instruction that describes a task, without further context. Write a response that appropriately completes the request.\n### Instruction:\nUnderstand the following question and provide answer in a detailed manner\n### Input:\nWhat is a gastric ulcer?\n### Response:\nA gastric ulcer, also known as a stomach ulcer, is a type of peptic ulcer that specifically occurs in the stomach lining. It is a sore or lesion that develops when the stomach's protective mucus lining is diminished, which allows the acidic digestive juices to erode the tissues that line the stomach. This can lead to pain, bleeding, and other gastrointestinal symptoms.  The most common cause of gastric ulcers is the prolonged use of nonsteroidal anti-inflammatory drugs (NSAIDs) such as ibuprofen and aspirin, and infection with the bacterium Helicobacter pylori. Other factors that can contribute to the development of gastric ulcers include excessive alcohol consumption, smoking, severe stress, and the us

# Fine-Tuning

In [11]:
if torch.cuda.is_bf16_supported():
  compute_dtype = torch.bfloat16
  attn_implementation = 'flash_attention_2'
# If bfloat16 is not supported, 'compute_dtype' is set to 'torch.float16' and 'attn_implementation' is set to 'sdpa'.
else:
  compute_dtype = torch.float16
  attn_implementation = 'sdpa'

# This line of code is used to print the value of 'attn_implementation', which indicates the chosen attention implementation.
print(attn_implementation)
print(compute_dtype)

flash_attention_2
torch.bfloat16


In [12]:
#@title Show current memory stats
gpu_stats = torch.cuda.get_device_properties(0)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")

GPU = NVIDIA RTX A5000. Max memory = 23.577 GB.
0.0 GB of memory reserved.


In [13]:
# from accelerate import PartialState
# device_map = 'DDP'

# device_string = PartialState().process_index
# device_map = {'': device_string}

In [14]:
# os.environ['CUDA_LAUNCH_BLOCKING'] = '1'

In [15]:
device_map = 'auto'

In [17]:
# torch.cuda.empty_cache()

In [40]:
base_model = 'microsoft/Phi-3-mini-4k-instruct'
new_model = 'Phi-3-fine-tuned'

tokenizer = AutoTokenizer.from_pretrained(base_model, use_fast=True, trust_remote_code=True)
tokenizer.pad_token = tokenizer.unk_token
tokenizer.pad_token_id = tokenizer.convert_tokens_to_ids(tokenizer.pad_token)
tokenizer.padding_side = 'right'

# bnb_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_quant_type='nf4',
#     bnb_4bit_compute_dtype= 'bfloat16',
#     bnb_4bit_use_double_quant= True
# )

model = AutoModelForCausalLM.from_pretrained(
    base_model,
    # quantization_config = bnb_config,
    trust_remote_code = True,
    attn_implementation = attn_implementation,
    device_map = device_map,
)

# model = prepare_model_for_kbit_training(model)

loading file tokenizer.model from cache at /home/mouli/.cache/huggingface/hub/models--microsoft--Phi-3-mini-4k-instruct/snapshots/5a516f86087853f9d560c95eb9209c1d4ed9ff69/tokenizer.model
loading file tokenizer.json from cache at /home/mouli/.cache/huggingface/hub/models--microsoft--Phi-3-mini-4k-instruct/snapshots/5a516f86087853f9d560c95eb9209c1d4ed9ff69/tokenizer.json
loading file added_tokens.json from cache at /home/mouli/.cache/huggingface/hub/models--microsoft--Phi-3-mini-4k-instruct/snapshots/5a516f86087853f9d560c95eb9209c1d4ed9ff69/added_tokens.json
loading file special_tokens_map.json from cache at /home/mouli/.cache/huggingface/hub/models--microsoft--Phi-3-mini-4k-instruct/snapshots/5a516f86087853f9d560c95eb9209c1d4ed9ff69/special_tokens_map.json
loading file tokenizer_config.json from cache at /home/mouli/.cache/huggingface/hub/models--microsoft--Phi-3-mini-4k-instruct/snapshots/5a516f86087853f9d560c95eb9209c1d4ed9ff69/tokenizer_config.json
Special tokens have been added in t

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

All model checkpoint weights were used when initializing Phi3ForCausalLM.

All the weights of Phi3ForCausalLM were initialized from the model checkpoint at microsoft/Phi-3-mini-4k-instruct.
If your task is similar to the task the model of the checkpoint was trained on, you can already use Phi3ForCausalLM for predictions without further training.
loading configuration file generation_config.json from cache at /home/mouli/.cache/huggingface/hub/models--microsoft--Phi-3-mini-4k-instruct/snapshots/5a516f86087853f9d560c95eb9209c1d4ed9ff69/generation_config.json
Generate config GenerationConfig {
  "bos_token_id": 1,
  "eos_token_id": [
    32000,
    32001,
    32007
  ],
  "pad_token_id": 32000
}



In [41]:
from transformers import set_seed
set_seed(1234)

In [42]:
from peft import TaskType, PeftModel

args = TrainingArguments(
        output_dir="./Phi-3-fine-tuned",
        evaluation_strategy="steps",
        do_eval=True,
        optim="adamw_torch",
        per_device_train_batch_size=4,
        gradient_accumulation_steps=2,
        per_device_eval_batch_size=4,
        log_level="debug",
        save_strategy="epoch",
        logging_steps=100,
        learning_rate=1e-4,
        fp16 = not torch.cuda.is_bf16_supported(),
        bf16 = torch.cuda.is_bf16_supported(),
        eval_steps=100,
        num_train_epochs=3,
        warmup_ratio=0.1,
        lr_scheduler_type="linear",
        gradient_checkpointing=True,
        gradient_checkpointing_kwargs= {'use_reentrant': False},
        seed=42,
)

peft_config = LoraConfig(
        r=16,
        lora_alpha=16,
        lora_dropout=0.05,
        task_type=TaskType.CAUSAL_LM,
        target_modules= ['k_proj', 'q_proj', 'v_proj', 'o_proj', "gate_proj", "down_proj", "up_proj"],
)

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


In [43]:
trainer = SFTTrainer(
        model=model,
        train_dataset = training_dataset,
        peft_config=peft_config,
        dataset_text_field="Text",
        max_seq_length=512,
        tokenizer=tokenizer,
        args=args,
)


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.
PyTorch: setting up devices
You have loaded a model on multiple GPUs. `is_model_parallel` attribute will be force-set to `True` to avoid any unexpected behavior such as device placement mismatching.
Using auto half precision backend


In [44]:
trainer.train()

# save model in local
trainer.save_model()

Currently training with a batch size of: 4
***** Running training *****
  Num examples = 70
  Num Epochs = 3
  Instantaneous batch size per device = 4
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 2
  Total optimization steps = 27
  Number of trainable parameters = 8,912,896
  with device_autocast_ctx, torch.cpu.amp.autocast(**cpu_autocast_kwargs), recompute_context:  # type: ignore[attr-defined]


Step,Training Loss,Validation Loss


Saving model checkpoint to ./Phi-3-fine-tuned/checkpoint-9
loading configuration file config.json from cache at /home/mouli/.cache/huggingface/hub/models--microsoft--Phi-3-mini-4k-instruct/snapshots/5a516f86087853f9d560c95eb9209c1d4ed9ff69/config.json
Model config Phi3Config {
  "_name_or_path": "Phi-3-mini-4k-instruct",
  "architectures": [
    "Phi3ForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "auto_map": {
    "AutoConfig": "microsoft/Phi-3-mini-4k-instruct--configuration_phi3.Phi3Config",
    "AutoModelForCausalLM": "microsoft/Phi-3-mini-4k-instruct--modeling_phi3.Phi3ForCausalLM"
  },
  "bos_token_id": 1,
  "embd_pdrop": 0.0,
  "eos_token_id": 32000,
  "hidden_act": "silu",
  "hidden_size": 3072,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "max_position_embeddings": 4096,
  "model_type": "phi3",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 32,
  "original_max_position_embeddings": 4096,
  "pad_token_i

0

In [45]:
hf_adapter_repo="mouli2001/phi3-fine-tuned-gastro"

In [46]:
trainer.push_to_hub(hf_adapter_repo)

Saving model checkpoint to ./Phi-3-fine-tuned
loading configuration file config.json from cache at /home/mouli/.cache/huggingface/hub/models--microsoft--Phi-3-mini-4k-instruct/snapshots/5a516f86087853f9d560c95eb9209c1d4ed9ff69/config.json
Model config Phi3Config {
  "_name_or_path": "Phi-3-mini-4k-instruct",
  "architectures": [
    "Phi3ForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "auto_map": {
    "AutoConfig": "microsoft/Phi-3-mini-4k-instruct--configuration_phi3.Phi3Config",
    "AutoModelForCausalLM": "microsoft/Phi-3-mini-4k-instruct--modeling_phi3.Phi3ForCausalLM"
  },
  "bos_token_id": 1,
  "embd_pdrop": 0.0,
  "eos_token_id": 32000,
  "hidden_act": "silu",
  "hidden_size": 3072,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "max_position_embeddings": 4096,
  "model_type": "phi3",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 32,
  "original_max_position_embeddings": 4096,
  "pad_token_id": 32000,
  

Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

training_args.bin:   0%|          | 0.00/5.43k [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/35.7M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/mouli2001/Phi-3-fine-tuned/commit/de7a61553dd7e7a75ff779ff1605c879229a12ba', commit_message='mouli2001/phi3-fine-tuned-gastro', commit_description='', oid='de7a61553dd7e7a75ff779ff1605c879229a12ba', pr_url=None, repo_url=RepoUrl('https://huggingface.co/mouli2001/Phi-3-fine-tuned', endpoint='https://huggingface.co', repo_type='model', repo_id='mouli2001/Phi-3-fine-tuned'), pr_revision=None, pr_num=None)

In [47]:
# This code block is used to free up GPU memory.

# 'del model' and 'del trainer' are used to delete the 'model' and 'trainer' objects. 
# This removes the references to these objects, allowing Python's garbage collector to free up the memory they were using.

del model
del trainer

# 'import gc' is used to import Python's garbage collector module.
import gc

# 'gc.collect()' is a method that triggers a full garbage collection, which can help to free up memory.
# It's called twice here to ensure that all unreachable objects are collected.
gc.collect()
gc.collect()
     

0

In [48]:
torch.cuda.empty_cache()

In [49]:
gc.collect()

0

In [51]:
hf_adapter_repo="mouli2001/phi3-fine-tuned-gastro"

In [None]:
# peft_model_id = hf_adapter_repo
# tr_model_id = model_name

# model = AutoModelForCausalLM.from_pretrained(tr_model_id, trust_remote_code=True, torch_dtype=compute_dtype)
# model = PeftModel.from_pretrained(model, peft_model_id)
# model = model.merge_and_unload()

In [52]:
from peft import AutoPeftModelForCausalLM

In [53]:
new_model = AutoPeftModelForCausalLM.from_pretrained(
    args.output_dir,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.bfloat16, #torch.float16,
    trust_remote_code=True,
    device_map=device_map,
)

merged_model = new_model.merge_and_unload()
merged_model.save_pretrained("merged_model", trust_remote_code=True, safe_serialization=True)
tokenizer.save_pretrained("merged_model")

loading configuration file config.json from cache at /home/mouli/.cache/huggingface/hub/models--microsoft--Phi-3-mini-4k-instruct/snapshots/5a516f86087853f9d560c95eb9209c1d4ed9ff69/config.json
loading configuration file config.json from cache at /home/mouli/.cache/huggingface/hub/models--microsoft--Phi-3-mini-4k-instruct/snapshots/5a516f86087853f9d560c95eb9209c1d4ed9ff69/config.json
Model config Phi3Config {
  "_name_or_path": "microsoft/Phi-3-mini-4k-instruct",
  "architectures": [
    "Phi3ForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "auto_map": {
    "AutoConfig": "microsoft/Phi-3-mini-4k-instruct--configuration_phi3.Phi3Config",
    "AutoModelForCausalLM": "microsoft/Phi-3-mini-4k-instruct--modeling_phi3.Phi3ForCausalLM"
  },
  "bos_token_id": 1,
  "embd_pdrop": 0.0,
  "eos_token_id": 32000,
  "hidden_act": "silu",
  "hidden_size": 3072,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "max_position_embeddings": 4096,
  "model_type": "phi3",

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

All model checkpoint weights were used when initializing Phi3ForCausalLM.

All the weights of Phi3ForCausalLM were initialized from the model checkpoint at microsoft/Phi-3-mini-4k-instruct.
If your task is similar to the task the model of the checkpoint was trained on, you can already use Phi3ForCausalLM for predictions without further training.
loading configuration file generation_config.json from cache at /home/mouli/.cache/huggingface/hub/models--microsoft--Phi-3-mini-4k-instruct/snapshots/5a516f86087853f9d560c95eb9209c1d4ed9ff69/generation_config.json
Generate config GenerationConfig {
  "bos_token_id": 1,
  "eos_token_id": [
    32000,
    32001,
    32007
  ],
  "pad_token_id": 32000
}

loading file tokenizer.model
loading file tokenizer.json
loading file added_tokens.json
loading file special_tokens_map.json
loading file tokenizer_config.json
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
You are resizing th

('merged_model/tokenizer_config.json',
 'merged_model/special_tokens_map.json',
 'merged_model/tokenizer.json')

In [55]:
merged_model.push_to_hub(hf_adapter_repo)

Configuration saved in /tmp/tmpaibn3d8m/config.json
Configuration saved in /tmp/tmpaibn3d8m/generation_config.json
The model is bigger than the maximum size per checkpoint (5GB) and is going to be split in 2 checkpoint shards. You can find where each parameters has been saved in the index located at /tmp/tmpaibn3d8m/model.safetensors.index.json.
Uploading the following files to mouli2001/phi3-fine-tuned-gastro: generation_config.json,README.md,config.json,model-00001-of-00002.safetensors,model.safetensors.index.json,model-00002-of-00002.safetensors


model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.67G [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/mouli2001/phi3-fine-tuned-gastro/commit/983d3d086ef930e71798033bdfce05475b6429d1', commit_message='Upload Phi3ForCausalLM', commit_description='', oid='983d3d086ef930e71798033bdfce05475b6429d1', pr_url=None, repo_url=RepoUrl('https://huggingface.co/mouli2001/phi3-fine-tuned-gastro', endpoint='https://huggingface.co', repo_type='model', repo_id='mouli2001/phi3-fine-tuned-gastro'), pr_revision=None, pr_num=None)

In [56]:
tokenizer.push_to_hub(hf_adapter_repo)

README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

tokenizer config file saved in /tmp/tmpq3qrb4dz/tokenizer_config.json
Special tokens file saved in /tmp/tmpq3qrb4dz/special_tokens_map.json
Uploading the following files to mouli2001/phi3-fine-tuned-gastro: README.md,tokenizer_config.json,special_tokens_map.json,tokenizer.json


CommitInfo(commit_url='https://huggingface.co/mouli2001/phi3-fine-tuned-gastro/commit/42305a19c5088d583b73681eefa83417fc60db84', commit_message='Upload tokenizer', commit_description='', oid='42305a19c5088d583b73681eefa83417fc60db84', pr_url=None, repo_url=RepoUrl('https://huggingface.co/mouli2001/phi3-fine-tuned-gastro', endpoint='https://huggingface.co', repo_type='model', repo_id='mouli2001/phi3-fine-tuned-gastro'), pr_revision=None, pr_num=None)

# Test

In [None]:
import os
os.environ["HF_TOKEN"] = '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'

In [57]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, set_seed

set_seed(1234) 

In [58]:
tokenizer = AutoTokenizer.from_pretrained(hf_adapter_repo,trust_remote_code=True)

tokenizer_config.json:   0%|          | 0.00/3.33k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/447 [00:00<?, ?B/s]

loading file tokenizer.model from cache at None
loading file tokenizer.json from cache at /home/mouli/.cache/huggingface/hub/models--mouli2001--phi3-fine-tuned-gastro/snapshots/42305a19c5088d583b73681eefa83417fc60db84/tokenizer.json
loading file added_tokens.json from cache at None
loading file special_tokens_map.json from cache at /home/mouli/.cache/huggingface/hub/models--mouli2001--phi3-fine-tuned-gastro/snapshots/42305a19c5088d583b73681eefa83417fc60db84/special_tokens_map.json
loading file tokenizer_config.json from cache at /home/mouli/.cache/huggingface/hub/models--mouli2001--phi3-fine-tuned-gastro/snapshots/42305a19c5088d583b73681eefa83417fc60db84/tokenizer_config.json
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [59]:
model = AutoModelForCausalLM.from_pretrained(hf_adapter_repo, trust_remote_code=True, torch_dtype="auto", device_map="cuda")

config.json:   0%|          | 0.00/1.01k [00:00<?, ?B/s]

loading configuration file config.json from cache at /home/mouli/.cache/huggingface/hub/models--mouli2001--phi3-fine-tuned-gastro/snapshots/42305a19c5088d583b73681eefa83417fc60db84/config.json
loading configuration file config.json from cache at /home/mouli/.cache/huggingface/hub/models--mouli2001--phi3-fine-tuned-gastro/snapshots/42305a19c5088d583b73681eefa83417fc60db84/config.json
Model config Phi3Config {
  "_name_or_path": "mouli2001/phi3-fine-tuned-gastro",
  "architectures": [
    "Phi3ForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "auto_map": {
    "AutoConfig": "microsoft/Phi-3-mini-4k-instruct--configuration_phi3.Phi3Config",
    "AutoModelForCausalLM": "microsoft/Phi-3-mini-4k-instruct--modeling_phi3.Phi3ForCausalLM"
  },
  "bos_token_id": 1,
  "embd_pdrop": 0.0,
  "eos_token_id": 32000,
  "hidden_act": "silu",
  "hidden_size": 3072,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "max_position_embeddings": 4096,
  "model_type": "phi3",

model.safetensors.index.json:   0%|          | 0.00/16.3k [00:00<?, ?B/s]

loading weights file model.safetensors from cache at /home/mouli/.cache/huggingface/hub/models--mouli2001--phi3-fine-tuned-gastro/snapshots/42305a19c5088d583b73681eefa83417fc60db84/model.safetensors.index.json


Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.67G [00:00<?, ?B/s]

Will use torch_dtype=torch.bfloat16 as defined in model's config object
Instantiating Phi3ForCausalLM model under default dtype torch.bfloat16.
Generate config GenerationConfig {
  "bos_token_id": 1,
  "eos_token_id": 32000,
  "pad_token_id": 32000
}



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

All model checkpoint weights were used when initializing Phi3ForCausalLM.

All the weights of Phi3ForCausalLM were initialized from the model checkpoint at mouli2001/phi3-fine-tuned-gastro.
If your task is similar to the task the model of the checkpoint was trained on, you can already use Phi3ForCausalLM for predictions without further training.


generation_config.json:   0%|          | 0.00/172 [00:00<?, ?B/s]

loading configuration file generation_config.json from cache at /home/mouli/.cache/huggingface/hub/models--mouli2001--phi3-fine-tuned-gastro/snapshots/42305a19c5088d583b73681eefa83417fc60db84/generation_config.json
Generate config GenerationConfig {
  "bos_token_id": 1,
  "eos_token_id": [
    32000,
    32001,
    32007
  ],
  "pad_token_id": 32000
}



In [61]:
from transformers import pipeline
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

In [81]:
pipe.tokenizer.apply_chat_template([{"role": "user", "content": df['Question'][0]}], tokenize=False, add_generation_prompt=True)

'<|user|>\nWhat is a gastric ulcer?<|end|>\n<|assistant|>\n'

In [83]:
def test_inference(prompt):
    prompt = pipe.tokenizer.apply_chat_template([{"role": "user", "content": prompt}], tokenize=False, add_generation_prompt=True)
    outputs = pipe(prompt, max_new_tokens=256, do_sample=True, num_beams=1, temperature=0.3, top_k=50, top_p=0.95, max_time= 180)
    return outputs[0]['generated_text'][len(prompt):].strip()

In [84]:
%%time

test_inference(df['Question'])

The `seen_tokens` attribute is deprecated and will be removed in v4.41. Use the `cache_position` model input instead.
You are not running the flash-attention implementation, expect numerical differences.


CPU times: user 7.77 s, sys: 32.3 ms, total: 7.8 s
Wall time: 7.78 s


'1. A gastric ulcer is a sore that forms on the lining of the stomach due to the erosion of the protective mucosal layer. This can lead to pain and discomfort, and if left untreated, can cause complications such as bleeding or perforation.\n\n2. Gastric ulcers are primarily caused by an infection with the bacterium Helicobacter pylori (H. pylori). Other factors that can contribute to the development of gastric ulcers include long-term use of nonsteroidal anti-inflammatory drugs (NSAIDs), excessive alcohol consumption, smoking, and certain medical conditions like Zollinger-Ellison syndrome.\n\n3. H. pylori infection contributes to gastric ulcer formation by damaging the protective mucosal layer of the stomach, making it more susceptible to the corrosive effects of gastric acid. This bacterium can also stimulate the production of stomach acid, further exacerbating the damage to the stomach lining.'

In [85]:
questions_text = """
What is a gastric ulcer?
What causes gastric ulcers?
How does H. pylori infection contribute to gastric ulcers?
What are the common symptoms of a gastric ulcer?
How are gastric ulcers and duodenal ulcers different?
What risk factors increase the likelihood of developing a gastric ulcer?
Can stress cause gastric ulcers?
How is a gastric ulcer diagnosed?
What tests are used to detect H. pylori infection?
Are gastric ulcers hereditary?
Can smoking contribute to gastric ulcers?
How do NSAIDs (nonsteroidal anti-inflammatory drugs) cause gastric ulcers?
Can alcohol consumption lead to gastric ulcers?
What lifestyle changes can help prevent gastric ulcers?
How does diet affect gastric ulcers?
What are the common treatments for gastric ulcers?
How effective are antibiotics in treating H. pylori-related ulcers?
How long does it typically take for a gastric ulcer to heal?
Are there any dietary changes recommended for someone with a gastric ulcer?
What foods should be avoided if you have a gastric ulcer?
Can lifestyle changes such as quitting smoking help with gastric ulcers?
What are the potential side effects of ulcer medications?
How often should I follow up with my doctor if I have a gastric ulcer?
Can gastric ulcers recur after treatment?
How do proton pump inhibitors (PPIs) work to treat gastric ulcers?
Is surgery ever required to treat a gastric ulcer?
What are the long-term effects of untreated gastric ulcers?
What are the signs that a gastric ulcer is bleeding?
How serious is a bleeding gastric ulcer?
What should I do if I suspect my gastric ulcer is bleeding?
Can a gastric ulcer lead to other complications such as perforation or obstruction?
How is a bleeding gastric ulcer treated in an emergency?
What are the symptoms of a perforated gastric ulcer?
How is a perforated gastric ulcer treated?
How does anemia relate to gastric ulcers?
Can chronic stomach ulcers lead to gastric cancer?
Are there any ways to prevent gastric ulcers?
How can I reduce my risk of developing a gastric ulcer?
Can long-term use of NSAIDs cause gastric ulcers?
What are the alternatives to NSAIDs if I have a history of gastric ulcers?
How often should I get screened for H. pylori if I've had a gastric ulcer before?
Are there any new treatments or research developments for gastric ulcers?
How does gastric acid secretion influence the formation of stomach ulcers?
What role do prostaglandins play in the pathophysiology of stomach ulcers?
How is the urea breath test used to diagnose H. pylori infection?
What are the mechanisms of action of proton pump inhibitors (PPIs) in ulcer treatment?
How do NSAIDs induce gastric mucosal injury leading to ulcers?
What is the role of endoscopy in the management of peptic ulcer disease?
How do you assess the severity of bleeding in a peptic ulcer patient?
What are the indications for surgical intervention in peptic ulcer disease?
How does H. pylori eradication therapy affect the recurrence of stomach ulcers?
How are refractory peptic ulcers managed clinically?
What are the long-term outcomes of patients with peptic ulcer bleeding?
How does chronic use of corticosteroids influence peptic ulcer formation?
How do you differentiate between benign and malignant gastric ulcers during endoscopy?
How do you manage patients with non-healing gastric ulcers despite standard treatment?
What are the indications for endoscopic biopsy in patients with suspected gastric ulcers?
How do you approach the treatment of gastric ulcers in patients with concurrent liver disease?
How do you tailor the treatment of peptic ulcers in elderly patients with multiple comorbidities?
What is your approach to managing patients with peptic ulcers who are on anticoagulant therapy?
What are the current guidelines for the use of proton pump inhibitors (PPIs) in peptic ulcer disease?
How do you handle cases of refractory H. pylori infection that do not respond to standard eradication regimens?
Is a bleeding gastric ulcer a sign of stomach cancer?
Where does acute pain from a gastric ulcer typically manifest?
What are the differences between gastritis, gastric erosion, and gastric ulcers?
Can black stool indicate a gastric ulcer?
What is the purpose of performing a tissue biopsy in cases of gastric ulcers?
What is the Sakita classification in gastric ulcers?
Could vomiting blood indicate a gastric ulcer?
Are there any significant blood test results when a patient has a gastric ulcer?
"""

In [87]:
questions_list = [question.strip() for question in questions_text.strip().split('\n') if question]

In [88]:
answers_dict = {}

In [89]:
for question in questions_list:
    result = test_inference(question)
    answers_dict[question] = result

You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


In [90]:
import pandas as pd
df2 = pd.DataFrame(list(answers_dict.items()), columns=['Question', 'Answer'])

In [91]:
df2.to_csv('results-fine-tuned-phi3/gastric_ulcer_questions_answers.csv', index=False)

In [94]:
df2.to_excel('results-fine-tuned-phi3/gastric_ulcer_questions_answers_excel.xlsx', index=False)

In [96]:
df2

Unnamed: 0,Question,Answer
0,What is a gastric ulcer?,"A gastric ulcer, also known as a stomach ulcer..."
1,What causes gastric ulcers?,"Gastric ulcers, also known as stomach ulcers, ..."
2,How does H. pylori infection contribute to gas...,Helicobacter pylori (H. pylori) infection cont...
3,What are the common symptoms of a gastric ulcer?,Common symptoms of a gastric ulcer include:\n\...
4,How are gastric ulcers and duodenal ulcers dif...,Gastric ulcers and duodenal ulcers are both ty...
...,...,...
65,Can black stool indicate a gastric ulcer?,"Black stool, medically referred to as melena, ..."
66,What is the purpose of performing a tissue bio...,The purpose of performing a tissue biopsy in c...
67,What is the Sakita classification in gastric u...,The Sakita classification is not a recognized ...
68,Could vomiting blood indicate a gastric ulcer?,"Yes, vomiting blood can indicate a gastric ulc..."
