# ruslanmv/Automatic-Fine-Tunning
## For SageMaker/ Google Colab

In this notebook we fine-tune the Mistral-7B-Instruct model (published under the name Medical-Mixtral) on the ai-medical-chatbot dataset, which contains more than 250k records.


In [None]:
import psutil  # Install using `!pip install psutil` if needed
def get_cpu_and_memory_info():
    """Report the current CPU load and the machine's total RAM.

    Returns:
        dict: ``"cpu_usage"`` is the value returned by
        ``psutil.cpu_percent(interval=1)``; ``"total_memory"`` is
        ``psutil.virtual_memory().total`` divided by 1024**3.
    """
    bytes_per_gib = 1024 ** 3
    report = {}
    # CPU load is sampled first, then the memory total is read
    report["cpu_usage"] = psutil.cpu_percent(interval=1)
    report["total_memory"] = psutil.virtual_memory().total / bytes_per_gib
    return report

# Take one reading of the host and print both figures
info = get_cpu_and_memory_info()

print(
    f"CPU Usage: {info['cpu_usage']}%",
    f"Total Memory: {info['total_memory']:.2f} GB",
    sep="\n",
)


In [None]:
import torch
import psutil
def get_gpu_info():
    """Return ``(gpu_count, gpu_name)`` for the CUDA devices torch can see.

    Without CUDA the result is ``(0, "No GPU available")``. Otherwise the
    count covers all devices while the name is that of device 0 only.
    """
    if not torch.cuda.is_available():
        return 0, "No GPU available"
    return torch.cuda.device_count(), torch.cuda.get_device_name(0)
# Report what get_gpu_info() found
gpu_count, gpu_name = get_gpu_info()
print(f"Number of GPUs: {gpu_count}", f"GPU Name: {gpu_name}", sep="\n")


## Step 1 - Identification of Platform

In [None]:
# Detect the hosting platform.
# Both flags are ALWAYS defined after this cell so that later cells can branch
# on `is_colab` / `is_sagemaker` without raising NameError.
import os
import sys

# Standard Colab check: the `google.colab` package is already loaded in the
# kernel of a Colab runtime and is absent elsewhere.
is_colab = "google.colab" in sys.modules
is_sagemaker = False

if is_colab:
    print("You are on Google Colab!")
else:
    print("You are not on Google Colab.")
    # NOTE(review): this is the documented location of SageMaker notebook
    # metadata — confirm it exists on the SageMaker flavour you use.
    sagemaker_metadata = "/opt/ml/metadata/resource-metadata.json"
    if os.path.exists(sagemaker_metadata):
        print("You are on SageMaker notebook instance.")
        is_sagemaker = True
    else:
        # Fallback: ask the SageMaker API (needs boto3, credentials and IAM
        # permission to list notebook instances).
        try:
            import boto3
            sagemaker_client = boto3.client('sagemaker')
            response = sagemaker_client.list_notebook_instances()
            # Check if any notebook instances are listed
            if len(response['NotebookInstances']) > 0:
                print("You are on SageMaker notebook instance.")
                is_sagemaker = True
            else:
                print("SageMaker API check inconclusive.")
        except Exception as e:
            # Best effort only: missing boto3/credentials must not stop the notebook
            print(f"An error occurred while checking with SageMaker API: {e}")
            print("Result inconclusive.")

## Step 2 - Environment Selection

In [None]:
import os
if is_colab:
    #@markdown # Connect Google Drive
    from google.colab import drive
    from IPython.display import clear_output
    import ipywidgets as widgets
    import os
    def inf(msg, style, wdth): inf = widgets.Button(description=msg, disabled=True, button_style=style, layout=widgets.Layout(min_width=wdth));display(inf)
    Shared_Drive = "" #@param {type:"string"}
    #@markdown - Leave empty if you're not using a shared drive
    print("[0;33mConnecting...")
    drive.mount('/content/gdrive')
    if Shared_Drive!="" and os.path.exists("/content/gdrive/Shareddrives"):
      mainpth="Shareddrives/"+Shared_Drive
    else:
      mainpth="MyDrive"
    clear_output()
    inf('\u2714 Done','success', '50px')
    #@markdown ---
else:
    env_name = os.environ.get("CONDA_DEFAULT_ENV", "")
    if env_name == "conda_pytorch_p310":
        print("Not detected Default Pytorch Environment")
        print("Installing missing packages")
        !pip3 install -qU torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
    else:
        print("Environment name:", env_name)

## Step 3 - Package Installation

In [None]:
import os
import sys
# Colab-only setup: clone the project repository into the Drive folder and
# point the TRANSFORMERS_CACHE / TORCH_HOME environment variables at it.
# Nothing in this cell runs when `is_colab` is False.
if is_colab:
    #@markdown # Install/Update ruslanmv repo
    from IPython.utils import capture
    from IPython.display import clear_output
    from subprocess import getoutput
    import ipywidgets as widgets
    import sys
    import fileinput
    import os
    import time
    import base64
    import requests
    from urllib.request import urlopen, Request
    from urllib.parse import urlparse, parse_qs, unquote
    from tqdm import tqdm
    import six
    # Decodes to the string "ebui"; used below as a sub-directory name
    blsaphemy = base64.b64decode("ZWJ1aQ==").decode('ascii')
    # Drive is not mounted: create a plain directory at the same path instead
    if not os.path.exists("/content/gdrive"):
        print('\033[1;31mGdrive not connected, using temporary colab storage ...')
        time.sleep(4)
        mainpth = "MyDrive"
        !mkdir -p /content/gdrive/$mainpth
        Shared_Drive = ""

    # A shared drive was requested but is not present: fall back to MyDrive
    if Shared_Drive != "" and not os.path.exists("/content/gdrive/Shareddrives"):
        print('\033[1;31mShared drive not detected, using default MyDrive')
        mainpth = "MyDrive"

    # Output of the clone commands is captured so it does not clutter the cell
    with capture.capture_output() as cap:
        def inf(msg, style, wdth):
            inf = widgets.Button(description=msg, disabled=True, button_style=style, layout=widgets.Layout(min_width=wdth))
            display(inf)
        fgitclone = "git clone --depth 1"
        !mkdir -p /content/gdrive/$mainpth/llm
        # Define the path
        main_path =f"/content/gdrive/{mainpth}/"
        !git clone -q --branch master https://github.com/ruslanmv/Automatic-Fine-Tunning /content/gdrive/$mainpth/llm/Automatic-Fine-Tunning
        # Model and torch downloads are cached inside the cloned repository;
        # `cache_dir` is what later cells pass to from_pretrained(...)
        os.environ['TRANSFORMERS_CACHE'] = f"/content/gdrive/{mainpth}/llm/Automatic-Fine-Tunning/{blsaphemy}/cache"
        os.environ['TORCH_HOME'] = f"/content/gdrive/{mainpth}/llm/Automatic-Fine-Tunning/{blsaphemy}/cache"
        cache_dir = os.environ['TRANSFORMERS_CACHE']
        # NOTE(review): the same repository is cloned a second time into the
        # "repositories" sub-folder — confirm this second copy is needed.
        !mkdir -p /content/gdrive/{mainpth}/llm/Automatic-Fine-Tunning/{blsaphemy}/repositories
        !git clone https://github.com/ruslanmv/Automatic-Fine-Tunning /content/gdrive/{mainpth}/llm/Automatic-Fine-Tunning/{blsaphemy}/repositories/Automatic-Fine-Tunningebui-assets

    # Bring the second clone to the latest commit of master
    with capture.capture_output() as cap:
        %cd /content/gdrive/{mainpth}/llm/Automatic-Fine-Tunning/{blsaphemy}/repositories/Automatic-Fine-Tunningebui-assets

        !git reset --hard
        !git checkout master
        time.sleep(1)
        !git pull
    clear_output()
    inf('\u2714 Done', 'success', '50px')
    #@markdown ---

In [None]:
def reload_environment():
    """Ask the running IPython kernel to shut down and restart.

    Any failure (IPython missing, no active kernel, ...) is reported on
    stdout instead of being raised. Returns None.
    """
    try:
        import IPython
        IPython.get_ipython().kernel.do_shutdown(restart=True)
    except Exception as err:
        print(f"Kernel restart failed: {err}")
        print("Consider manually restarting the kernel or your Jupyter Notebook server.")
    else:
        print("Kernel restarted. Packages should be reloaded.")
if is_colab:
    #@markdown # Requirements
    print('[1;32mInstalling requirements...')
    with capture.capture_output() as cap:
      %cd /content/
      !wget -q -i https://github.com/ruslanmv/Automatic-Fine-Tunning/raw/master/Dependencies/requirements.txt
      !pip install -r requirements.txt
    clear_output()
    inf('\u2714 Done','success', '50px')
    #@markdown ---
if is_sagemaker:
    !pip install -qU transformers==4.36.2 accelerate==0.25.0 duckduckgo_search==4.1.0  python-dotenv
    !pip install -qU  bitsandbytes transformers==4.36.2 peft accelerate trl datasets==2.16.0 sentencepiece protobuf
    !wget -q https://github.com/ruslanmv/Automatic-Fine-Tunning/raw/master/Dependencies/requirements.txt -O requirements.txt
    !pip install -qU -r requirements.txt 
    #reload_environment()

##  Step 4  - Load Packages

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig,HfArgumentParser,TrainingArguments,pipeline, logging
from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training, get_peft_model
import os,torch
from datasets import load_dataset
from trl import SFTTrainer
import pandas as pd
import pyarrow as pa
import pyarrow.dataset as ds
import pandas as pd
from datasets import Dataset
import re
import os

## Step 5 - Definitions

In [None]:
# Hugging Face id (or local path) of the checkpoint to fine-tune
base_model = "mistralai/Mistral-7B-Instruct-v0.2"

# Name of the fine-tuned model, and how many leading dataset rows are used for training
new_model, num = "Medical-Mixtral-7B-v250k", 256_500

# Smaller alternative run:
#new_model = "Medical-Mixtral-7B-v1k"
#num=1000



In [None]:
##  Loading Data
# Obtain the Hugging Face access token and expose it as `secret_hf`.
if is_colab:
    # On Colab the token is read through google.colab.userdata
    from google.colab import userdata
    secret_hf = userdata.get('HF_TOKEN')
else:
    import os
    from getpass import getpass
    from dotenv import load_dotenv
    from IPython.display import clear_output

    # First run: ask for the token once and persist it in a local .env file
    if not os.path.exists('.env'):
        print("Please go to the following URL and obtain your Hugging Face token:")
        print("https://huggingface.co/settings/tokens")
        print()
        # getpass keeps the token from being echoed into the cell output
        hf_token = getpass("Please enter your Hugging Face token: ").strip()

        # Create or append to .env file
        with open('.env', 'a') as f:
            f.write(f"HF_TOKEN={hf_token}\n")

    # Load the .env file into the process environment and read the token back
    load_dotenv()
    secret_hf = os.environ.get('HF_TOKEN')
    # Clear the prompt text from the cell output
    clear_output()
    if secret_hf:
        print("Loaded HF Token")
    else:
        # An existing .env without HF_TOKEN would otherwise go unnoticed
        print("HF_TOKEN was not found in the environment or in .env; add it before logging in.")

In [None]:
# Log in to the Hugging Face Hub with the token held in `secret_hf`.
# NOTE(review): the token is expanded onto the shell command line — confirm
# that is acceptable for the environment this notebook runs in.
!huggingface-cli login --token $secret_hf

## Step 6 - Dataset to Fine-tune

In [None]:
# Download the dataset and take its training split
dataset_ = load_dataset("ruslanmv/ai-medical-chatbot")
train_data = dataset_["train"]


# Keep the patient description and the doctor's reply as question/answer
df = pd.DataFrame(train_data[::])
df = df[["Description", "Doctor"]].rename(columns={"Description": "question", "Doctor": "answer"})
# Clean the question and answer columns: trim and collapse whitespace runs
df['question'] = df['question'].apply(lambda x: re.sub(r'\s+', ' ', x.strip()))
df['answer'] = df['answer'].apply(lambda x: re.sub(r'\s+', ' ', x.strip()))
# Remove a leading "Q." marker from the question.
# str.lstrip('Q. ') is not used: it strips any leading run of the characters
# 'Q', '.' and ' ', so "Q. Quick question" would become "uick question".
df['question'] = df['question'].str.replace(r'^Q\.\s*', '', regex=True)

In [None]:
# Preview the first rows of the cleaned question/answer frame
df.head()

In [None]:
# The first `num` rows are used for training; the next 100 rows are held out.
# .copy() makes both frames independent of the source frame: `df` is rebound
# to the training frame below and later receives a new column, which on a
# plain slice triggers pandas' SettingWithCopyWarning.
df_train = df.iloc[:num, :].copy()
df_test = df.iloc[num:num+100, :].copy()
# Save the train dataframe to a CSV file
df_train.to_csv('train.csv', index=False)
# Save the test dataframe to a CSV file
df_test.to_csv('test.csv', index=False)
# From here on `df` refers to the training rows only
df=df_train

## Step 7 - Formatting Your Fine-tuning Data
There are various ways to format your data for fine-tuning

Prompts provide context and guide the LLM towards the desired task. The code showcases creating prompts for question-answering tasks with placeholders for questions and answers.

In [None]:
# Assemble the training text: instruction-wrapped question followed by the answer
df["text"] = (
    "[INST]@Enlighten. "
    + df["question"]
    + "[/INST]"
    + df["answer"]
)

In [None]:
# Only the assembled "text" column is needed from here on
df = df.drop(columns=["question", "answer"])

In [None]:
# Preview the formatted training text
df.head()


In [None]:
# Show which columns remain in the frame
df.columns

In [None]:
# Convert the pandas frame into a Hugging Face `datasets.Dataset`.
# A single conversion is enough: an intermediate pyarrow dataset would be
# overwritten immediately and only cost time and memory.
dataset = Dataset.from_pandas(df)
dataset

In [None]:
# Confirm the type of the converted dataset object
type(dataset)

In [None]:
# Set to False to skip the length statistics
statistics=True
#dataset_sample = dataset.select(range(500)) # Take only the first 500 records from the dataset
if statistics:
    # Imported under an alias because the flag above is itself named `statistics`
    from statistics import median as _median

    # Lengths are measured in characters of the "text" column (not tokens).
    # Reading the column directly avoids building one dict per row.
    sequence_lengths = [len(text) for text in dataset['text']]
    if sequence_lengths:
        # Calculate statistics
        max_length = max(sequence_lengths)
        min_length = min(sequence_lengths)
        mean_length = sum(sequence_lengths) / len(sequence_lengths)
        # True median: averages the two middle values when the count is even
        median_length = _median(sequence_lengths)

        print("Max length:", max_length)
        print("Min length:", min_length)
        print("Mean length:", mean_length)
        print("Median length:", median_length)
    else:
        print("Dataset is empty; no length statistics to report.")

In [None]:
# Outside Colab (SageMaker or any other Jupyter server) keep the model cache
# and outputs under the current working directory. On Colab `cache_dir` and
# `main_path` are set by the repository-setup cell instead.
# Keyed on `is_colab` (always defined) so that both names exist on every
# non-Colab platform and `is_sagemaker` being unset cannot raise NameError.
if not is_colab:
    # Get the current directory and join with the models folder
    current_directory = os.getcwd()
    cache_dir = os.path.join(current_directory, "models")
    main_path=current_directory

## Step 8 - Downloading and Initializing Mistral-7B-Instruct-v0.2

In [None]:
# Load base model in 4-bit NF4 precision with bfloat16 compute
bnb_config = BitsAndBytesConfig(
    load_in_4bit= True,
    bnb_4bit_quant_type= "nf4",
    bnb_4bit_compute_dtype= torch.bfloat16,
    bnb_4bit_use_double_quant= False,
)
# 4-bit loading is requested through `quantization_config` only. Passing
# `load_in_4bit=True` as well is redundant and is rejected by recent
# transformers releases.
model = AutoModelForCausalLM.from_pretrained(
        base_model,
        quantization_config=bnb_config,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        trust_remote_code=True,
        cache_dir=cache_dir
)

In [None]:
# Training-time model settings
model.config.use_cache = False # silence the warnings. Please re-enable for inference!
# NOTE(review): presumably selects the non-tensor-parallel code path — confirm this config field is read by the loaded model
model.config.pretraining_tp = 1
# Enable gradient checkpointing on the model before training
model.gradient_checkpointing_enable()

In [None]:
# Load tokenizer for the base model (same cache directory as the model)
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True, cache_dir=cache_dir)
# Pad on the right and reuse the end-of-sequence token as the padding token
tokenizer.padding_side = 'right'
tokenizer.pad_token = tokenizer.eos_token
# NOTE(review): presumably makes the tokenizer append EOS to every encoded text — confirm for this tokenizer class
tokenizer.add_eos_token = True
# Display the special tokens as a quick sanity check
tokenizer.bos_token, tokenizer.eos_token

In [None]:
# Cap the tokenizer's maximum sequence length (same value as max_seq_length passed to the trainer)
tokenizer.model_max_length = 1024  # Adjust this value based on your dataset and GPU memory

In [None]:
# Optional: estimate the number of training tokens (disabled by default)
count_training=False
if count_training:
    # count training tokens
    # NOTE(review): this uses a Llama-2 tokenizer and the printed form of the
    # whole frame, so the figure is only a rough estimate for the base model.
    from transformers import LlamaTokenizer
    tokenizer_ = LlamaTokenizer.from_pretrained("cognitivecomputations/dolphin-llama2-7b",
                                                cache_dir=cache_dir)
    tokens = tokenizer_.tokenize(dataset.to_pandas().to_string())
    # A bare expression inside an `if` block is not displayed by the notebook,
    # so the count is printed explicitly.
    print("Training tokens:", len(tokens))

## Step 9 - Fine-Tune

In [None]:
#Adding the adapters in the layers
# Prepare the quantized model for k-bit training, then wrap it with LoRA adapters
model = prepare_model_for_kbit_training(model)
# LoRA settings: rank 64, alpha 16, 10% dropout, no bias terms trained;
# adapters are attached to the projection modules listed in target_modules
peft_config = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=64,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj","gate_proj"]
)
model = get_peft_model(model, peft_config)

In [None]:
#Hyperparamter
# One epoch, batch size 4 per device, no gradient accumulation; checkpoints are
# written to ./results every 50 steps and the loss is logged every step.
# NOTE(review): fp16 and bf16 are both False although the model is loaded with
# bfloat16 compute — confirm this is intended.
# NOTE(review): warmup_ratio presumably has no effect with
# lr_scheduler_type="constant" — confirm.
training_arguments = TrainingArguments(
    output_dir="./results",
    num_train_epochs=1,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=1,
    optim="paged_adamw_32bit",
    save_steps=50,
    logging_steps=1,
    learning_rate=2e-4,
    weight_decay=0.001,
    fp16=False,
    bf16=False,
    max_grad_norm=0.3,
    max_steps=-1,
    warmup_ratio=0.03,
    group_by_length=True,
    lr_scheduler_type="constant",
)

In [None]:
# Build the supervised fine-tuning trainer; `trainer` is only defined while is_default is True
is_default=True
if is_default:
    # Setting sft parameters
    # Trains on the "text" column of `dataset`, without packing, truncating
    # sequences to max_seq_length tokens
    trainer = SFTTrainer(
        model=model,
        train_dataset=dataset,
        peft_config=peft_config,
        #max_seq_length= None,
        max_seq_length=1024,  # Adjust based on your dataset and GPU memory
        dataset_text_field="text",
        tokenizer=tokenizer,
        args=training_arguments,
        packing= False,
    )

In [None]:
# Run the fine-tuning with the arguments configured in `training_arguments`
trainer.train()

## Step 10 - Save and push the adapter to HF

In [None]:
# Directory where the adapter, tokenizer and model card are written.
# `new_model` is taken from the definitions cell rather than being hard-coded
# again here, so selecting the smaller configuration there is respected.
# The folder name matches the cloned repository ("Automatic-Fine-Tunning").
save_path = os.path.join(main_path, "llm", "Automatic-Fine-Tunning", "models", new_model)

In [None]:
# Write the trained model to `save_path`
trainer.model.save_pretrained(save_path)
# Re-enable the cache (it was disabled for training) and switch to evaluation mode
model.config.use_cache = True
model.eval()

In [None]:
# Upload the trained model to the Hugging Face Hub under the name in `new_model`
trainer.model.push_to_hub(new_model)

In [None]:
# Store the tokenizer files next to the saved model
tokenizer.save_pretrained(save_path)

In [None]:
import json

# Create a Model Card.
# Run-dependent facts (model name, base model, subset size, Hub repository) are
# derived from the notebook configuration so the card cannot contradict the run.
hub_repo = f"ruslanmv/{new_model}"

# Usage snippet embedded in the card; it imports everything it uses
example_usage = "\n".join([
    "```python",
    "import torch",
    "from transformers import AutoModelForCausalLM, AutoTokenizer",
    "",
    "# Define the name of your fine-tuned model",
    f"finetuned_model = '{hub_repo}'",
    "",
    "# Load tokenizer",
    "tokenizer = AutoTokenizer.from_pretrained(finetuned_model, trust_remote_code=True)",
    "",
    "# Load the model with the provided adapter configuration and weights",
    "model_pretrained = AutoModelForCausalLM.from_pretrained(finetuned_model, trust_remote_code=True, torch_dtype=torch.float16)",
    "",
    "messages = [",
    "    {'role': 'user', 'content': 'What should I do to reduce my weight gained due to genetic hypothyroidism?'},",
    "    {'role': 'assistant', 'content': ''},",
    "]",
    "",
    "input_ids = tokenizer.apply_chat_template(messages, return_tensors='pt').to('cuda')",
    "",
    "outputs = model_pretrained.generate(input_ids, max_new_tokens=500)",
    "print(tokenizer.decode(outputs[0], skip_special_tokens=True))",
    "```",
])

model_card = {
  "Model Name": new_model,
  "Description": (
      f"Fine-tuned version of {base_model} for answering medical assistance questions. "
      f"It was adapted to a subset of {num:,} records from the AI Medical Chatbot dataset, "
      "which contains 250k records (https://huggingface.co/datasets/ruslanmv/ai-medical-chatbot). "
      "The purpose of this model is to provide a ready chatbot to answer questions related to medical assistance."
  ),
  "Intended Use": "This model is intended for providing assistance and answering questions related to medical inquiries. It is suitable for use in chatbot applications where users seek medical advice, information, or assistance.",
  "Example Usage": example_usage,
  "Training Data": {
    "Dataset Name": "AI Medical Chatbot",
    "Dataset URL": "https://huggingface.co/datasets/ruslanmv/ai-medical-chatbot",
    "Dataset Size": "250k records",
    "Subset Used": f"{num:,} records"
  },
  "Limitations": [
    "The model's performance may vary depending on the complexity and specificity of the medical questions.",
    "The model may not provide accurate answers for every medical query, and users should consult medical professionals for critical healthcare concerns."
  ],
  "Ethical Considerations": [
    "Users should be informed that the model's responses are generated based on patterns in the training data and may not always be accurate or suitable for medical decision-making.",
    "The model should not be used as a replacement for professional medical advice or diagnosis.",
    "Sensitive patient data should not be shared with the model, and user privacy should be protected."
  ]
}

# Save the Model Card next to the saved model
os.makedirs(save_path, exist_ok=True)
model_card_path = os.path.join(save_path, "model_card.json")
with open(model_card_path, "w") as f:
    json.dump(model_card, f, indent=2)

# NOTE(review): `model_card` is not among the documented `push_to_hub`
# parameters, so the keyword is no longer passed. The JSON file above is only
# stored locally — upload it separately if it should appear on the Hub.
model.push_to_hub(new_model)

## Step 11 - Test the model

In [None]:
# Silence everything below CRITICAL from transformers while generating
logging.set_verbosity(logging.CRITICAL)
# Text-generation pipeline on the in-memory fine-tuned model; max_length=200 is passed as the generation length limit
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=200)

In [None]:
def build_prompt(question):
    """Wrap `question` in the [INST]@Enlighten. ... [/INST] instruction template."""
    return "[INST]@Enlighten. {} [/INST]".format(question)

In [None]:
# Smoke test: run one hand-written question through the pipeline
question = "What does abutment of the nerve root mean?"
prompt = build_prompt(question)
result = pipe(prompt)

In [None]:
# The pipeline returns a list of dicts; show the text of the first generation
print(result[0]['generated_text'])

In [None]:
# CSV file used for the evaluation below.
# NOTE(review): this currently selects the training split — confirm that evaluating on training rows is intended.
#test_path="test.csv"
test_path="train.csv"

In [None]:
# Load the evaluation rows into `df_test`, the frame the following cells
# preview and iterate over.
df_test=pd.read_csv(test_path)

In [None]:

# Preview the evaluation rows
df_test.head()

In [None]:
#We test only 10 entries
# Keeps generation time short; `df_test` is overwritten with its first 10 rows
df_test=df_test.head(10)

In [None]:
# Display the rows that will be evaluated
df_test

In [None]:
questionCounter = 0
correct = 0
promptEnding = "[/INST]"
answerMarker = 'answer:'

# Guide for answering questions
testGuide = 'Answer the following question, at the end of your response say thank you for your query.\n'

# NOTE(review): scoring looks for the character following an 'answer:' marker
# and checks whether it occurs in the reference answer. Free-text replies may
# never contain that marker — confirm this metric is what you want.

# Loop through each row in the DataFrame
for _, row in df_test.iterrows():
    print("#############################")
    questionCounter += 1

    # Build the question prompt
    question = testGuide + row['question'] + "\n"
    print(question)

    # Get the true answer
    truth = row['answer']

    # Build the prompt
    prompt = build_prompt(question)

    # Generate answer
    result = pipe(prompt)
    llmAnswer = result[0]['generated_text']

    # Remove the prompt from the generated answer. If the closing tag is not
    # present the text is kept whole instead of being cut at an arbitrary offset.
    endPos = llmAnswer.find(promptEnding)
    if endPos != -1:
        llmAnswer = llmAnswer[endPos + len(promptEnding):]

    print("LLM Answer:")
    print(llmAnswer)

    # Remove spaces from the generated answer
    llmAnswer = llmAnswer.replace(' ', '')

    # Find the option in response; empty when the marker is missing or is the
    # last thing in the reply (slicing never raises IndexError)
    markerPos = llmAnswer.find(answerMarker)
    if markerPos != -1:
        optionPos = markerPos + len(answerMarker)
        next_char = llmAnswer[optionPos:optionPos + 1]
    else:
        next_char = ''

    # Find and match the option. The explicit emptiness check matters because
    # '' is contained in every string.
    if next_char and next_char in truth:
        correct += 1
        print('correct')
    else:
        print('wrong')

    # Update accuracy
    accuracy = correct / questionCounter
    print(f"Progress: {questionCounter / len(df_test)}")
    print(f"Accuracy: {accuracy}")

In [None]:

## Medical-Mixtral-7B-v250k

In [None]:
# Test the fine-tuned model that was pushed to the Hub

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
# Define the name of your fine-tuned model
finetuned_model = "ruslanmv/Medical-Mixtral-7B-v250k"
# Load tokenizer
# `tokenizer` is rebound here to the tokenizer stored with the fine-tuned model
tokenizer = AutoTokenizer.from_pretrained(finetuned_model,
                                          trust_remote_code=True,
                                          cache_dir=cache_dir)

In [None]:
# Same tokenizer settings as used for training: right padding, EOS reused as pad token
tokenizer.padding_side = 'right'
tokenizer.pad_token = tokenizer.eos_token
tokenizer.add_eos_token = True
# Display the special tokens as a quick sanity check
tokenizer.bos_token, tokenizer.eos_token

In [None]:
# Load the fine-tuned model from the Hub in 4-bit NF4 precision
bnb_config = BitsAndBytesConfig(
    load_in_4bit= True,
    bnb_4bit_quant_type= "nf4",
    bnb_4bit_compute_dtype= torch.bfloat16,
    bnb_4bit_use_double_quant= False,
)
# 4-bit loading is requested through `quantization_config` only. Passing
# `load_in_4bit=True` as well is redundant and is rejected by recent
# transformers releases.
model_pretrained = AutoModelForCausalLM.from_pretrained(
        finetuned_model,
        quantization_config=bnb_config,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        trust_remote_code=True,
        cache_dir=cache_dir
)

In [None]:

# Switch the loaded model to evaluation mode before generating
model_pretrained.eval()
## Text Generation with the Fine-Tuned Model

In [None]:
def build_prompt(question):
    """Wrap `question` in the [INST]@Enlighten. ... [/INST] instruction template."""
    return "[INST]@Enlighten. {} [/INST]".format(question)

In [None]:
# Text-generation pipeline on the model loaded from the Hub; max_length=100 is passed as the generation length limit
pipe = pipeline(task="text-generation", model=model_pretrained, tokenizer=tokenizer, max_length=100)


In [None]:
def ask(question):
  """Send `question` to the text-generation pipeline and print the reply.

  The question is prefixed with a fixed answering guide, wrapped with
  `build_prompt`, and passed to the module-level `pipe`. The decorated
  question and the model's reply are printed; nothing is returned.

  Args:
      question: Question text to ask the model.
  """
  promptEnding = "[/INST]"
  # Guide for answering questions
  testGuide = 'Answer the following question, at the end of your response say thank you for your query.\n'
  # Build the question prompt
  question = testGuide + question + "\n"
  print(question)
  # Build the prompt
  prompt = build_prompt(question)
  # Generate answer
  result = pipe(prompt)
  generated = result[0]['generated_text']
  # Remove the prompt from the generated answer. When the closing tag is
  # absent the text is shown whole rather than cut at an arbitrary offset.
  _, marker, reply = generated.partition(promptEnding)
  llmAnswer = reply if marker else generated
  print("LLM Answer:")
  print(llmAnswer)
     


In [None]:
# Sample patient question passed to ask() in the next cell
question = "My symptoms after intercourse threatns me even after having negative HIV result. Kindly help."


In [None]:
# Print the decorated question and the model's reply
ask(question)
