# ruslanmv/Automatic-Fine-Tunning
## For SageMaker/ Google Colab

In this notebook we are going to Fine Tune the Mixtral Model adapted to ai-medical-chatbot with more than 250k of records.


In [1]:
import psutil  # Install using `!pip install psutil` if needed
def get_cpu_and_memory_info():
    """Retrieves CPU usage (percentage) and total memory (GB) information.
    Returns:
        dict: A dictionary containing CPU usage and total memory information.
    """
    cpu_usage = psutil.cpu_percent(interval=1)  # Sample CPU usage every second
    total_memory = psutil.virtual_memory().total / (1024**3)  # Convert to GB

    return {"cpu_usage": cpu_usage, "total_memory": total_memory}

# Get CPU and memory information
info = get_cpu_and_memory_info()

print(f"CPU Usage: {info['cpu_usage']}%")
print(f"Total Memory: {info['total_memory']:.2f} GB")


CPU Usage: 0.7%
Total Memory: 239.86 GB


In [2]:
import torch
import psutil
def get_gpu_info():
    if torch.cuda.is_available():
        gpu_count = torch.cuda.device_count()
        gpu_name = torch.cuda.get_device_name(0)
        return gpu_count, gpu_name
    else:
        return 0, "No GPU available"
# Get GPU information
gpu_count, gpu_name = get_gpu_info()
print(f"Number of GPUs: {gpu_count}")
print(f"GPU Name: {gpu_name}")


Number of GPUs: 4
GPU Name: Tesla V100-SXM2-16GB


## Step 1 - Identification of Platform

In [2]:
try:
  from IPython.core.display import get_ipython
  is_colab =  get_ipython() is not None and get_ipython().get_fullname() == '__main__'
except:
  is_colab = False
if is_colab:
    print("You are on Google Colab!")
else:
    print("You are not on Google Colab.")
    try:
        import boto3
        # Assuming you have IAM permissions to list SageMaker notebook instances
        sagemaker_client = boto3.client('sagemaker')
        response = sagemaker_client.list_notebook_instances()
        # Check if any notebook instances are listed
        if len(response['NotebookInstances']) > 0:
            print("You are on SageMaker notebook instance.")
            is_sagemaker=True
        else:
            print("SageMaker API check inconclusive.")
    except Exception as e:
        print(f"An error occurred while checking with SageMaker API: {e}")
        print("Result inconclusive.")

You are not on Google Colab.
You are on SageMaker notebook instance.


## Step 2 - Environment Selection

In [3]:
import os
if is_colab:
    #@markdown # Connect Google Drive
    from google.colab import drive
    from IPython.display import clear_output
    import ipywidgets as widgets
    import os
    def inf(msg, style, wdth): inf = widgets.Button(description=msg, disabled=True, button_style=style, layout=widgets.Layout(min_width=wdth));display(inf)
    Shared_Drive = "" #@param {type:"string"}
    #@markdown - Leave empty if you're not using a shared drive
    print("[0;33mConnecting...")
    drive.mount('/content/gdrive')
    if Shared_Drive!="" and os.path.exists("/content/gdrive/Shareddrives"):
      mainpth="Shareddrives/"+Shared_Drive
    else:
      mainpth="MyDrive"
    clear_output()
    inf('\u2714 Done','success', '50px')
    #@markdown ---
else:
    env_name = os.environ.get("CONDA_DEFAULT_ENV", "")
    if env_name == "conda_pytorch_p310":
        print("Not detected Default Pytorch Environment")
        print("Installing missing packages")
        !pip3 install -qU torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
    else:
        print("Environment name:", env_name)

Environment name: pytorch_p310


## Step 3 - Package Installation

In [4]:
import os
import sys
if is_colab:
    #@markdown # Install/Update ruslanmv repo
    from IPython.utils import capture
    from IPython.display import clear_output
    from subprocess import getoutput
    import ipywidgets as widgets
    import sys
    import fileinput
    import os
    import time
    import base64
    import requests
    from urllib.request import urlopen, Request
    from urllib.parse import urlparse, parse_qs, unquote
    from tqdm import tqdm
    import six
    blsaphemy = base64.b64decode("ZWJ1aQ==").decode('ascii')
    if not os.path.exists("/content/gdrive"):
        print('\033[1;31mGdrive not connected, using temporary colab storage ...')
        time.sleep(4)
        mainpth = "MyDrive"
        !mkdir -p /content/gdrive/$mainpth
        Shared_Drive = ""

    if Shared_Drive != "" and not os.path.exists("/content/gdrive/Shareddrives"):
        print('\033[1;31mShared drive not detected, using default MyDrive')
        mainpth = "MyDrive"

    with capture.capture_output() as cap:
        def inf(msg, style, wdth):
            inf = widgets.Button(description=msg, disabled=True, button_style=style, layout=widgets.Layout(min_width=wdth))
            display(inf)
        fgitclone = "git clone --depth 1"
        !mkdir -p /content/gdrive/$mainpth/llm
        # Define the path
        main_path =f"/content/gdrive/{mainpth}/"
        !git clone -q --branch master https://github.com/ruslanmv/Automatic-Fine-Tunning /content/gdrive/$mainpth/llm/Automatic-Fine-Tunning
        os.environ['TRANSFORMERS_CACHE'] = f"/content/gdrive/{mainpth}/llm/Automatic-Fine-Tunning/{blsaphemy}/cache"
        os.environ['TORCH_HOME'] = f"/content/gdrive/{mainpth}/llm/Automatic-Fine-Tunning/{blsaphemy}/cache"
        cache_dir = os.environ['TRANSFORMERS_CACHE']
        !mkdir -p /content/gdrive/{mainpth}/llm/Automatic-Fine-Tunning/{blsaphemy}/repositories
        !git clone https://github.com/ruslanmv/Automatic-Fine-Tunning /content/gdrive/{mainpth}/llm/Automatic-Fine-Tunning/{blsaphemy}/repositories/Automatic-Fine-Tunningebui-assets

    with capture.capture_output() as cap:
        %cd /content/gdrive/{mainpth}/llm/Automatic-Fine-Tunning/{blsaphemy}/repositories/Automatic-Fine-Tunningebui-assets

        !git reset --hard
        !git checkout master
        time.sleep(1)
        !git pull
    clear_output()
    inf('\u2714 Done', 'success', '50px')
    #@markdown ---

In [5]:
def reload_environment():
    # Kernel restart logic (may not work consistently within Jupyter Notebook)
    try:
      from IPython import get_ipython
      get_ipython().kernel.do_shutdown(restart=True)
      print("Kernel restarted. Packages should be reloaded.")
    except Exception as e:
      print(f"Kernel restart failed: {e}")
      print("Consider manually restarting the kernel or your Jupyter Notebook server.")
if is_colab:
    #@markdown # Requirements
    print('[1;32mInstalling requirements...')
    with capture.capture_output() as cap:
      %cd /content/
      !wget -q -i https://github.com/ruslanmv/Automatic-Fine-Tunning/raw/master/Dependencies/requirements.txt
      !pip install -r requirements.txt
    clear_output()
    inf('\u2714 Done','success', '50px')
    #@markdown ---
if is_sagemaker:
    #!pip install -qU transformers==4.36.2 accelerate==0.25.0 duckduckgo_search==4.1.0  python-dotenv
    #!pip install -qU  bitsandbytes transformers==4.36.2 peft accelerate trl datasets==2.16.0 sentencepiece protobuf
    !wget -q https://github.com/ruslanmv/Automatic-Fine-Tunning/raw/master/Dependencies/requirements.txt -O requirements.txt
    !pip install -qU -r requirements.txt 
    #reload_environment()

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
pathos 0.3.2 requires dill>=0.3.8, but you have dill 0.3.7 which is incompatible.
pathos 0.3.2 requires multiprocess>=0.70.16, but you have multiprocess 0.70.15 which is incompatible.[0m[31m
[0m

##  Step 4  - Load Packages

In [6]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig,HfArgumentParser,TrainingArguments,pipeline, logging
from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training, get_peft_model
import os,torch
from datasets import load_dataset
from trl import SFTTrainer
import pandas as pd
import pyarrow as pa
import pyarrow.dataset as ds
import pandas as pd
from datasets import Dataset
import re
import os

## Step 5 - Definitions

In [7]:
# base model from huggingFace or path to model
base_model = "mistralai/Mistral-7B-Instruct-v0.2"
#base_model = "mistralai/Mixtral-8x7B-Instruct-v0.1"
# New model name

#new_model = "Medical-Mixtral-7B-v250k"
#num=256500

new_model = "Medical-Mixtral-7B-v2k"
num=2000

In [8]:
##  Loading Data
# Access the environment variable
if is_colab:
    from google.colab import userdata
    from google.colab import userdata
    secret_hf = userdata.get('HF_TOKEN')
else:
    import os
    from dotenv import load_dotenv
    # Check if .env file exists
    if not os.path.exists('.env'):
        # Print the URL for Hugging Face token
        print("Please go to the following URL and obtain your Hugging Face token:")
        print("https://huggingface.co/settings/tokens")
        print()
        # Prompt user to enter HF_TOKEN manually
        hf_token = input("Please enter your Hugging Face token: ")

        # Create or append to .env file
        with open('.env', 'a') as f:
            f.write(f"HF_TOKEN={hf_token}\n")

    # Load the .env file
    load_dotenv()
    # Retrieve the value of HF_TOKEN from the environment variables
    secret_hf = os.environ.get('HF_TOKEN')
    # Clear output to hide the token
    from IPython.display import clear_output
    clear_output()
    # Print the value of HF_TOKEN
    print("Loaded HF Token")

Loaded HF Token


In [9]:
# Then you can use the token in your command
!huggingface-cli login --token $secret_hf

Token has not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /home/ec2-user/.cache/huggingface/token
Login successful


## Step 6 -  Datataset to Finetune

In [11]:
dataset_ = load_dataset("ruslanmv/ai-medical-chatbot")
train_data = dataset_["train"]
df = pd.DataFrame(train_data[::])
df = df[["Description", "Doctor"]].rename(columns={"Description": "question", "Doctor": "answer"})
# Clean the question and answer columns
df['question'] = df['question'].apply(lambda x: re.sub(r'\s+', ' ', x.strip()))
df['answer'] = df['answer'].apply(lambda x: re.sub(r'\s+', ' ', x.strip()))
# Assuming your DataFrame is named 'df' and the column is named 'df' and the column is named 'question'
df['question'] = df['question'].str.lstrip('Q. ')
df['answer'] = df['answer'].str.replace('-->', '')

Downloading readme:   0%|          | 0.00/863 [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/142M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/256916 [00:00<?, ? examples/s]

In [12]:
df.head()

Unnamed: 0,question,answer
0,What does abutment of the nerve root mean?,Hi. I have gone through your query with dilige...
1,What should I do to reduce my weight gained du...,Hi. You have really done well with the hypothy...
2,"I have started to get lots of acne on my face,...",Hi there Acne has multifactorial etiology. Onl...
3,Why do I have uncomfortable feeling between th...,Hello. The popping and discomfort what you fel...
4,My symptoms after intercourse threatns me even...,Hello. The HIV test uses a finger prick blood ...


In [13]:
df_train = df.iloc[:num, :]
df_test = df.iloc[num:num+100, :]
# Save the train dataframe to a CSV file
df_train.to_csv('train.csv', index=False)
# Save the test dataframe to a CSV file
df_test.to_csv('test.csv', index=False)
df=df_train

## Step 7 - Formatting Your Fine-tuning Data
There are various ways to format your data for fine-tuning

Prompts provide context and guide the LLM towards the desired task. The code showcases creating prompts for question-answering tasks with placeholders for questions and answers.

In [14]:
# build training dataset with the right format
df['text'] = '[INST]@Enlighten. ' + df['question'] +'[/INST]'+ df['answer'] + ''

In [15]:
# remove columns
df=df.drop(['question','answer'],axis=1)

In [16]:
df.head()

Unnamed: 0,text
0,[INST]@Enlighten. What does abutment of the ne...
1,[INST]@Enlighten. What should I do to reduce m...
2,[INST]@Enlighten. I have started to get lots o...
3,[INST]@Enlighten. Why do I have uncomfortable ...
4,[INST]@Enlighten. My symptoms after intercours...


In [17]:
df.columns

Index(['text'], dtype='object')

In [18]:
# convert to dataset object
dataset = ds.dataset(pa.Table.from_pandas(df).to_batches())
dataset = Dataset(pa.Table.from_pandas(df))
dataset

Dataset({
    features: ['text'],
    num_rows: 2000
})

In [19]:
type(dataset)

datasets.arrow_dataset.Dataset

In [20]:
statistics=True
#dataset_sample = dataset.select(range(500)) # Take only the first 500 records from the dataset
if statistics: 
    sequence_lengths = [len(example['text']) for example in dataset]
    # Calculate statistics
    max_length = max(sequence_lengths)
    min_length = min(sequence_lengths)
    mean_length = sum(sequence_lengths) / len(sequence_lengths)
    median_length = sorted(sequence_lengths)[len(sequence_lengths) // 2]
    print("Max length:", max_length)
    print("Min length:", min_length)
    print("Mean length:", mean_length)
    print("Median length:", median_length)

Max length: 3368
Min length: 106
Mean length: 646.119
Median length: 606


In [21]:
if is_sagemaker:
    # Get the current directory and join with the models folder
    current_directory = os.getcwd()
    cache_dir = os.path.join(current_directory, "models")
    main_path=current_directory

## Step 8 -  Downloading and Initializing Mixtral 8x7b

In [23]:
# Load base model
bnb_config = BitsAndBytesConfig(
    load_in_4bit= True,
    bnb_4bit_quant_type= "nf4",
    bnb_4bit_compute_dtype= torch.bfloat16,
    bnb_4bit_use_double_quant= False,
)
model = AutoModelForCausalLM.from_pretrained(
        base_model,
        load_in_4bit=True,
        quantization_config=bnb_config,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        trust_remote_code=True,
        cache_dir=cache_dir
)

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

In [24]:
model.config.use_cache = False # silence the warnings. Please re-enable for inference!
model.config.pretraining_tp = 1
model.gradient_checkpointing_enable()

In [25]:
# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True, cache_dir=cache_dir)
tokenizer.padding_side = 'right'
tokenizer.pad_token = tokenizer.eos_token
tokenizer.add_eos_token = True
tokenizer.bos_token, tokenizer.eos_token

tokenizer_config.json:   0%|          | 0.00/1.46k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/72.0 [00:00<?, ?B/s]

('<s>', '</s>')

In [26]:
tokenizer.model_max_length = 1024  # Adjust this value based on your dataset and GPU memory

In [27]:
count_training=False
if count_training:
    # count trainging tokens
    from transformers import LlamaTokenizer
    tokenizer_ = LlamaTokenizer.from_pretrained("cognitivecomputations/dolphin-llama2-7b",
                                                cache_dir=cache_dir)
    tokens = tokenizer_.tokenize(dataset.to_pandas().to_string())
    len(tokens)

## Step 9 - Fine-Tune

In [28]:
#Adding the adapters in the layers
model = prepare_model_for_kbit_training(model)
peft_config = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=64,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj","gate_proj"]
)
model = get_peft_model(model, peft_config)

In [37]:
#Hyperparamter
training_arguments = TrainingArguments(
    output_dir="./results",
    num_train_epochs=1,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=1,
    optim="paged_adamw_32bit",
    save_steps=50,
    logging_steps=1,
    learning_rate=2e-4,
    weight_decay=0.001,
    fp16=False,
    bf16=False,
    max_grad_norm=0.3,
    max_steps=-1,
    warmup_ratio=0.03,
    group_by_length=True,
    lr_scheduler_type="constant",
)

In [39]:
# Setting sft parameters
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    peft_config=peft_config,
    #max_seq_length= None,
    max_seq_length=1024,  # Adjust based on your dataset and GPU memory
    dataset_text_field="text",
    tokenizer=tokenizer,
    args=training_arguments,
    packing= False,
)

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

In [40]:
trainer.train()
clear_output()

## Step 10 - Save and push the adapter to HF

In [41]:
# Save the fine-tuned model
save_path = os.path.join(main_path, "llm", "Automatic-Fine-Tuning", "models", new_model)

In [42]:
save_path

'/home/ec2-user/SageMaker/container/llm/Automatic-Fine-Tuning/models/Medical-Mixtral-7B-v2k'

In [43]:
model.eval()
model.config.use_cache = True

In [44]:
# Save the model configuration
model.config.save_pretrained(save_path)

In [45]:
#Saving
trainer.model.save_pretrained(save_path)

In [46]:
tokenizer.save_pretrained(save_path)

('/home/ec2-user/SageMaker/container/llm/Automatic-Fine-Tuning/models/Medical-Mixtral-7B-v2k/tokenizer_config.json',
 '/home/ec2-user/SageMaker/container/llm/Automatic-Fine-Tuning/models/Medical-Mixtral-7B-v2k/special_tokens_map.json',
 '/home/ec2-user/SageMaker/container/llm/Automatic-Fine-Tuning/models/Medical-Mixtral-7B-v2k/tokenizer.model',
 '/home/ec2-user/SageMaker/container/llm/Automatic-Fine-Tuning/models/Medical-Mixtral-7B-v2k/added_tokens.json',
 '/home/ec2-user/SageMaker/container/llm/Automatic-Fine-Tuning/models/Medical-Mixtral-7B-v2k/tokenizer.json')

In [34]:
#model.save_pretrained(save_path)

In [47]:
# Creation of te README.md
# Example of use
example = f'''
from transformers import AutoModelForCausalLM, AutoTokenizer ,pipeline, logging, BitsAndBytesConfig
import os,torch
# Define the name of your fine-tuned model
finetuned_model = 'ruslanmv/{new_model}'

# Load fine-tuned model
bnb_config = BitsAndBytesConfig(
    load_in_4bit= True,
    bnb_4bit_quant_type= "nf4",
    bnb_4bit_compute_dtype= torch.bfloat16,
    bnb_4bit_use_double_quant= False,
)
model_pretrained = AutoModelForCausalLM.from_pretrained(
        finetuned_model,
        load_in_4bit=True,
        quantization_config=bnb_config,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        trust_remote_code=True
)
# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(finetuned_model,
                                          trust_remote_code=True)


pipe = pipeline(task="text-generation",
                model=model_pretrained, 
                tokenizer=tokenizer, max_length=500)
def build_prompt(question):
  prompt=f"[INST]@Enlighten. [/INST] {question}"
  return prompt

question = "Are my symptoms due to HIV infection? I had a high-risk exposure 15 months ago"
prompt = build_prompt(question)
result = pipe(prompt)
print(result[0]['generated_text'])



'''
# Create a Model Card
model_card = {
    "Model Name": "{}".format(new_model),
    "Description": "Fine-tuned Mixtral model for answering medical assistance questions. This model is a novel version of mistralai/Mixtral-8x7B-Instruct-v0.1, adapted to a subset of {}k records from the AI Medical Chatbot dataset, which contains 250k records (https://huggingface.co/datasets/ruslanmv/ai-medical-chatbot). The purpose of this model is to provide a ready chatbot to answer questions related to medical assistance.".format(num/1000),
    "Intended Use": "This model is intended for providing assistance and answering questions related to medical inquiries. It is suitable for use in chatbot applications where users seek medical advice, information, or assistance.",
    "Example Usage 1": example,
    "Training Data": {
        "Dataset Name": "AI Medical Chatbot",
        "Dataset URL": "https://huggingface.co/datasets/ruslanmv/ai-medical-chatbot",
        "Dataset Size": "250k records",
        "Subset Used": "{}k records".format(num/1000)
    },
    "Limitations": [
        "The model's performance may vary depending on the complexity and specificity of the medical questions.",
        "The model may not provide accurate answers for every medical query, and users should consult medical professionals for critical healthcare concerns."
    ],
    "Ethical Considerations": [
        "Users should be informed that the model's responses are generated based on patterns in the training data and may not always be accurate or suitable for medical decision-making.",
        "The model should not be used as a replacement for professional medical advice or diagnosis.",
        "Sensitive patient data should not be shared with the model, and user privacy should be protected."
    ]
}
# Additional information
library_name = "peft"
repository_name = f"ruslanmv/{new_model}"  # Specify the repository name here
# Generate README content
readme_content = f"""---
library_name: {library_name}
base_model: {base_model}
---

# {model_card['Model Name']}

## Description
{model_card['Description']}

## Intended Use
{model_card['Intended Use']}

## Installation 

pip install -qU  transformers==4.36.2  datasets python-dotenv peft bitsandbytes accelerate

## Example Usage
```python
{model_card['Example Usage 1']}
```

For Gpus
```python
{model_card['Example Usage 2']}
```

## Training Data
- **Dataset Name:** {model_card['Training Data']['Dataset Name']}
- **Dataset URL:** {model_card['Training Data']['Dataset URL']}
- **Dataset Size:** {model_card['Training Data']['Dataset Size']}
- **Subset Used:** {model_card['Training Data']['Subset Used']}

## Limitations
{model_card['Limitations'][0]}
{model_card['Limitations'][1]}

## Ethical Considerations
{model_card['Ethical Considerations'][0]}
{model_card['Ethical Considerations'][1]}
{model_card['Ethical Considerations'][2]}
"""
# Write README content to a file
with open("README.md", "w") as readme_file:
    readme_file.write(readme_content)
    
import json
# Save the Model Card
model_card_path = os.path.join(save_path, "model_card.json")
with open(model_card_path, "w") as f:
    json.dump(model_card, f)


In [54]:
# Push the model to the Hub, specifying the repository ID and any other optional parameters
model.push_to_hub(
    repo_id=new_model,  # Replace with your model
    use_temp_dir=True,  # Optional, whether to use a temporary directory
    commit_message="Upload " + new_model,  # Commit message with new_model name
    private=False,  # Optional, whether the repository should be private
    token=True,  # Optional, specify token or use default
    max_shard_size="5GB",  # Optional, maximum size for a checkpoint before being sharded
    create_pr=False,  # Optional, whether to create a PR
    safe_serialization=True,  # Optional, convert model weights to safetensors format
    revision=None,  # Optional, branch to push the uploaded files to
    commit_description=None,  # Optional, description of the commit
    model_card=model_card_path
)
#trainer.model.push_to_hub(new_model)
tokenizer.push_to_hub(new_model)

CommitInfo(commit_url='https://huggingface.co/ruslanmv/Medical-Mixtral-7B-v2k/commit/199267885dfa064f2ce40581a2fef3fddad3e963', commit_message='Upload tokenizer', commit_description='', oid='199267885dfa064f2ce40581a2fef3fddad3e963', pr_url=None, pr_revision=None, pr_num=None)

CommitInfo(commit_url='https://huggingface.co/ruslanmv/Medical-Mixtral-7B-v2k/commit/9d407ce7d17d62de058064ef21b5b6682f265295', commit_message='Upload tokenizer', commit_description='', oid='9d407ce7d17d62de058064ef21b5b6682f265295', pr_url=None, pr_revision=None, pr_num=None)

In [1]:
#help(model.push_to_hub)

In [55]:
# Save config.json
config_path_json = os.path.join(save_path, "config.json")

In [57]:
from huggingface_hub import HfApi
# Authenticate with Hugging Face 
api = HfApi()  # Replace with your authentication method
# Path to your README.md file

def upload_file(path, repository_name, path_in_repo):
    api.upload_file(
        path_or_fileobj=path,
        repo_id=repository_name,
        path_in_repo=path_in_repo  # Add this argument
    )
    print(f"{path_in_repo} uploaded to {repository_name}")

In [61]:
current_directory = os.getcwd()
# Combine the current directory with the README.md filename
readme_path = os.path.join(current_directory, "README.md")
# Upload the file to the root of the repository ("")
path_in_repo="README.md"
upload_file(readme_path, repository_name, path_in_repo)


README.md uploaded to ruslanmv/Medical-Mixtral-7B-v2k


In [63]:
config_path = os.path.join(save_path, "config.json")
# Upload the file to the root of the repository ("")
path_in_repo="config.json"
upload_file(config_path, repository_name, path_in_repo)

config.json uploaded to ruslanmv/Medical-Mixtral-7B-v2k


## Step 11 - Test the model

In [64]:
logging.set_verbosity(logging.CRITICAL)
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=200)

In [65]:
def build_prompt(question):
  prompt=f"[INST]@Enlighten. {question} [/INST]"
  return prompt

In [66]:
question = "What does abutment of the nerve root mean?"
prompt = build_prompt(question)
result = pipe(prompt)

In [67]:
print(result[0]['generated_text'])

[INST]@Enlighten. What does abutment of the nerve root mean? [/INST]Hi. I have gone through your query with diligence and would like you to know that I am here to help you. For further information consult a neurologist online ― ₹200. All the best. For more information consult a neurologist online ― ₹200. All the best. For more information consult a neurologist online ― ₹200. All the best. For more information consult a neurologist online ― ₹200. All the best. For more information consult a neurologist online ― ₹200. All the best. For more information consult a neurologist online ― ₹200. All the best. For more information consult a neurologist online ― ₹200. All the best. For more information consult


In [68]:
#test_path="test.csv"
test_path="train.csv"

In [69]:
f_test=pd.read_csv(test_path)

In [70]:
df_test.head()

Unnamed: 0,question,answer
2000,Can Yasmin birth control pill be used as an em...,"Hi. How are you doing? Yes, as you have heard,..."
2001,Please explain the poisonous effect of phenol ...,Hi. I want to assure you not to worry as every...
2002,For how long should I take Kalachikai powder t...,"Hello. For PCOD (polycystic ovarian disease), ..."
2003,Why do periods get delayed after first time sex?,Hello. As the serum beta hCG levels are less t...
2004,How to resolve peripheral vision problem?,Hi. Revert back with the report and answers to...


In [71]:
#We test only 10 entries
df_test=df_test.head(10)

In [72]:
df_test

Unnamed: 0,question,answer
2000,Can Yasmin birth control pill be used as an em...,"Hi. How are you doing? Yes, as you have heard,..."
2001,Please explain the poisonous effect of phenol ...,Hi. I want to assure you not to worry as every...
2002,For how long should I take Kalachikai powder t...,"Hello. For PCOD (polycystic ovarian disease), ..."
2003,Why do periods get delayed after first time sex?,Hello. As the serum beta hCG levels are less t...
2004,How to resolve peripheral vision problem?,Hi. Revert back with the report and answers to...
2005,Is there any way to treat pinworm infestation ...,"Hi. Best is Ivermectin. If resistant, use 5 % ..."
2006,What are the ways to get pregnant without pene...,"Hi, WeIcome to icliniq.com. I can understand y..."
2007,Can I take PPI to treat gurgling caused due to...,Hi. PPI alone will not work. Need to add some ...
2008,What should I do to be happy all over the day ...,"Hello. First of all, you should stop hating yo..."
2009,Kindly explain well defined altered cystic sig...,"Hello. Regarding your question, I would explai..."


In [73]:
questionCounter = 0
correct = 0
promptEnding = "[/INST]"

# Guide for answering questions
testGuide = 'Answer the following question, at the end of your response say thank you for your query.\n'

# Loop through each row in the DataFrame
for index, row in df_test.iterrows():
    print("#############################")
    questionCounter += 1

    # Build the question prompt
    question = testGuide + row['question'] + "\n"
    print(question)

    # Get the true answer
    truth = row['answer']

    # Build the prompt
    prompt = build_prompt(question)

    # Generate answer
    result = pipe(prompt)
    llmAnswer = result[0]['generated_text']

    # Remove the prompt from the generated answer
    index = llmAnswer.find(promptEnding)
    llmAnswer = llmAnswer[len(promptEnding) + index:]

    print("LLM Answer:")
    print(llmAnswer)

    # Remove spaces from the generated answer
    llmAnswer = llmAnswer.replace(' ', '')

    # Find the option in response
    index = llmAnswer.find('answer:')

    # Find and match the option
    next_char = llmAnswer[index + len('answer:'):][0]
    if next_char in truth:
        correct += 1
        print('correct')
    else:
        print('wrong')

    # Update accuracy
    accuracy = correct / questionCounter
    print(f"Progress: {questionCounter / len(df_test)}")
    print(f"Accuracy: {accuracy}")

#############################
Answer the following question, at the end of your response say thank you for your query.
Can Yasmin birth control pill be used as an emergency contraceptive pill?

LLM Answer:
Hi. How are you doing? Yes, as you have heard, Yasmin can be used as an emergency pill. Even if you have not ejaculated into her, she stands a chance of pregnancy if the pre-seminal fluid, the clear fluid that comes out before semen, which is rich in young healthy sperms, comes in contact with her genitals. As soon as possible, earlier than 72 hours, I suggest taking four tablets of Yasmin (combination of Ethinyl Estradiol and Drospirenone) at the time, repeat four more tablets after 12 hours. This being a high dose of hormones, it will suddenly increase the inner l
correct
Progress: 0.1
Accuracy: 1.0
#############################
Answer the following question, at the end of your response say thank you for your query.
Please explain the poisonous effect of phenol ingestion.

LLM Answ



LLM Answer:
Hello. I have gone through your query and understand your concerns. For further information consult a neurologist online ☑️. For more information consult a neurologist online ☑️. For more information consult a neurologist online ☑️. For more information consult a neurologist online ☑️. For more information consult a neurologist online ☑️. For more information consult a neurologist online ☑️. For more information consult a neurologist online ☑️. For more information consult a neurologist online ☑️. For more information consult a neurologist online ☑️. For more information
correct
Progress: 1.0
Accuracy: 0.9


# Medical-Mixtral-7B

### Testing model created

In [11]:
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, logging, BitsAndBytesConfig
import os, torch

# Define the name of your fine-tuned model
finetuned_model = 'ruslanmv/Medical-Mixtral-7B-v2k'

# Load fine-tuned model
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=False,
)
model_pretrained = AutoModelForCausalLM.from_pretrained(
    finetuned_model,
    load_in_4bit=True,
    quantization_config=bnb_config,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    trust_remote_code=True
)

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(finetuned_model, trust_remote_code=True)

# Set pad_token_id to eos_token_id
model_pretrained.config.pad_token_id = tokenizer.eos_token_id

pipe = pipeline(task="text-generation", model=model_pretrained, tokenizer=tokenizer, max_length=100)

def build_prompt(question):
  prompt=f"[INST]@Enlighten. {question} [/INST]"
  return prompt



Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [15]:
question = "Are my symptoms due to HIV infection? I had a high-risk exposure 15 months ago"
prompt = build_prompt(question)

# Generate text based on the prompt
result = pipe(prompt)[0]
generated_text = result['generated_text']

# Remove the prompt from the generated text
generated_text = generated_text.replace(prompt, "", 1).strip()

print(generated_text)

Hello. I understand your concerns. I would like to assure you that you are not at risk of HIV infection. I hope this has helped you. Do write back if you have any more queries. All the best. For more information consult an internal medicine physician online ➜ www.iclinic.com/hiv


In [12]:
def ask(question):
  promptEnding = "[/INST]"
  # Guide for answering questions
  testGuide = 'Answer the following question, at the end of your response say thank you for your query.\n'
  # Build the question prompt
  question = testGuide + question + "\n"
  print(question)
  # Build the prompt
  prompt = build_prompt(question)
  # Generate answer
  result = pipe(prompt)
  llmAnswer = result[0]['generated_text']
  # Remove the prompt from the generated answer
  index = llmAnswer.find(promptEnding)
  llmAnswer = llmAnswer[len(promptEnding) + index:]
  print("LLM Answer:")
  print(llmAnswer)

In [13]:
question = "For how long should I take Kalachikai powder to overcome PCOD problem?"

In [14]:
ask(question)

Answer the following question, at the end of your response say thank you for your query.
For how long should I take Kalachikai powder to overcome PCOD problem?

LLM Answer:
Hello. For PCOD (polycystic ovarian disease), powder Kalachikai has a good effect, but the duration of treatment is long (more than three months). For proper evaluation of disease, please go for USG
