In [8]:
import datasets
import tempfile
import logging
import random
import config
import os
import yaml
import time
import torch
import transformers
import pandas as pd
import jsonlines
from utilities import *
from transformers import AutoTokenizer
from transformers import AutoModelForCausalLM
from transformers import TrainingArguments
from transformers import AutoModelForCausalLM
from llama import BasicModelRunner
from transformers.trainer_callback import TrainerCallback
import itertools

In [9]:
model_name = "EleutherAI/pythia-70m"
# Get the current directory
current_directory = os.getcwd()
# Join the folder path
folder_path = os.path.join(current_directory, "content")
dataset_name = "ai-medical-chatbot_processed.jsonl"
dataset_path = os.path.join(folder_path, dataset_name)
#dataset_path = f"/content/{dataset_name}"
use_hf = False
training_config = {
    "model": {
        "pretrained_name": model_name,
        "max_length" : 2048
    },
    "datasets": {
        "use_hf": use_hf,
        "path": dataset_path
    },
    "verbose": True
}
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token
train_dataset, test_dataset = tokenize_and_split_data(training_config, tokenizer)
base_model = AutoModelForCausalLM.from_pretrained(model_name)
device_count = torch.cuda.device_count()
if device_count > 0:
    logger.debug("Select GPU device")
    device = torch.device("cuda")
else:
    logger.debug("Select CPU device")
    device = torch.device("cpu")

2024-04-09 23:29:45,708 - DEBUG - utilities - Config: datasets.path: c:\Blog\How-to-Finetuning-Large-Language-Models\content\ai-medical-chatbot_processed.jsonl
datasets.use_hf: false
model.max_length: 2048
model.pretrained_name: EleutherAI/pythia-70m
verbose: true



tokenize False c:\Blog\How-to-Finetuning-Large-Language-Models\content\ai-medical-chatbot_processed.jsonl


2024-04-09 23:29:46,097 - DEBUG - fsspec.local - open file: C:/Users/066226758/.cache/huggingface/datasets/json/default-f1c6af33428df321/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/dataset_info.json
2024-04-09 23:29:46,128 - DEBUG - fsspec.local - open file: C:/Users/066226758/.cache/huggingface/datasets/json/default-f1c6af33428df321/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/dataset_info.json
2024-04-09 23:29:47,276 - DEBUG - utilities - Select CPU device


In [10]:
from collections import Counter
import math

def cosine_similarity(str1, str2):
    """
    Computes the cosine similarity between two strings using the Bag-of-Words model.

    Args:
        str1: The first string.
        str2: The second string.

    Returns:
        A float representing the cosine similarity between the two strings.
    """
    # Tokenize the strings
    tokens1 = str1.split()
    tokens2 = str2.split()

    # Create bag of words for each string
    bow1 = Counter(tokens1)
    bow2 = Counter(tokens2)

    # Get the set of all unique words
    all_words = set(bow1.keys()).union(set(bow2.keys()))

    # Compute dot product
    dot_product = sum(bow1[word] * bow2[word] for word in all_words)

    # Compute magnitudes
    magnitude1 = math.sqrt(sum(bow1[word] ** 2 for word in all_words))
    magnitude2 = math.sqrt(sum(bow2[word] ** 2 for word in all_words))

    # Compute cosine similarity
    if magnitude1 == 0 or magnitude2 == 0:
        return 0
    else:
        return dot_product / (magnitude1 * magnitude2)


In [20]:
def inference_new(text, model, tokenizer, max_input_tokens=1000, max_output_tokens=1000):
  # Tokenize
  input_ids = tokenizer.encode(
      text,
      return_tensors="pt",
      truncation=True,
      max_length=max_input_tokens
  )

  # Generate
  device = model.device
  attention_mask = torch.ones_like(input_ids)  # Create mask with all 1s

  # Fix: Mask all padding tokens, including the first element
  attention_mask[input_ids == tokenizer.pad_token_id] = 0

  generated_tokens_with_prompt = model.generate(
      input_ids.to(device),
      max_length=max_output_tokens,
      attention_mask=attention_mask,
      pad_token_id=tokenizer.eos_token_id  # Set pad token
  )

  # Decode
  generated_text_with_prompt = tokenizer.batch_decode(generated_tokens_with_prompt, skip_special_tokens=True)

  # Strip the prompt
  generated_text_answer = generated_text_with_prompt[0][len(text):]
  return generated_text_answer



In [33]:
from transformers import AutoModelForCausalLM
import shutil
def train_model(hyperparameters, delete=False):
  max_steps = hyperparameters["max_steps"]


  # Convert hyperparameter values to integers and add them to the string
  hyperparameter_str = '_'.join(str(int(value)) if isinstance(value, (int, float)) else value for value in hyperparameters.values())
  # Create the trained_model_name variable
  trained_model_name = f"ai_medical_{hyperparameter_str}"

  #trained_model_name = f"ai_medical_{max_steps}_steps"
  output_dir = trained_model_name
  training_args = TrainingArguments(
    # Learning rate
    learning_rate=hyperparameters["learning_rate"],

    # Number of training epochs
    num_train_epochs=hyperparameters["num_train_epochs"],

    # Max steps to train for (each step is a batch of data)
    # Overrides num_train_epochs, if not -1
    max_steps=max_steps,

    # Batch size for training
    per_device_train_batch_size=hyperparameters["per_device_train_batch_size"],

    # Directory to save model checkpoints
    output_dir=output_dir,

    # Other arguments
    overwrite_output_dir=False, # Overwrite the content of the output directory
    disable_tqdm=False, # Disable progress bars
    eval_steps=120, # Number of update steps between two evaluations
    save_steps=120, # After # steps model is saved
    warmup_steps=1, # Number of warmup steps for learning rate scheduler
    per_device_eval_batch_size=1, # Batch size for evaluation
    evaluation_strategy="steps",
    logging_strategy="steps",
    logging_steps=1,
    optim=hyperparameters["optim"],
    gradient_accumulation_steps = hyperparameters['gradient_accumulation_steps'],
    gradient_checkpointing=False,
    # Parameters for early stopping
    load_best_model_at_end=True,
    save_total_limit=1,
    metric_for_best_model="eval_loss",
    greater_is_better=False
  )
  base_model.to(device)
  model_flops = (
    base_model.floating_point_ops(
      {
        "input_ids": torch.zeros(
            (1, training_config["model"]["max_length"])
        )
      }
    )
    * training_args.gradient_accumulation_steps
  )

  #print(base_model)
  print("Memory footprint", base_model.get_memory_footprint() / 1e9, "GB")
  print("Flops", model_flops / 1e9, "GFLOPs")

  trainer = Trainer(
    model=base_model,
    model_flops=model_flops,
    total_steps=max_steps,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
)
  training_output = trainer.train()
  # Evaluate the model
  eval_results = trainer.evaluate()

  # Adding Evaluation 
  save_dir = f'{output_dir}'
  trainer.save_model(save_dir)
  print("Saved model to:", save_dir)
  finetuned_slightly_model = AutoModelForCausalLM.from_pretrained(save_dir, local_files_only=True)
  finetuned_slightly_model.to(device)
  test_question = test_dataset[0]['question']
  print("Question input (test):", test_question)
  predicted_answer=inference_new(test_question, finetuned_slightly_model, tokenizer)
  print("Finetuned slightly model's answer: ")
  print(predicted_answer) 
  test_answer = test_dataset[0]['answer']
  print("Target answer output (test):", test_answer)
  metric_cosine_similarity=cosine_similarity(test_answer, predicted_answer)
  print("Cosine Similarity:", metric_cosine_similarity)
  # Deleting the folder to save space
  if delete:
    shutil.rmtree(save_dir)
    print("Deleted model folder:", save_dir)
  return eval_results, training_output, metric_cosine_similarity


In [34]:
hyperparameters={'learning_rate': 1e-06,
'num_train_epochs': 1,
'per_device_train_batch_size': 1,
'optim': 'adafactor',
'num_iterations': 1,
'max_steps':3,
'gradient_accumulation_steps':2}

In [35]:
eval_results, training_output, metric_cosine_similarity =train_model(hyperparameters, delete=True)

Memory footprint 0.30687256 GB
Flops 1097.833906176 GFLOPs


  0%|          | 0/3 [00:00<?, ?it/s]

2024-04-10 00:00:08,175 - DEBUG - utilities - Step (1) Logs: {'loss': 4.4618, 'learning_rate': 1e-06, 'epoch': 0.0, 'iter_time': 0.0, 'flops': 0.0, 'remaining_time': 0.0}


{'loss': 4.4618, 'learning_rate': 1e-06, 'epoch': 0.0, 'iter_time': 0.0, 'flops': 0.0, 'remaining_time': 0.0}


2024-04-10 00:00:09,039 - DEBUG - utilities - Step (2) Logs: {'loss': 4.2251, 'learning_rate': 5e-07, 'epoch': 0.0, 'iter_time': 0.863516092300415, 'flops': 1271353152494.6572, 'remaining_time': 0.863516092300415}


{'loss': 4.2251, 'learning_rate': 5e-07, 'epoch': 0.0, 'iter_time': 0.863516092300415, 'flops': 1271353152494.6572, 'remaining_time': 0.863516092300415}


2024-04-10 00:00:09,819 - DEBUG - utilities - Step (3) Logs: {'loss': 4.1862, 'learning_rate': 0.0, 'epoch': 0.01, 'iter_time': 0.8219277858734131, 'flops': 1335681704700.3687, 'remaining_time': 0.0}
2024-04-10 00:00:09,824 - DEBUG - utilities - Step (3) Logs: {'train_runtime': 2.5603, 'train_samples_per_second': 2.343, 'train_steps_per_second': 1.172, 'total_flos': 91932868608.0, 'train_loss': 4.290998458862305, 'epoch': 0.01, 'iter_time': 0.8244308233261108, 'flops': 1331626468970.2683, 'remaining_time': 0.0}


{'loss': 4.1862, 'learning_rate': 0.0, 'epoch': 0.01, 'iter_time': 0.8219277858734131, 'flops': 1335681704700.3687, 'remaining_time': 0.0}
{'train_runtime': 2.5603, 'train_samples_per_second': 2.343, 'train_steps_per_second': 1.172, 'train_loss': 4.290998458862305, 'epoch': 0.01, 'iter_time': 0.8244308233261108, 'flops': 1331626468970.2683, 'remaining_time': 0.0}


  0%|          | 0/100 [00:00<?, ?it/s]

2024-04-10 00:00:16,872 - DEBUG - utilities - Step (3) Logs: {'eval_loss': 4.449901580810547, 'eval_runtime': 7.0419, 'eval_samples_per_second': 14.201, 'eval_steps_per_second': 14.201, 'epoch': 0.01, 'iter_time': 4.348652124404907, 'flops': 252453835066.4767, 'remaining_time': 0.0}


Saved model to: ai_medical_0_1_1_adafactor_1_3_2
Question input (test): Will Kalarchikai cure multiple ovarian cysts in PCOD?
Finetuned slightly model's answer: 


I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long l

In [5]:
test_text = test_dataset[0]['question']
print("Question input (test):", test_text)
print(f"Correct answer from ai-medical-chatbot: {test_dataset[0]['answer']}")
print("Model's answer: ")
#print(inference_new(test_text, base_model, tokenizer))

Question input (test): Will Kalarchikai cure multiple ovarian cysts in PCOD?
Correct answer from ai-medical-chatbot: Hello. I just read your query. See Kalarachi Kai choornam is helpful in amenorrhea. As far as small cysts are concerned they are unmatured eggs which failed to induce menstrual cycle previously, as a result, they got collected in the ovary and they will remain in the ovary. Now, you have got your periods you can start trying for conception. But I advise you to do it under the supervision of a nearby gynecologist because egg size is important while conception and that you can know by ovulation study. Ovulation study is performed under the supervision of a gynecologist. For gall stones, surgical intervention is required generally. Medicine is not of much help.
Model's answer: 


In [36]:
from tqdm import tqdm
import pandas as pd

In [37]:
def find_best_hyperparameters():
    best_hyperparameters = None
    best_loss = float('inf')
    # Lists to store data
    hyperparameters_list = []
    eval_results_list = []
    training_output_list = []
    cosine_similarity_list = []
    
    # Define hyperparameter search space
    hyperparameter_space = {
        "learning_rate": [1e-6, 1e-5, 1e-4],
        "num_train_epochs": [1,5,10,20],
        "per_device_train_batch_size": [1],
        "optim": ["adafactor"],
        "num_iterations": [1],
        "max_steps": [3],
        "gradient_accumulation_steps": [3],
    }
    # Generate all combinations of hyperparameters
    all_hyperparameters = list(itertools.product(*hyperparameter_space.values()))

    # Assuming all_hyperparameters is a list of hyperparameter combinations
    for hyperparameter_values in tqdm(all_hyperparameters):
        hyperparameters = dict(zip(hyperparameter_space.keys(), hyperparameter_values))
        
        # Evaluate the model
        # Print the current hyperparameters
        print("Using hyperparameters:")
        for key, value in hyperparameters.items():
            print(f"{key}: {value}")
        eval_results, training_output, metric_cosine_similarity = train_model(hyperparameters,delete=True)
        
        # Append data to lists
        hyperparameters_list.append(hyperparameters)
        eval_results_list.append(eval_results)
        training_output_list.append(training_output)
        cosine_similarity_list.append(metric_cosine_similarity)

        # Check if this set of hyperparameters gives better results
        if eval_results["eval_loss"] < best_loss:
                best_loss = eval_results["eval_loss"]
                best_hyperparameters = hyperparameters

    # Create DataFrame
    data = {
        'Hyperparameters': hyperparameters_list,
        'Evaluation Results': eval_results_list,
        'Training Output': training_output_list,
        'Cosine Similarity': cosine_similarity_list
    }
    df = pd.DataFrame(data)
    
    return best_hyperparameters, best_loss, df

In [38]:
# Call the function to find the best hyperparameters
best_hyperparameters, best_loss ,df = find_best_hyperparameters()

  0%|          | 0/12 [00:00<?, ?it/s]

Using hyperparameters:
learning_rate: 1e-06
num_train_epochs: 1
per_device_train_batch_size: 1
optim: adafactor
num_iterations: 1
max_steps: 3
gradient_accumulation_steps: 3
Memory footprint 0.30687256 GB
Flops 1646.750859264 GFLOPs


  0%|          | 0/3 [00:00<?, ?it/s]

2024-04-10 00:08:29,201 - DEBUG - utilities - Step (1) Logs: {'loss': 4.3719, 'learning_rate': 1e-06, 'epoch': 0.0, 'iter_time': 0.0, 'flops': 0.0, 'remaining_time': 0.0}
  0%|          | 0/12 [00:01<?, ?it/s]

{'loss': 4.3719, 'learning_rate': 1e-06, 'epoch': 0.0, 'iter_time': 0.0, 'flops': 0.0, 'remaining_time': 0.0}


2024-04-10 00:08:30,267 - DEBUG - utilities - Step (2) Logs: {'loss': 4.0918, 'learning_rate': 5e-07, 'epoch': 0.01, 'iter_time': 1.0710382461547852, 'flops': 1537527595467.4111, 'remaining_time': 1.0710382461547852}
  0%|          | 0/12 [00:02<?, ?it/s]

{'loss': 4.0918, 'learning_rate': 5e-07, 'epoch': 0.01, 'iter_time': 1.0710382461547852, 'flops': 1537527595467.4111, 'remaining_time': 1.0710382461547852}


2024-04-10 00:08:31,300 - DEBUG - utilities - Step (3) Logs: {'loss': 4.0965, 'learning_rate': 0.0, 'epoch': 0.01, 'iter_time': 1.0521172285079956, 'flops': 1565178113848.8271, 'remaining_time': 0.0}
  0%|          | 0/12 [00:03<?, ?it/s]2024-04-10 00:08:31,307 - DEBUG - utilities - Step (3) Logs: {'train_runtime': 3.2412, 'train_samples_per_second': 2.777, 'train_steps_per_second': 0.926, 'total_flos': 138837393408.0, 'train_loss': 4.1867062250773115, 'epoch': 0.01, 'iter_time': 1.0559263229370117, 'flops': 1559531970643.2134, 'remaining_time': 0.0}
  0%|          | 0/12 [00:03<?, ?it/s]

{'loss': 4.0965, 'learning_rate': 0.0, 'epoch': 0.01, 'iter_time': 1.0521172285079956, 'flops': 1565178113848.8271, 'remaining_time': 0.0}
{'train_runtime': 3.2412, 'train_samples_per_second': 2.777, 'train_steps_per_second': 0.926, 'train_loss': 4.1867062250773115, 'epoch': 0.01, 'iter_time': 1.0559263229370117, 'flops': 1559531970643.2134, 'remaining_time': 0.0}


  0%|          | 0/100 [00:00<?, ?it/s]

2024-04-10 00:08:37,815 - DEBUG - utilities - Step (3) Logs: {'eval_loss': 4.424636363983154, 'eval_runtime': 6.4975, 'eval_samples_per_second': 15.39, 'eval_steps_per_second': 15.39, 'epoch': 0.01, 'iter_time': 4.309926867485046, 'flops': 382083248717.6103, 'remaining_time': 0.0}


Saved model to: ai_medical_0_1_1_adafactor_1_3_3
Question input (test): Will Kalarchikai cure multiple ovarian cysts in PCOD?


  8%|▊         | 1/12 [00:28<05:11, 28.34s/it]

Finetuned slightly model's answer: 


I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a v

  0%|          | 0/3 [00:00<?, ?it/s]

2024-04-10 00:08:57,277 - DEBUG - utilities - Step (1) Logs: {'loss': 4.3166, 'learning_rate': 1e-06, 'epoch': 0.0, 'iter_time': 0.0, 'flops': 0.0, 'remaining_time': 0.0}
  8%|▊         | 1/12 [00:29<05:11, 28.34s/it]

{'loss': 4.3166, 'learning_rate': 1e-06, 'epoch': 0.0, 'iter_time': 0.0, 'flops': 0.0, 'remaining_time': 0.0}


2024-04-10 00:08:58,294 - DEBUG - utilities - Step (2) Logs: {'loss': 4.0109, 'learning_rate': 5e-07, 'epoch': 0.01, 'iter_time': 1.0172693729400635, 'flops': 1618795279862.4412, 'remaining_time': 1.0172693729400635}
  8%|▊         | 1/12 [00:30<05:11, 28.34s/it]

{'loss': 4.0109, 'learning_rate': 5e-07, 'epoch': 0.01, 'iter_time': 1.0172693729400635, 'flops': 1618795279862.4412, 'remaining_time': 1.0172693729400635}


2024-04-10 00:08:59,227 - DEBUG - utilities - Step (3) Logs: {'loss': 4.0395, 'learning_rate': 0.0, 'epoch': 0.01, 'iter_time': 0.9747419357299805, 'flops': 1689422398791.8962, 'remaining_time': 0.0}
  8%|▊         | 1/12 [00:31<05:11, 28.34s/it]2024-04-10 00:08:59,230 - DEBUG - utilities - Step (3) Logs: {'train_runtime': 2.9149, 'train_samples_per_second': 3.088, 'train_steps_per_second': 1.029, 'total_flos': 138837393408.0, 'train_loss': 4.1223320960998535, 'epoch': 0.01, 'iter_time': 0.976625919342041, 'flops': 1686163378065.3972, 'remaining_time': 0.0}
  8%|▊         | 1/12 [00:31<05:11, 28.34s/it]

{'loss': 4.0395, 'learning_rate': 0.0, 'epoch': 0.01, 'iter_time': 0.9747419357299805, 'flops': 1689422398791.8962, 'remaining_time': 0.0}
{'train_runtime': 2.9149, 'train_samples_per_second': 3.088, 'train_steps_per_second': 1.029, 'train_loss': 4.1223320960998535, 'epoch': 0.01, 'iter_time': 0.976625919342041, 'flops': 1686163378065.3972, 'remaining_time': 0.0}


  0%|          | 0/100 [00:00<?, ?it/s]

2024-04-10 00:09:06,365 - DEBUG - utilities - Step (3) Logs: {'eval_loss': 4.399848461151123, 'eval_runtime': 7.123, 'eval_samples_per_second': 14.039, 'eval_steps_per_second': 14.039, 'epoch': 0.01, 'iter_time': 4.543645262718201, 'flops': 362429451254.92566, 'remaining_time': 0.0}


Saved model to: ai_medical_0_5_1_adafactor_1_3_3
Question input (test): Will Kalarchikai cure multiple ovarian cysts in PCOD?


 17%|█▋        | 2/12 [00:56<04:42, 28.25s/it]

Finetuned slightly model's answer: 


I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a v

  0%|          | 0/3 [00:00<?, ?it/s]

2024-04-10 00:09:25,430 - DEBUG - utilities - Step (1) Logs: {'loss': 4.2632, 'learning_rate': 1e-06, 'epoch': 0.0, 'iter_time': 0.0, 'flops': 0.0, 'remaining_time': 0.0}
 17%|█▋        | 2/12 [00:57<04:42, 28.25s/it]

{'loss': 4.2632, 'learning_rate': 1e-06, 'epoch': 0.0, 'iter_time': 0.0, 'flops': 0.0, 'remaining_time': 0.0}


2024-04-10 00:09:26,434 - DEBUG - utilities - Step (2) Logs: {'loss': 3.9311, 'learning_rate': 5e-07, 'epoch': 0.01, 'iter_time': 1.0041427612304688, 'flops': 1639956909360.263, 'remaining_time': 1.0041427612304688}
 17%|█▋        | 2/12 [00:58<04:42, 28.25s/it]

{'loss': 3.9311, 'learning_rate': 5e-07, 'epoch': 0.01, 'iter_time': 1.0041427612304688, 'flops': 1639956909360.263, 'remaining_time': 1.0041427612304688}


2024-04-10 00:09:27,440 - DEBUG - utilities - Step (3) Logs: {'loss': 3.9849, 'learning_rate': 0.0, 'epoch': 0.01, 'iter_time': 1.0054903030395508, 'flops': 1637759065687.6035, 'remaining_time': 0.0}
 17%|█▋        | 2/12 [00:59<04:42, 28.25s/it]2024-04-10 00:09:27,448 - DEBUG - utilities - Step (3) Logs: {'train_runtime': 2.9288, 'train_samples_per_second': 3.073, 'train_steps_per_second': 1.024, 'total_flos': 138837393408.0, 'train_loss': 4.059745152791341, 'epoch': 0.01, 'iter_time': 1.009279727935791, 'flops': 1631609962712.7, 'remaining_time': 0.0}
 17%|█▋        | 2/12 [00:59<04:42, 28.25s/it]

{'loss': 3.9849, 'learning_rate': 0.0, 'epoch': 0.01, 'iter_time': 1.0054903030395508, 'flops': 1637759065687.6035, 'remaining_time': 0.0}
{'train_runtime': 2.9288, 'train_samples_per_second': 3.073, 'train_steps_per_second': 1.024, 'train_loss': 4.059745152791341, 'epoch': 0.01, 'iter_time': 1.009279727935791, 'flops': 1631609962712.7, 'remaining_time': 0.0}


  0%|          | 0/100 [00:00<?, ?it/s]

2024-04-10 00:09:34,240 - DEBUG - utilities - Step (3) Logs: {'eval_loss': 4.3765645027160645, 'eval_runtime': 6.7848, 'eval_samples_per_second': 14.739, 'eval_steps_per_second': 14.739, 'epoch': 0.01, 'iter_time': 4.4053168296813965, 'flops': 373809858162.48254, 'remaining_time': 0.0}


Saved model to: ai_medical_0_10_1_adafactor_1_3_3
Question input (test): Will Kalarchikai cure multiple ovarian cysts in PCOD?


 25%|██▌       | 3/12 [01:24<04:12, 28.02s/it]

Finetuned slightly model's answer: 


I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a very long list of cases. I have a v

  0%|          | 0/3 [00:00<?, ?it/s]

2024-04-10 00:09:53,239 - DEBUG - utilities - Step (1) Logs: {'loss': 4.2132, 'learning_rate': 1e-06, 'epoch': 0.0, 'iter_time': 0.0, 'flops': 0.0, 'remaining_time': 0.0}
 25%|██▌       | 3/12 [01:25<04:12, 28.02s/it]

{'loss': 4.2132, 'learning_rate': 1e-06, 'epoch': 0.0, 'iter_time': 0.0, 'flops': 0.0, 'remaining_time': 0.0}


2024-04-10 00:09:54,259 - DEBUG - utilities - Step (2) Logs: {'loss': 3.8522, 'learning_rate': 5e-07, 'epoch': 0.01, 'iter_time': 1.0203006267547607, 'flops': 1613985933245.744, 'remaining_time': 1.0203006267547607}
 25%|██▌       | 3/12 [01:26<04:12, 28.02s/it]

{'loss': 3.8522, 'learning_rate': 5e-07, 'epoch': 0.01, 'iter_time': 1.0203006267547607, 'flops': 1613985933245.744, 'remaining_time': 1.0203006267547607}


2024-04-10 00:09:55,179 - DEBUG - utilities - Step (3) Logs: {'loss': 3.934, 'learning_rate': 0.0, 'epoch': 0.01, 'iter_time': 0.9700396060943604, 'flops': 1697611982972.799, 'remaining_time': 0.0}
 25%|██▌       | 3/12 [01:27<04:12, 28.02s/it]2024-04-10 00:09:55,185 - DEBUG - utilities - Step (3) Logs: {'train_runtime': 2.9441, 'train_samples_per_second': 3.057, 'train_steps_per_second': 1.019, 'total_flos': 138837393408.0, 'train_loss': 3.999807039896647, 'epoch': 0.01, 'iter_time': 0.9726808071136475, 'flops': 1693002316094.4253, 'remaining_time': 0.0}
 25%|██▌       | 3/12 [01:27<04:12, 28.02s/it]

{'loss': 3.934, 'learning_rate': 0.0, 'epoch': 0.01, 'iter_time': 0.9700396060943604, 'flops': 1697611982972.799, 'remaining_time': 0.0}
{'train_runtime': 2.9441, 'train_samples_per_second': 3.057, 'train_steps_per_second': 1.019, 'train_loss': 3.999807039896647, 'epoch': 0.01, 'iter_time': 0.9726808071136475, 'flops': 1693002316094.4253, 'remaining_time': 0.0}


  0%|          | 0/100 [00:00<?, ?it/s]

2024-04-10 00:10:02,433 - DEBUG - utilities - Step (3) Logs: {'eval_loss': 4.3534440994262695, 'eval_runtime': 7.2422, 'eval_samples_per_second': 13.808, 'eval_steps_per_second': 13.808, 'epoch': 0.01, 'iter_time': 4.597127795219421, 'flops': 358212982675.0662, 'remaining_time': 0.0}


Saved model to: ai_medical_0_20_1_adafactor_1_3_3
Question input (test): Will Kalarchikai cure multiple ovarian cysts in PCOD?


 33%|███▎      | 4/12 [01:55<03:53, 29.13s/it]

Finetuned slightly model's answer: 


I have a very good idea to try this. I have a lot of experience with this. I have a lot of experience with this. I have a lot of experience with this. I have a lot of experience with this. I have a lot of experience with this. I have a lot of experience with this. I have a lot of experience with this. I have a lot of experience with this. I have a lot of experience with this. I have a lot of experience with this. I have a lot of experience with this. I have a lot of experience with this. I have a lot of experience with this. I have a lot of experience with this. I have a lot of experience with this. I have a lot of experience with this. I have a lot of experience with this. I have a lot of experience with this. I have a lot of experience with this. I have a lot of experience with this. I have a lot of experience with this. I have a lot of experience with this. I have a lot of experience with this. I have a lot of experience with this. I have a lot 

  0%|          | 0/3 [00:00<?, ?it/s]

2024-04-10 00:10:24,279 - DEBUG - utilities - Step (1) Logs: {'loss': 4.1645, 'learning_rate': 1e-05, 'epoch': 0.0, 'iter_time': 0.0, 'flops': 0.0, 'remaining_time': 0.0}
 33%|███▎      | 4/12 [01:56<03:53, 29.13s/it]

{'loss': 4.1645, 'learning_rate': 1e-05, 'epoch': 0.0, 'iter_time': 0.0, 'flops': 0.0, 'remaining_time': 0.0}


2024-04-10 00:10:25,479 - DEBUG - utilities - Step (2) Logs: {'loss': 3.7741, 'learning_rate': 5e-06, 'epoch': 0.01, 'iter_time': 1.1997606754302979, 'flops': 1372566123384.0554, 'remaining_time': 1.1997606754302979}
 33%|███▎      | 4/12 [01:57<03:53, 29.13s/it]

{'loss': 3.7741, 'learning_rate': 5e-06, 'epoch': 0.01, 'iter_time': 1.1997606754302979, 'flops': 1372566123384.0554, 'remaining_time': 1.1997606754302979}


2024-04-10 00:10:26,489 - DEBUG - utilities - Step (3) Logs: {'loss': 3.7762, 'learning_rate': 0.0, 'epoch': 0.01, 'iter_time': 1.104989767074585, 'flops': 1490286071719.6553, 'remaining_time': 0.0}
 33%|███▎      | 4/12 [01:58<03:53, 29.13s/it]2024-04-10 00:10:26,497 - DEBUG - utilities - Step (3) Logs: {'train_runtime': 3.3779, 'train_samples_per_second': 2.664, 'train_steps_per_second': 0.888, 'total_flos': 138837393408.0, 'train_loss': 3.904948075612386, 'epoch': 0.01, 'iter_time': 1.108798623085022, 'flops': 1485166760653.2808, 'remaining_time': 0.0}
 33%|███▎      | 4/12 [01:58<03:53, 29.13s/it]

{'loss': 3.7762, 'learning_rate': 0.0, 'epoch': 0.01, 'iter_time': 1.104989767074585, 'flops': 1490286071719.6553, 'remaining_time': 0.0}
{'train_runtime': 3.3779, 'train_samples_per_second': 2.664, 'train_steps_per_second': 0.888, 'train_loss': 3.904948075612386, 'epoch': 0.01, 'iter_time': 1.108798623085022, 'flops': 1485166760653.2808, 'remaining_time': 0.0}


  0%|          | 0/100 [00:00<?, ?it/s]

2024-04-10 00:10:34,341 - DEBUG - utilities - Step (3) Logs: {'eval_loss': 4.159300804138184, 'eval_runtime': 7.8343, 'eval_samples_per_second': 12.764, 'eval_steps_per_second': 12.764, 'epoch': 0.01, 'iter_time': 5.030766725540161, 'flops': 327335960720.21924, 'remaining_time': 0.0}


Saved model to: ai_medical_0_1_1_adafactor_1_3_3
Question input (test): Will Kalarchikai cure multiple ovarian cysts in PCOD?


 42%|████▏     | 5/12 [02:27<03:31, 30.27s/it]

Finetuned slightly model's answer: 


A:

I think you should use a "do not use a "do not use a "do not use a "do not use a "do not use a "do not use a "do not use a "do not use a "do not use a "do not use a "do not use a "do not use a "do not use a "do not use a "do not use a "do not use a "do not use a "do not use a "do not use a "do not use a "do not use a "do not use a "do not use a "do not use a "do not use a "do not use a "do not use a "do not use a "do not use a "do not use a "do not use a "do not use a "do not use a "do not use a "do not use a "do not use a "do not use a "do not use a "do not use a "do not use a "do not use a "do not use a "do not use a "do not use a "do not use a "do not use a "do not use a "do not use a "do not use a "do not use a "do not use a "do not use a "do not use a "do not use a "do not use a "do not use a "do not use a "do not use a "do not use a "do not use a "do not use a "do not use a "do not use a "do not use a "do not use a "do not use a "do not u

  0%|          | 0/3 [00:00<?, ?it/s]

2024-04-10 00:10:56,419 - DEBUG - utilities - Step (1) Logs: {'loss': 3.7539, 'learning_rate': 1e-05, 'epoch': 0.0, 'iter_time': 0.0, 'flops': 0.0, 'remaining_time': 0.0}
 42%|████▏     | 5/12 [02:28<03:31, 30.27s/it]

{'loss': 3.7539, 'learning_rate': 1e-05, 'epoch': 0.0, 'iter_time': 0.0, 'flops': 0.0, 'remaining_time': 0.0}


2024-04-10 00:10:57,454 - DEBUG - utilities - Step (2) Logs: {'loss': 3.0897, 'learning_rate': 5e-06, 'epoch': 0.01, 'iter_time': 1.0353007316589355, 'flops': 1590601463813.6057, 'remaining_time': 1.0353007316589355}
 42%|████▏     | 5/12 [02:29<03:31, 30.27s/it]

{'loss': 3.0897, 'learning_rate': 5e-06, 'epoch': 0.01, 'iter_time': 1.0353007316589355, 'flops': 1590601463813.6057, 'remaining_time': 1.0353007316589355}


2024-04-10 00:10:58,447 - DEBUG - utilities - Step (3) Logs: {'loss': 3.3709, 'learning_rate': 0.0, 'epoch': 0.01, 'iter_time': 1.0138052701950073, 'flops': 1624326591779.548, 'remaining_time': 0.0}
 42%|████▏     | 5/12 [02:30<03:31, 30.27s/it]2024-04-10 00:10:58,451 - DEBUG - utilities - Step (3) Logs: {'train_runtime': 3.0225, 'train_samples_per_second': 2.978, 'train_steps_per_second': 0.993, 'total_flos': 138837393408.0, 'train_loss': 3.4048125743865967, 'epoch': 0.01, 'iter_time': 1.016201138496399, 'flops': 1620496963524.9385, 'remaining_time': 0.0}
 42%|████▏     | 5/12 [02:30<03:31, 30.27s/it]

{'loss': 3.3709, 'learning_rate': 0.0, 'epoch': 0.01, 'iter_time': 1.0138052701950073, 'flops': 1624326591779.548, 'remaining_time': 0.0}
{'train_runtime': 3.0225, 'train_samples_per_second': 2.978, 'train_steps_per_second': 0.993, 'train_loss': 3.4048125743865967, 'epoch': 0.01, 'iter_time': 1.016201138496399, 'flops': 1620496963524.9385, 'remaining_time': 0.0}


  0%|          | 0/100 [00:00<?, ?it/s]

2024-04-10 00:11:06,080 - DEBUG - utilities - Step (3) Logs: {'eval_loss': 3.987677574157715, 'eval_runtime': 7.6198, 'eval_samples_per_second': 13.124, 'eval_steps_per_second': 13.124, 'epoch': 0.01, 'iter_time': 4.830623984336853, 'flops': 340898166490.1963, 'remaining_time': 0.0}


Saved model to: ai_medical_0_5_1_adafactor_1_3_3
Question input (test): Will Kalarchikai cure multiple ovarian cysts in PCOD?


 50%|█████     | 6/12 [02:56<02:59, 29.86s/it]

Finetuned slightly model's answer: 


A:

I think you should consult a doctor for a complete medical history.  If you have a history of ovarian cyst, then you should consult a doctor for a complete medical history.  If you have a history of ovarian cyst, then consult a doctor for a complete medical history.  If you have a history of ovarian cyst, then consult a doctor for a complete medical history.  If you have a history of ovarian cyst, consult a doctor for a complete medical history.  If you have a history of ovarian cyst, consult a doctor for a complete medical history.  If you have a history of ovarian cyst, consult a doctor for a complete medical history.  If you have a history of ovarian cyst, consult a doctor for a complete medical history.  If you have a history of ovarian cyst, consult a doctor for a complete medical history.  If you have a history of ovarian cyst, consult a doctor for a complete medical history.  If you have a history of ovarian cyst, consult a doctor for a 

  0%|          | 0/3 [00:00<?, ?it/s]

2024-04-10 00:11:25,484 - DEBUG - utilities - Step (1) Logs: {'loss': 3.369, 'learning_rate': 1e-05, 'epoch': 0.0, 'iter_time': 0.0, 'flops': 0.0, 'remaining_time': 0.0}
 50%|█████     | 6/12 [02:57<02:59, 29.86s/it]

{'loss': 3.369, 'learning_rate': 1e-05, 'epoch': 0.0, 'iter_time': 0.0, 'flops': 0.0, 'remaining_time': 0.0}


2024-04-10 00:11:26,584 - DEBUG - utilities - Step (2) Logs: {'loss': 2.4756, 'learning_rate': 5e-06, 'epoch': 0.01, 'iter_time': 1.1001179218292236, 'flops': 1496885767051.1006, 'remaining_time': 1.1001179218292236}
 50%|█████     | 6/12 [02:58<02:59, 29.86s/it]

{'loss': 2.4756, 'learning_rate': 5e-06, 'epoch': 0.01, 'iter_time': 1.1001179218292236, 'flops': 1496885767051.1006, 'remaining_time': 1.1001179218292236}


2024-04-10 00:11:27,654 - DEBUG - utilities - Step (3) Logs: {'loss': 2.9866, 'learning_rate': 0.0, 'epoch': 0.01, 'iter_time': 1.0849488973617554, 'flops': 1517814215276.282, 'remaining_time': 0.0}
 50%|█████     | 6/12 [02:59<02:59, 29.86s/it]2024-04-10 00:11:27,658 - DEBUG - utilities - Step (3) Logs: {'train_runtime': 3.1788, 'train_samples_per_second': 2.831, 'train_steps_per_second': 0.944, 'total_flos': 138837393408.0, 'train_loss': 2.943731149037679, 'epoch': 0.01, 'iter_time': 1.0869437456130981, 'flops': 1515028598223.4883, 'remaining_time': 0.0}
 50%|█████     | 6/12 [02:59<02:59, 29.86s/it]

{'loss': 2.9866, 'learning_rate': 0.0, 'epoch': 0.01, 'iter_time': 1.0849488973617554, 'flops': 1517814215276.282, 'remaining_time': 0.0}
{'train_runtime': 3.1788, 'train_samples_per_second': 2.831, 'train_steps_per_second': 0.944, 'train_loss': 2.943731149037679, 'epoch': 0.01, 'iter_time': 1.0869437456130981, 'flops': 1515028598223.4883, 'remaining_time': 0.0}


  0%|          | 0/100 [00:00<?, ?it/s]

2024-04-10 00:11:35,239 - DEBUG - utilities - Step (3) Logs: {'eval_loss': 3.842315673828125, 'eval_runtime': 7.5717, 'eval_samples_per_second': 13.207, 'eval_steps_per_second': 13.207, 'epoch': 0.01, 'iter_time': 4.877526760101318, 'flops': 337620056282.33453, 'remaining_time': 0.0}


Saved model to: ai_medical_0_10_1_adafactor_1_3_3
Question input (test): Will Kalarchikai cure multiple ovarian cysts in PCOD?


 58%|█████▊    | 7/12 [03:25<02:28, 29.64s/it]

Finetuned slightly model's answer: 


Hi, I have a question for you. I have a question for you. I have a question for you. I have a question for you. I have a question for you. I have a question for you. I have a question for you. I have a question for you. I have a question for you. I have a question for you. I have a question for you. I have a question for you. I have a question for you. I have a question for you. I have a question for you. I have a question for you. I have a question for you. I have a question for you. I have a question for you. I have a question for you. I have a question for you. I have a question for you. I have a question for you. I have a question for you. I have a question for you. I have a question for you. I have a question for you. I have a question for you. I have a question for you. I have a question for you. I have a question for you. I have a question for you. I have a question for you. I have a question for you. I have a question for you. I have a ques

  0%|          | 0/3 [00:00<?, ?it/s]

2024-04-10 00:11:54,639 - DEBUG - utilities - Step (1) Logs: {'loss': 2.9971, 'learning_rate': 1e-05, 'epoch': 0.0, 'iter_time': 0.0, 'flops': 0.0, 'remaining_time': 0.0}
 58%|█████▊    | 7/12 [03:26<02:28, 29.64s/it]

{'loss': 2.9971, 'learning_rate': 1e-05, 'epoch': 0.0, 'iter_time': 0.0, 'flops': 0.0, 'remaining_time': 0.0}


2024-04-10 00:11:55,707 - DEBUG - utilities - Step (2) Logs: {'loss': 1.9608, 'learning_rate': 5e-06, 'epoch': 0.01, 'iter_time': 1.0677645206451416, 'flops': 1542241596741.7944, 'remaining_time': 1.0677645206451416}
 58%|█████▊    | 7/12 [03:27<02:28, 29.64s/it]

{'loss': 1.9608, 'learning_rate': 5e-06, 'epoch': 0.01, 'iter_time': 1.0677645206451416, 'flops': 1542241596741.7944, 'remaining_time': 1.0677645206451416}


2024-04-10 00:11:56,699 - DEBUG - utilities - Step (3) Logs: {'loss': 2.6696, 'learning_rate': 0.0, 'epoch': 0.01, 'iter_time': 1.0300534963607788, 'flops': 1598704208162.0403, 'remaining_time': 0.0}
 58%|█████▊    | 7/12 [03:28<02:28, 29.64s/it]2024-04-10 00:11:56,705 - DEBUG - utilities - Step (3) Logs: {'train_runtime': 3.0564, 'train_samples_per_second': 2.945, 'train_steps_per_second': 0.982, 'total_flos': 138837393408.0, 'train_loss': 2.542485992113749, 'epoch': 0.01, 'iter_time': 1.0330538749694824, 'flops': 1594060967355.3054, 'remaining_time': 0.0}
 58%|█████▊    | 7/12 [03:28<02:28, 29.64s/it]

{'loss': 2.6696, 'learning_rate': 0.0, 'epoch': 0.01, 'iter_time': 1.0300534963607788, 'flops': 1598704208162.0403, 'remaining_time': 0.0}
{'train_runtime': 3.0564, 'train_samples_per_second': 2.945, 'train_steps_per_second': 0.982, 'train_loss': 2.542485992113749, 'epoch': 0.01, 'iter_time': 1.0330538749694824, 'flops': 1594060967355.3054, 'remaining_time': 0.0}


  0%|          | 0/100 [00:00<?, ?it/s]

2024-04-10 00:12:04,184 - DEBUG - utilities - Step (3) Logs: {'eval_loss': 3.747753620147705, 'eval_runtime': 7.4692, 'eval_samples_per_second': 13.388, 'eval_steps_per_second': 13.388, 'epoch': 0.01, 'iter_time': 4.772409200668335, 'flops': 345056509201.5552, 'remaining_time': 0.0}


Saved model to: ai_medical_0_20_1_adafactor_1_3_3
Question input (test): Will Kalarchikai cure multiple ovarian cysts in PCOD?


 67%|██████▋   | 8/12 [03:54<01:57, 29.38s/it]

Finetuned slightly model's answer: 


Hi. I have a question for you. I have a question for you. I have a question for you. I have a question for you. I have a question for you. I have a question for you. I have a question for you. I have a question for you. I have a question for you. I have a question for you. I have a question for you. I have a question for you. I have a question for you. I have a question for you. I have a question for you. I have a question for you. I have a question for you. I have a question for you. I have a question for you. I have a question for you. I have a question for you. I have a question for you. I have a question for you. I have a question for you. I have a question for you. I have a question for you. I have a question for you. I have a question for you. I have a question for you. I have a question for you. I have a question for you. I have a question for you. I have a question for you. I have a question for you. I have a question for you. I have a ques

  0%|          | 0/3 [00:00<?, ?it/s]

2024-04-10 00:12:23,461 - DEBUG - utilities - Step (1) Logs: {'loss': 2.7182, 'learning_rate': 0.0001, 'epoch': 0.0, 'iter_time': 0.0, 'flops': 0.0, 'remaining_time': 0.0}
 67%|██████▋   | 8/12 [03:55<01:57, 29.38s/it]

{'loss': 2.7182, 'learning_rate': 0.0001, 'epoch': 0.0, 'iter_time': 0.0, 'flops': 0.0, 'remaining_time': 0.0}


2024-04-10 00:12:24,520 - DEBUG - utilities - Step (2) Logs: {'loss': 1.5383, 'learning_rate': 5e-05, 'epoch': 0.01, 'iter_time': 1.059624433517456, 'flops': 1554089172705.8044, 'remaining_time': 1.059624433517456}
 67%|██████▋   | 8/12 [03:56<01:57, 29.38s/it]

{'loss': 1.5383, 'learning_rate': 5e-05, 'epoch': 0.01, 'iter_time': 1.059624433517456, 'flops': 1554089172705.8044, 'remaining_time': 1.059624433517456}


2024-04-10 00:12:25,594 - DEBUG - utilities - Step (3) Logs: {'loss': 3.9501, 'learning_rate': 0.0, 'epoch': 0.01, 'iter_time': 1.0665181875228882, 'flops': 1544043860225.9275, 'remaining_time': 0.0}
 67%|██████▋   | 8/12 [03:57<01:57, 29.38s/it]2024-04-10 00:12:25,601 - DEBUG - utilities - Step (3) Logs: {'train_runtime': 3.0976, 'train_samples_per_second': 2.905, 'train_steps_per_second': 0.968, 'total_flos': 138837393408.0, 'train_loss': 2.735529979070028, 'epoch': 0.01, 'iter_time': 1.069452166557312, 'flops': 1539807866830.6204, 'remaining_time': 0.0}
 67%|██████▋   | 8/12 [03:57<01:57, 29.38s/it]

{'loss': 3.9501, 'learning_rate': 0.0, 'epoch': 0.01, 'iter_time': 1.0665181875228882, 'flops': 1544043860225.9275, 'remaining_time': 0.0}
{'train_runtime': 3.0976, 'train_samples_per_second': 2.905, 'train_steps_per_second': 0.968, 'train_loss': 2.735529979070028, 'epoch': 0.01, 'iter_time': 1.069452166557312, 'flops': 1539807866830.6204, 'remaining_time': 0.0}


  0%|          | 0/100 [00:00<?, ?it/s]

2024-04-10 00:12:33,028 - DEBUG - utilities - Step (3) Logs: {'eval_loss': 3.955519914627075, 'eval_runtime': 7.4187, 'eval_samples_per_second': 13.48, 'eval_steps_per_second': 13.48, 'epoch': 0.01, 'iter_time': 4.7835365533828735, 'flops': 344253846685.76074, 'remaining_time': 0.0}


Saved model to: ai_medical_0_1_1_adafactor_1_3_3
Question input (test): Will Kalarchikai cure multiple ovarian cysts in PCOD?


 75%|███████▌  | 9/12 [04:23<01:27, 29.21s/it]

Finetuned slightly model's answer: 
Hi. For further information consult a urologist online --> urologist --> urologist --> urologist --> urologist --> urologist --> urologist --> urologist --> urologist --> urologist --> urologist --> urologist --> urologist --> urologist --> urologist --> urologist --> urologist --> urologist --> urologist --> urologist --> urologist --> urologist --> urologist --> urologist --> urologist --> urologist --> urologist --> urologist --> urologist --> urologist --> urologist --> urologist --> urologist --> urologist --> urologist --> urologist --> urologist --> urologist --> urologist --> urologist --> urologist --> urologist --> urologist --> urologist --> urologist --> urologist --> urologist --> urologist --> urologist --> urologist --> urologist --> urologist --> urologist --> urologist --> urologist --> urologist --> urologist --> urologist --> urologist --> urologist --> urologist --> urologist --> urologist --> urologist --> urologist --> urologist

  0%|          | 0/3 [00:00<?, ?it/s]

2024-04-10 00:12:52,439 - DEBUG - utilities - Step (1) Logs: {'loss': 2.1435, 'learning_rate': 0.0001, 'epoch': 0.0, 'iter_time': 0.0, 'flops': 0.0, 'remaining_time': 0.0}
 75%|███████▌  | 9/12 [04:24<01:27, 29.21s/it]

{'loss': 2.1435, 'learning_rate': 0.0001, 'epoch': 0.0, 'iter_time': 0.0, 'flops': 0.0, 'remaining_time': 0.0}


2024-04-10 00:12:53,469 - DEBUG - utilities - Step (2) Logs: {'loss': 0.4897, 'learning_rate': 5e-05, 'epoch': 0.01, 'iter_time': 1.0297749042510986, 'flops': 1599136716641.5808, 'remaining_time': 1.0297749042510986}
 75%|███████▌  | 9/12 [04:25<01:27, 29.21s/it]

{'loss': 0.4897, 'learning_rate': 5e-05, 'epoch': 0.01, 'iter_time': 1.0297749042510986, 'flops': 1599136716641.5808, 'remaining_time': 1.0297749042510986}


2024-04-10 00:12:54,454 - DEBUG - utilities - Step (3) Logs: {'loss': 2.7209, 'learning_rate': 0.0, 'epoch': 0.01, 'iter_time': 1.0074119567871094, 'flops': 1634635015168.8726, 'remaining_time': 0.0}
 75%|███████▌  | 9/12 [04:26<01:27, 29.21s/it]2024-04-10 00:12:54,458 - DEBUG - utilities - Step (3) Logs: {'train_runtime': 3.1437, 'train_samples_per_second': 2.863, 'train_steps_per_second': 0.954, 'total_flos': 138837393408.0, 'train_loss': 1.7846872011820476, 'epoch': 0.01, 'iter_time': 1.009319543838501, 'flops': 1631545598533.9495, 'remaining_time': 0.0}
 75%|███████▌  | 9/12 [04:26<01:27, 29.21s/it]

{'loss': 2.7209, 'learning_rate': 0.0, 'epoch': 0.01, 'iter_time': 1.0074119567871094, 'flops': 1634635015168.8726, 'remaining_time': 0.0}
{'train_runtime': 3.1437, 'train_samples_per_second': 2.863, 'train_steps_per_second': 0.954, 'train_loss': 1.7846872011820476, 'epoch': 0.01, 'iter_time': 1.009319543838501, 'flops': 1631545598533.9495, 'remaining_time': 0.0}


  0%|          | 0/100 [00:00<?, ?it/s]

2024-04-10 00:13:01,962 - DEBUG - utilities - Step (3) Logs: {'eval_loss': 4.3177995681762695, 'eval_runtime': 7.4951, 'eval_samples_per_second': 13.342, 'eval_steps_per_second': 13.342, 'epoch': 0.01, 'iter_time': 4.761642575263977, 'flops': 345836721936.8008, 'remaining_time': 0.0}


Saved model to: ai_medical_0_5_1_adafactor_1_3_3
Question input (test): Will Kalarchikai cure multiple ovarian cysts in PCOD?


 83%|████████▎ | 10/12 [04:52<00:58, 29.13s/it]

Finetuned slightly model's answer: 
Hi. Is it a steroid?Hi. I have gone through your information and test reports and reports. For more information consult a urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> ur

  0%|          | 0/3 [00:00<?, ?it/s]

2024-04-10 00:13:21,250 - DEBUG - utilities - Step (1) Logs: {'loss': 2.1096, 'learning_rate': 0.0001, 'epoch': 0.0, 'iter_time': 0.0, 'flops': 0.0, 'remaining_time': 0.0}
 83%|████████▎ | 10/12 [04:53<00:58, 29.13s/it]

{'loss': 2.1096, 'learning_rate': 0.0001, 'epoch': 0.0, 'iter_time': 0.0, 'flops': 0.0, 'remaining_time': 0.0}


2024-04-10 00:13:22,338 - DEBUG - utilities - Step (2) Logs: {'loss': 0.351, 'learning_rate': 5e-05, 'epoch': 0.01, 'iter_time': 1.0882236957550049, 'flops': 1513246647438.1367, 'remaining_time': 1.0882236957550049}
 83%|████████▎ | 10/12 [04:54<00:58, 29.13s/it]

{'loss': 0.351, 'learning_rate': 5e-05, 'epoch': 0.01, 'iter_time': 1.0882236957550049, 'flops': 1513246647438.1367, 'remaining_time': 1.0882236957550049}


2024-04-10 00:13:23,297 - DEBUG - utilities - Step (3) Logs: {'loss': 2.2364, 'learning_rate': 0.0, 'epoch': 0.01, 'iter_time': 1.023516058921814, 'flops': 1608915507391.9507, 'remaining_time': 0.0}
 83%|████████▎ | 10/12 [04:55<00:58, 29.13s/it]2024-04-10 00:13:23,301 - DEBUG - utilities - Step (3) Logs: {'train_runtime': 3.0576, 'train_samples_per_second': 2.943, 'train_steps_per_second': 0.981, 'total_flos': 138837393408.0, 'train_loss': 1.5656781792640686, 'epoch': 0.01, 'iter_time': 1.025452971458435, 'flops': 1605876529785.5964, 'remaining_time': 0.0}
 83%|████████▎ | 10/12 [04:55<00:58, 29.13s/it]

{'loss': 2.2364, 'learning_rate': 0.0, 'epoch': 0.01, 'iter_time': 1.023516058921814, 'flops': 1608915507391.9507, 'remaining_time': 0.0}
{'train_runtime': 3.0576, 'train_samples_per_second': 2.943, 'train_steps_per_second': 0.981, 'train_loss': 1.5656781792640686, 'epoch': 0.01, 'iter_time': 1.025452971458435, 'flops': 1605876529785.5964, 'remaining_time': 0.0}


  0%|          | 0/100 [00:00<?, ?it/s]

2024-04-10 00:13:30,892 - DEBUG - utilities - Step (3) Logs: {'eval_loss': 4.942234039306641, 'eval_runtime': 7.5832, 'eval_samples_per_second': 13.187, 'eval_steps_per_second': 13.187, 'epoch': 0.01, 'iter_time': 4.820823550224304, 'flops': 341591191236.89557, 'remaining_time': 0.0}


Saved model to: ai_medical_0_10_1_adafactor_1_3_3
Question input (test): Will Kalarchikai cure multiple ovarian cysts in PCOD?


 92%|█████████▏| 11/12 [05:21<00:29, 29.09s/it]

Finetuned slightly model's answer: 
Hi.Hello. For further information consult a urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online 

  0%|          | 0/3 [00:00<?, ?it/s]

2024-04-10 00:13:50,258 - DEBUG - utilities - Step (1) Logs: {'loss': 2.2212, 'learning_rate': 0.0001, 'epoch': 0.0, 'iter_time': 0.0, 'flops': 0.0, 'remaining_time': 0.0}
 92%|█████████▏| 11/12 [05:22<00:29, 29.09s/it]

{'loss': 2.2212, 'learning_rate': 0.0001, 'epoch': 0.0, 'iter_time': 0.0, 'flops': 0.0, 'remaining_time': 0.0}


2024-04-10 00:13:51,399 - DEBUG - utilities - Step (2) Logs: {'loss': 0.4669, 'learning_rate': 5e-05, 'epoch': 0.01, 'iter_time': 1.1403756141662598, 'flops': 1444042505651.0493, 'remaining_time': 1.1403756141662598}
 92%|█████████▏| 11/12 [05:23<00:29, 29.09s/it]

{'loss': 0.4669, 'learning_rate': 5e-05, 'epoch': 0.01, 'iter_time': 1.1403756141662598, 'flops': 1444042505651.0493, 'remaining_time': 1.1403756141662598}


2024-04-10 00:13:52,372 - DEBUG - utilities - Step (3) Logs: {'loss': 1.3921, 'learning_rate': 0.0, 'epoch': 0.01, 'iter_time': 1.057012915611267, 'flops': 1557928796273.6855, 'remaining_time': 0.0}
 92%|█████████▏| 11/12 [05:24<00:29, 29.09s/it]2024-04-10 00:13:52,377 - DEBUG - utilities - Step (3) Logs: {'train_runtime': 3.1048, 'train_samples_per_second': 2.899, 'train_steps_per_second': 0.966, 'total_flos': 138837393408.0, 'train_loss': 1.360078861316045, 'epoch': 0.01, 'iter_time': 1.0595853328704834, 'flops': 1554146521453.6787, 'remaining_time': 0.0}
 92%|█████████▏| 11/12 [05:24<00:29, 29.09s/it]

{'loss': 1.3921, 'learning_rate': 0.0, 'epoch': 0.01, 'iter_time': 1.057012915611267, 'flops': 1557928796273.6855, 'remaining_time': 0.0}
{'train_runtime': 3.1048, 'train_samples_per_second': 2.899, 'train_steps_per_second': 0.966, 'train_loss': 1.360078861316045, 'epoch': 0.01, 'iter_time': 1.0595853328704834, 'flops': 1554146521453.6787, 'remaining_time': 0.0}


  0%|          | 0/100 [00:00<?, ?it/s]

2024-04-10 00:13:59,853 - DEBUG - utilities - Step (3) Logs: {'eval_loss': 5.07048225402832, 'eval_runtime': 7.4669, 'eval_samples_per_second': 13.392, 'eval_steps_per_second': 13.392, 'epoch': 0.01, 'iter_time': 4.797343850135803, 'flops': 343263045282.31464, 'remaining_time': 0.0}


Saved model to: ai_medical_0_20_1_adafactor_1_3_3
Question input (test): Will Kalarchikai cure multiple ovarian cysts in PCOD?


100%|██████████| 12/12 [05:50<00:00, 29.18s/it]

Finetuned slightly model's answer: 
Hi. I hope this helps. I hope. For more information consult a urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online --> urologist online -->




In [39]:
print("Best hyperparameters:", best_hyperparameters)
print("Best loss:", best_loss)

Best hyperparameters: {'learning_rate': 1e-05, 'num_train_epochs': 20, 'per_device_train_batch_size': 1, 'optim': 'adafactor', 'num_iterations': 1, 'max_steps': 3, 'gradient_accumulation_steps': 3}
Best loss: 3.747753620147705


In [52]:
# Sort the DataFrame by 'eval_loss' inside the 'Evaluation Results' column
df_sorted = df.sort_values(by='Evaluation Results', 
                           key=lambda x: x.apply(lambda d: d['eval_loss']))



In [54]:
df_sorted.head()

Unnamed: 0,Hyperparameters,Evaluation Results,Training Output,Cosine Similarity
7,"{'learning_rate': 1e-05, 'num_train_epochs': 2...","{'eval_loss': 3.747753620147705, 'eval_runtime...","(3, 2.542485992113749, {'train_runtime': 3.056...",0.20208
6,"{'learning_rate': 1e-05, 'num_train_epochs': 1...","{'eval_loss': 3.842315673828125, 'eval_runtime...","(3, 2.943731149037679, {'train_runtime': 3.178...",0.20208
8,"{'learning_rate': 0.0001, 'num_train_epochs': ...","{'eval_loss': 3.955519914627075, 'eval_runtime...","(3, 2.735529979070028, {'train_runtime': 3.097...",0.000816
5,"{'learning_rate': 1e-05, 'num_train_epochs': 5...","{'eval_loss': 3.987677574157715, 'eval_runtime...","(3, 3.4048125743865967, {'train_runtime': 3.02...",0.272767
4,"{'learning_rate': 1e-05, 'num_train_epochs': 1...","{'eval_loss': 4.159300804138184, 'eval_runtime...","(3, 3.904948075612386, {'train_runtime': 3.377...",0.142329


In [58]:
# Sort the DataFrame by 'Cosine Similarity' from largest to smallest
df_cos = df.sort_values(by='Cosine Similarity', ascending=False)

In [59]:
df_cos

Unnamed: 0,Hyperparameters,Evaluation Results,Training Output,Cosine Similarity
5,"{'learning_rate': 1e-05, 'num_train_epochs': 5...","{'eval_loss': 3.987677574157715, 'eval_runtime...","(3, 3.4048125743865967, {'train_runtime': 3.02...",0.272767
3,"{'learning_rate': 1e-06, 'num_train_epochs': 2...","{'eval_loss': 4.3534440994262695, 'eval_runtim...","(3, 3.999807039896647, {'train_runtime': 2.944...",0.225828
0,"{'learning_rate': 1e-06, 'num_train_epochs': 1...","{'eval_loss': 4.424636363983154, 'eval_runtime...","(3, 4.1867062250773115, {'train_runtime': 3.24...",0.224779
1,"{'learning_rate': 1e-06, 'num_train_epochs': 5...","{'eval_loss': 4.399848461151123, 'eval_runtime...","(3, 4.1223320960998535, {'train_runtime': 2.91...",0.224779
2,"{'learning_rate': 1e-06, 'num_train_epochs': 1...","{'eval_loss': 4.3765645027160645, 'eval_runtim...","(3, 4.059745152791341, {'train_runtime': 2.928...",0.224779
6,"{'learning_rate': 1e-05, 'num_train_epochs': 1...","{'eval_loss': 3.842315673828125, 'eval_runtime...","(3, 2.943731149037679, {'train_runtime': 3.178...",0.20208
7,"{'learning_rate': 1e-05, 'num_train_epochs': 2...","{'eval_loss': 3.747753620147705, 'eval_runtime...","(3, 2.542485992113749, {'train_runtime': 3.056...",0.20208
4,"{'learning_rate': 1e-05, 'num_train_epochs': 1...","{'eval_loss': 4.159300804138184, 'eval_runtime...","(3, 3.904948075612386, {'train_runtime': 3.377...",0.142329
9,"{'learning_rate': 0.0001, 'num_train_epochs': ...","{'eval_loss': 4.3177995681762695, 'eval_runtim...","(3, 1.7846872011820476, {'train_runtime': 3.14...",0.003605
11,"{'learning_rate': 0.0001, 'num_train_epochs': ...","{'eval_loss': 5.07048225402832, 'eval_runtime'...","(3, 1.360078861316045, {'train_runtime': 3.104...",0.001679
