In [None]:
! pip install torch transformers accelerate bitsandbytes datasets

In [None]:
! pip install transformers accelerate datasets


In [None]:
! pip install transformers accelerate torch datasets
! pip install bitsandbytes



In [None]:
from llama_cpp import Llama

# Load the quantized model
model_path = "mistral-13b-v0.1.Q3_K_M.gguf"
llm = Llama(model_path=model_path)

# Example prompt
prompt = "Explain the role of a Pulmonologist."
output = llm(prompt, max_tokens=100)
print(output)


In [None]:
import json

# Load symptom data
with open('scraped_data_subsections.json', 'r', encoding='utf-8') as file1:
    symptom_data = json.load(file1)

# Load doctor data
with open('cleaned_doctors_data_multiple_locations_cleaned.json', 'r', encoding='utf-8') as file2:
    doctor_data = json.load(file2)

# Initialize fine-tuning data
fine_tuning_data = []

# Process symptom data
for entry in symptom_data:
    if 'subsections' in entry:
        for subsection in entry['subsections']:
            heading = subsection.get('heading', 'No Heading')
            content = ' '.join(subsection.get('content', []))

            # Add user prompts and completions
            fine_tuning_data.extend([
                {"prompt": f"What should I do if I have {heading}?", "completion": content},
                {"prompt": f"Can you explain {heading}?", "completion": content},
                {"prompt": f"Is {heading} serious?", "completion": content}
            ])

# Process doctor data
for entry in doctor_data:
    doctor_name = entry.get('Doctor Name', 'Unknown Doctor')
    specialty = entry.get('Specialty', 'General')
    location = entry.get('Location', 'Unknown Location')
    address = entry.get('Address', 'Unknown Address')

    # Add user prompts and completions
    fine_tuning_data.extend([
        {"prompt": f"Give me a list of {specialty.lower()}s in {location}.",
         "completion": f"Here is a {specialty.lower()} in {location}: {doctor_name} at {address}."},
        {"prompt": f"Who is a good {specialty.lower()} in {location}?",
         "completion": f"{doctor_name} is a well-known {specialty.lower()} in {location}, located at {address}."},
        {"prompt": f"Where can I find a {specialty.lower()} near {location}?",
         "completion": f"You can find a {specialty.lower()} in {location}: {doctor_name} at {address}."}
    ])

# Save as JSONL
with open('fine_tuning_data.jsonl', 'w', encoding='utf-8') as output_file:
    for entry in fine_tuning_data:
        output_file.write(json.dumps(entry) + '\n')

print("Fine-tuning data has been generated successfully!")


In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
import os

# Path to the local model and tokenizer
local_model_path = "mistral-13b-v0.1.Q3_K_M.gguf"  # Replace with your exact model path

# Load the tokenizer from the local path
tokenizer = AutoTokenizer.from_pretrained(local_model_path, trust_remote_code=True)

# Load the model from the local path
model = AutoModelForCausalLM.from_pretrained(
    local_model_path,
    trust_remote_code=True,
    device_map="auto",  # Use GPU if available
    load_in_8bit=True   # Ensure 8-bit mode for quantized model
)

# Prepare the model for LoRA fine-tuning
model = prepare_model_for_kbit_training(model)

# Define LoRA configuration
lora_config = LoraConfig(
    r=16,  # LoRA rank
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],  # Adjust for your model architecture
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM"
)

# Apply LoRA
model = get_peft_model(model, lora_config)

# Load your fine-tuning dataset
dataset = load_dataset("json", data_files="fine_tuning_data.jsonl", split="train")

# Tokenize the dataset
def tokenize_function(examples):
    return tokenizer(examples["prompt"], text_target=examples["completion"], truncation=True, max_length=512)

tokenized_dataset = dataset.map(tokenize_function, batched=True)

# Define training arguments
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="steps",
    eval_steps=500,
    logging_dir="./logs",
    logging_steps=100,
    save_steps=500,
    save_total_limit=2,
    learning_rate=5e-5,
    weight_decay=0.01,
    per_device_train_batch_size=2,  # Adjust as per hardware
    num_train_epochs=3,
    gradient_accumulation_steps=4,
    fp16=True,
)

# Initialize the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
)

# Start fine-tuning
trainer.train()

# Save the fine-tuned model
model.save_pretrained("./fine_tuned_model")
tokenizer.save_pretrained("./fine_tuned_model")

print("Fine-tuning completed successfully!")


In [35]:
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "meta-llama/Llama-2-13b-hf"  # Replace with your chosen model
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)


To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Downloading shards: 100%|██████████| 3/3 [35:09<00:00, 703.33s/it]
Loading checkpoint shards: 100%|██████████| 3/3 [00:22<00:00,  7.53s/it]
Some parameters are on the meta device because they were offloaded to the disk and cpu.


In [40]:
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments
from datasets import load_dataset
from peft import LoraConfig, get_peft_model
import torch
import os

# Disable symlink warnings
os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "1"

# Load the model and tokenizer
model_name = "meta-llama/Llama-2-13b-hf"
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",  # Automatically distribute layers
    torch_dtype=torch.float16,  # Use mixed precision to save memory
    low_cpu_mem_usage=True  # Load with reduced CPU memory
)

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

# Enable gradient checkpointing
model.gradient_checkpointing_enable()

# Move model to the appropriate device (Ensure layers are fully loaded)
model = model.to("cuda" if torch.cuda.is_available() else "cpu")

# Load fine-tuning dataset
dataset = load_dataset("json", data_files={"train": "fine_tuning_data.jsonl"})

# LoRA configuration
config = LoraConfig(
    r=4,
    lora_alpha=16,
    lora_dropout=0.1,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],  # Target Mistral/LLaMA 2 layers
    bias="none",
    task_type="CAUSAL_LM"
)

# Apply LoRA
lora_model = get_peft_model(model, config)
print("LoRA configuration applied successfully!")

# Define training arguments
training_args = TrainingArguments(
    output_dir="./fine_tuned_model",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,
    num_train_epochs=3,
    learning_rate=2e-5,
    logging_dir="./logs",
    logging_steps=10,
    save_steps=500,
    save_total_limit=2,
    fp16=True,  # Use mixed precision
    report_to="none"
)

# Ensure model is moved off the meta device
lora_model = lora_model.to("cuda" if torch.cuda.is_available() else "cpu")

# Initialize Trainer
trainer = Trainer(
    model=lora_model,
    args=training_args,
    train_dataset=dataset["train"],
    tokenizer=tokenizer
)

# Fine-tune the model
trainer.train()

# Save the fine-tuned model
lora_model.save_pretrained("./fine_tuned_model")
print("Fine-tuning completed and model saved!")


Loading checkpoint shards: 100%|██████████| 3/3 [00:03<00:00,  1.10s/it]
Some parameters are on the meta device because they were offloaded to the disk and cpu.
You shouldn't move a model that is dispatched using accelerate hooks.


RuntimeError: You can't move a model that has some modules offloaded to cpu or disk.