# Domain Adaptation

In [None]:
# Example Json for Training

[
  {
    "instruction": "Rearrange the furniture in a 12x14 living room for a wheelchair user.",
    "input": "Furniture: Sofa (200x90), Coffee table (100x60), Bookshelf (80x30), TV stand (120x40).\nConstraints: 90cm clear paths, 150cm turning radius near sofa.",
    "output": "Place sofa along north wall, TV stand opposite, leave 120cm clearance in center, move bookshelf to east wall, coffee table closer to south for free wheelchair access."
  },
  {
    "instruction": "Optimize a bedroom for elderly comfort and fall prevention.",
    "input": "Room: 10x12. Furniture: Bed, Wardrobe, Chair, Side table.",
    "output": "Place bed near center wall, keep side table within 40cm reach, wardrobe along west wall with 100cm clearance, chair near window but not blocking pathway."
  }
]


In [1]:
!ollama --version

ollama version is 0.11.10


In [22]:
import os
import json

# Paths
dataset_json = r"C:\Users\Admin\Documents\SmartFurnitureLayoutPlanner\constraints\bedroom_lora_dataset.json"
dataset_jsonl = r"C:\Users\Admin\Documents\SmartFurnitureLayoutPlanner\constraints\bedroom_lora_dataset.jsonl"

# Verify dataset exists
if not os.path.exists(dataset_json):
    raise FileNotFoundError(f"File not found: {dataset_json}")


In [23]:
with open(dataset_json, "r") as f:
    data = json.load(f)

with open(dataset_jsonl, "w") as f:
    for entry in data:
        prompt = f"{entry['instruction']}\n{entry['input']}"
        completion = entry['output']
        json_line = {"prompt": prompt, "completion": completion}
        f.write(json.dumps(json_line) + "\n")

print(f"JSONL dataset created successfully at: {dataset_jsonl}")


JSONL dataset created successfully at: C:\Users\Admin\Documents\SmartFurnitureLayoutPlanner\constraints\bedroom_lora_dataset.jsonl


In [26]:
!pip install torch transformers datasets accelerate peft bitsandbytes


Collecting transformers
  Downloading transformers-4.56.1-py3-none-any.whl.metadata (42 kB)
Collecting datasets
  Downloading datasets-4.0.0-py3-none-any.whl.metadata (19 kB)
Collecting accelerate
  Downloading accelerate-1.10.1-py3-none-any.whl.metadata (19 kB)
Collecting peft
  Downloading peft-0.17.1-py3-none-any.whl.metadata (14 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.47.0-py3-none-win_amd64.whl.metadata (11 kB)
Collecting huggingface-hub<1.0,>=0.34.0 (from transformers)
  Downloading huggingface_hub-0.34.4-py3-none-any.whl.metadata (14 kB)
Collecting tokenizers<=0.23.0,>=0.22.0 (from transformers)
  Downloading tokenizers-0.22.0-cp39-abi3-win_amd64.whl.metadata (6.9 kB)
Collecting safetensors>=0.4.3 (from transformers)
  Downloading safetensors-0.6.2-cp38-abi3-win_amd64.whl.metadata (4.1 kB)
Collecting pyarrow>=15.0.0 (from datasets)
  Downloading pyarrow-21.0.0-cp312-cp312-win_amd64.whl.metadata (3.4 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading

In [None]:
from huggingface_hub import login
login("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")

In [10]:
!pip install mistral_inference



In [11]:
from huggingface_hub import snapshot_download
from pathlib import Path

mistral_models_path = Path.home().joinpath('mistral_models', '7B-Instruct-v0.3')
mistral_models_path.mkdir(parents=True, exist_ok=True)

snapshot_download(repo_id="mistralai/Mistral-7B-Instruct-v0.3", allow_patterns=["params.json", "consolidated.safetensors", "tokenizer.model.v3"], local_dir=mistral_models_path)


Fetching 3 files:   0%|          | 0/3 [00:00<?, ?it/s]

'C:\\Users\\Admin\\mistral_models\\7B-Instruct-v0.3'

In [12]:
from datasets import load_dataset

dataset = load_dataset(
    "json",
    data_files={
        "train": "../constraints/bedroom_lora_dataset.jsonl",
        "validation": "../constraints/bedroom_lora_dataset.jsonl"
    }
)

In [6]:
from datasets import load_dataset

dataset = load_dataset("json", data_files="../constraints/bedroom_lora_dataset.jsonl", split="train")

dataset = dataset.train_test_split(test_size=0.2, seed=42)

print(dataset)


DatasetDict({
    train: Dataset({
        features: ['prompt', 'completion'],
        num_rows: 400
    })
    test: Dataset({
        features: ['prompt', 'completion'],
        num_rows: 100
    })
})


In [2]:
from datasets import load_dataset
from transformers import AutoTokenizer

# 1. Load dataset
dataset = load_dataset(
    "json",
    data_files={
        "train": "../constraints/bedroom_lora_dataset.jsonl",
        "test": "../constraints/bedroom_lora_dataset.jsonl"
    },
    split={"train": "train[:80%]", "test": "train[80%:]"}
)

# 2. Load tokenizer
model_name = "mistralai/Mistral-7B-Instruct-v0.3"
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)

# 3. Fix padding
tokenizer.pad_token = tokenizer.eos_token

# 4. Tokenize function
def tokenize_fn(example):
    text = f"### Instruction:\n{example['prompt']}\n\n### Response:\n{example['completion']}"
    return tokenizer(text, truncation=True, padding="max_length", max_length=1024)

# 5. Apply tokenizer
tokenized_dataset = dataset.map(tokenize_fn, batched=True, remove_columns=["prompt", "completion"])

print(tokenized_dataset)


DatasetDict({
    train: Dataset({
        features: ['input_ids', 'attention_mask'],
        num_rows: 1024
    })
    test: Dataset({
        features: ['input_ids', 'attention_mask'],
        num_rows: 1024
    })
})


In [13]:
!pip install transformers accelerate peft datasets




In [4]:
pip install --upgrade transformers peft accelerate


Note: you may need to restart the kernel to use updated packages.


In [14]:
from transformers import AutoModelForCausalLM
from peft import LoraConfig, get_peft_model

model_name = "mistralai/Mistral-7B-Instruct-v0.3"

# Load model
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",      # Automatically assigns layers to GPUs
    load_in_4bit=True,      # Optional: 4-bit quantization
)

# Define LoRA config
lora_config = LoraConfig(
    r=16,                   # Rank
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],  # Modules to apply LoRA
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

# Wrap the model with LoRA
model = get_peft_model(model, lora_config)


ImportError: cannot import name 'PreTrainedModel' from 'transformers' (c:\Users\Admin\AppData\Local\Programs\Python\Python312\Lib\site-packages\transformers\__init__.py)

In [18]:
import transformers, peft
print(transformers.__version__)
print(peft.__version__)


ImportError: cannot import name 'is_torch_tpu_available' from 'transformers.utils' (c:\Users\Admin\AppData\Local\Programs\Python\Python312\Lib\site-packages\transformers\utils\__init__.py)

In [None]:
!pip install --upgrade pip
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu129
!pip install transformers==4.35.2
!pip install peft==0.6.0
!pip install accelerate==0.23.0
!pip install datasets sentencepiece


