In [None]:
# Step 1: Make Google Drive visible inside the Colab runtime
from google.colab import drive
drive.mount('/content/drive')

# Drive folder that is expected to hold the LoRA adapter
MODEL_PATH = "/content/drive/MyDrive/lora-tinyllama-user1"

# Step 2: Fail fast if the adapter folder or one of its required files is absent
import os

if not os.path.exists(MODEL_PATH):
    raise FileNotFoundError(f"❌ Folder not found: {MODEL_PATH}")

contents = os.listdir(MODEL_PATH)
print("📁 Contents of model folder:", contents)

required_files = ["adapter_config.json", "adapter_model.safetensors"]
# Report the first required file (in list order) that is not in the folder.
first_missing = next(
    (name for name in required_files if name not in contents),
    None,
)
if first_missing is not None:
    raise FileNotFoundError(f"❌ Missing file: {first_missing} in {MODEL_PATH}")

# Step 3: Install libraries
# The leading "!" is IPython/Colab shell-escape syntax, so this line only works
# inside a notebook cell; it is a syntax error in a plain Python script.
# It runs before the Step 4 imports of transformers and peft. "-q" asks pip
# for quiet output.
!pip install -q peft transformers accelerate bitsandbytes

# Step 4: Load the quantized base model, its tokenizer, and the LoRA adapter
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel
import torch

base_model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# 4-bit NF4 quantization with double quantization; float16 is the compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)

print("📦 Loading base model...")
tokenizer = AutoTokenizer.from_pretrained(base_model_id)
load_kwargs = {"quantization_config": bnb_config, "device_map": "auto"}
model = AutoModelForCausalLM.from_pretrained(base_model_id, **load_kwargs)

print("🧩 Loading LoRA adapter from:", MODEL_PATH)
# Wrap the base model with the adapter (not trainable) and switch to eval mode;
# nn.Module.eval() returns the module itself, so the chain keeps `model` bound.
model = PeftModel.from_pretrained(model, MODEL_PATH, is_trainable=False).eval()

# Step 5: Define chat function
def generate_response(user_question: str, max_new_tokens: int = 150) -> str:
    """Generate the adapter-tuned model's answer to a single question.

    The question is wrapped in the "### Instruction: / ### Response:" prompt
    template, sampled from the module-level ``model`` and decoded with the
    module-level ``tokenizer``.

    Args:
        user_question: The text to place in the instruction slot of the prompt.
        max_new_tokens: Upper bound on the number of tokens to generate.
            Defaults to 150, the value that was previously hard-coded.

    Returns:
        The generated answer with surrounding whitespace removed. Anything
        the model produced after a further "### Instruction:" or
        "### Response:" header is dropped.
    """
    prompt = f"### Instruction:\n{user_question}\n\n### Response:\n"

    # Send the inputs to wherever the model lives instead of assuming CUDA:
    # the model is loaded with device_map="auto", so its device is not fixed.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    input_ids = inputs["input_ids"]
    prompt_length = input_ids.shape[-1]

    with torch.no_grad():
        outputs = model.generate(
            input_ids=input_ids,
            # Pass the mask explicitly so generate() does not have to guess it.
            attention_mask=inputs["attention_mask"],
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=0.7,
            top_k=50,
            top_p=0.9,
        )

    # Decode only the newly generated tokens. Splitting the full decoded text
    # on "### Response:" breaks when the question or the output contains it.
    generated_ids = outputs[0][prompt_length:]
    answer = tokenizer.decode(generated_ids, skip_special_tokens=True)

    # The model may keep going and invent another turn; keep only the text
    # that precedes the first repeated template header.
    for marker in ("### Instruction:", "### Response:"):
        answer = answer.split(marker, 1)[0]
    return answer.strip()


# Step 6: Chat loop
# Read questions until the user types "exit"/"quit" (or input ends) and print
# the model's answer for each one.
print("💬 Finance Copilot ready. Ask your question (type 'exit' to quit):")
while True:
    try:
        # Strip so that inputs such as "exit " or " quit" are still recognized.
        user_input = input("You: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Closed stdin or an interrupt ends the session without a traceback.
        print("👋 Exiting.")
        break
    if user_input.lower() in ("exit", "quit"):
        print("👋 Exiting.")
        break
    if not user_input:
        # Nothing to ask; do not spend a generation on an empty prompt.
        continue
    response = generate_response(user_input)
    # Keep the text BEFORE the first "###" marker: anything after it is a
    # follow-up section the model invented, not the answer. Fall back to the
    # whole response if that would leave nothing to show.
    answer = response.split("###", 1)[0].strip()
    print("Bot:", answer or response)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
📁 Contents of model folder: ['README.md', 'adapter_model.safetensors', 'adapter_config.json', 'special_tokens_map.json', 'tokenizer.model', 'tokenizer.json', 'tokenizer_config.json', 'chat_template.jinja']
📦 Loading base model...
🧩 Loading LoRA adapter from: /content/drive/MyDrive/lora-tinyllama-user1
💬 Finance Copilot ready. Ask your question (type 'exit' to quit):
Bot: My total spend on groceries in August 2023 was $1,237.00
Bot: I don't have any specific habits or routines that would help you save $1,000 for buying a car. However, here are some general tips that could help:

1. Reduce expenses: By reducing expenses, you can save money for buying a car. This could include reducing your daily expenses like eating out or staying in expensive hotels.

2. Avoid impulse buying: Avoid impulse buying, especially when it comes to car purchases. If you feel the urge