## 1. Prerequisites: Connect Kaggle to Colab


In [None]:
# Install Kaggle CLI
!pip install -q kaggle

# Upload your kaggle.json to the Colab environment
from google.colab import files
files.upload()

# Set up the Kaggle directory
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

# Download and unzip the specific dataset you provided
!kaggle datasets download -d meruvulikith/193k-medium-articles-dataset-for-llm-finetuning
!unzip 193k-medium-articles-dataset-for-llm-finetuning.zip

### 1.1 Upload kaggle.json file

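Your `kaggle.json` (downloaded from your Kaggle account settings) should look like this. The values below are placeholders; never commit or share your real key:

```json
{
  "username": "AAAA",
  "key": "KGAT_bbbbb"
}
```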

### 1.2 Update the core libraries and restart the session

In [None]:
!pip install -U bitsandbytes accelerate transformers peft datasets


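After the install finishes, restart the runtime so the upgraded packages are actually loaded (in Colab: Runtime -> Restart session), then continue with the next section.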

## 2. Dataset Loading and Fine-Tuning

In [None]:
import pandas as pd
import torch
from datasets import Dataset, load_dataset
from peft import LoraConfig, get_peft_model
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    Trainer,
    TrainingArguments,
)

# 1. Load the Kaggle CSV
# Note: 193k rows is a lot! We'll take a subset (e.g., 1000) for a quick fine-tune.
# `nrows` stops parsing after the subset instead of reading the whole file and
# then discarding most of it. Rows with a missing title or body are dropped
# because they cannot be turned into an instruction/output pair, so the frame
# may end up slightly shorter than 1000 rows.
df = (
    pd.read_csv("193k.csv", nrows=1000)
    .dropna(subset=["title", "text"])
    .reset_index(drop=True)
)

# 2. Map the CSV columns to Instruction/Output format
def format_data(row):
    """Turn one CSV row into an instruction/output training record.

    Args:
        row: Mapping-like object (e.g. a pandas Series) exposing the
            'title' and 'text' fields.

    Returns:
        dict with an "instruction" string built from the title and an
        "output" entry holding the row's text unchanged.
    """
    article_title = row['title']
    article_body = row['text']

    record = {}
    record["instruction"] = f"Write a Medium article based on the following title: {article_title}"
    record["output"] = article_body
    return record

# Build one training record per DataFrame row, then wrap the records in a
# Hugging Face dataset
row_series = (row for _, row in df.iterrows())
formatted_data = list(map(format_data, row_series))
dataset = Dataset.from_list(formatted_data)

# 3. Model & Tokenizer Setup (from your notebook)
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Reuse EOS as the padding token so fixed-length (padded) examples can be built
tokenizer.pad_token = tokenizer.eos_token

# Passing `load_in_8bit=True` straight to `from_pretrained` is deprecated in
# recent transformers releases; the supported way to request 8-bit weights is
# a BitsAndBytesConfig handed over via `quantization_config`.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
    device_map="auto"
)

# 4. Apply LoRA (from your notebook)
# Adapter hyper-parameters are collected in one place; adapters are attached
# only to the modules named q_proj and v_proj.
lora_settings = dict(
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj"],
    r=8,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
)
lora_config = LoraConfig(**lora_settings)
model = get_peft_model(model, lora_config)

# 5. Tokenization Logic
def format_prompt(example):
    """Render a training record as a single instruction/response text.

    Args:
        example: Mapping with 'instruction' and 'output' entries.

    Returns:
        The text "### Instruction:", the instruction, a blank line,
        "### Response:", and the output, each separated by a newline.
    """
    instruction_text = example['instruction']
    response_text = example['output']
    sections = [
        "### Instruction:",
        f"{instruction_text}",
        "",
        "### Response:",
        f"{response_text}",
    ]
    return "\n".join(sections)

def tokenize(example):
    """Tokenize one training record into fixed-length causal-LM features.

    The record is rendered with `format_prompt`, terminated with the EOS token,
    truncated/padded to 512 tokens, and given a `labels` entry in which every
    padding position is set to -100 so it is ignored by the loss.

    Args:
        example: Mapping with 'instruction' and 'output' entries.

    Returns:
        The tokenizer output (input_ids, attention_mask, ...) with an added
        "labels" list of the same length as "input_ids".
    """
    # Append EOS explicitly so examples shorter than the limit still teach the
    # model where an article ends (assumes the tokenizer does not already
    # append EOS on its own -- TODO confirm for this tokenizer).
    prompt = format_prompt(example) + tokenizer.eos_token
    tokenized = tokenizer(
        prompt,
        truncation=True,
        padding="max_length",
        max_length=512
    )
    # Padding must not contribute to the loss. Because the pad token is the
    # same id as EOS in this notebook, padding is identified through the
    # attention mask rather than by comparing token ids.
    tokenized["labels"] = [
        token_id if is_real_token else -100
        for token_id, is_real_token in zip(
            tokenized["input_ids"], tokenized["attention_mask"]
        )
    ]
    return tokenized

# Tokenize every record; the raw 'instruction'/'output' text columns are
# dropped so the resulting dataset holds model inputs only.
tokenized_dataset = dataset.map(tokenize, remove_columns=dataset.column_names)

# 6. Training Arguments & Execution
training_args = TrainingArguments(
    output_dir="./medium-finetune",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,  # 2 x 4 = 8 examples per optimizer step and device
    learning_rate=2e-4,
    num_train_epochs=3,  # three passes over the subset; lower to 1 for a quicker first run
    fp16=True,
    logging_steps=10,
    save_strategy="epoch",  # one checkpoint under output_dir per epoch
    report_to="none"  # no external experiment tracker
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset
)

trainer.train()

## 3. Define the Inference Function

In [None]:
import torch

def generate_response(instruction):
    """Generate a response for `instruction` with the fine-tuned model.

    Args:
        instruction: Text placed in the "### Instruction:" slot of the prompt.

    Returns:
        The newly generated text only; the prompt scaffold is not echoed back.
    """
    # This structure matches the "### Instruction:" format we used during training
    prompt = f"""### Instruction:
{instruction}

### Response:
"""

    # Move inputs to the same device as the model (GPU)
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_length = inputs["input_ids"].shape[1]

    # Training leaves the model in train mode; switch to eval so dropout
    # (including the LoRA dropout) is disabled while sampling.
    model.eval()
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=300,    # Increased slightly for Medium articles
            temperature=0.9,       # Creativity setting
            top_p=0.9,             # Nucleus sampling
            do_sample=True,        # Must be True to use temperature
            eos_token_id=tokenizer.eos_token_id,
            # State the padding id explicitly instead of relying on generate's
            # implicit fallback to EOS.
            pad_token_id=tokenizer.eos_token_id
        )

    # The generated sequence starts with the prompt tokens; slice them off so
    # only the newly generated continuation is decoded.
    generated_ids = outputs[0][prompt_length:]
    decoded = tokenizer.decode(generated_ids, skip_special_tokens=True)
    return decoded

## 4. Testing Your "Divine" Creation

In [None]:
# Try the model on a theme that shows up often on Medium
instruction = "Write a Medium article about: The intersection of Artificial Intelligence and Human Creativity"
generated_text = generate_response(instruction)
print(generated_text)

## 5. Saving the Trained and Fine-Tuned Model for Your Application

In [None]:
# Save the fine-tuned adapter
model.save_pretrained("medium-article-generator")
tokenizer.save_pretrained("medium-article-generator")

# Zip it for download
!zip -r medium_model.zip medium-article-generator

## 6. How to use the model on your application

To run this in a local application, you need to move the adapter weights (the medium_model.zip you created) from Colab to your laptop.

### 6.1 Download and Extract

Download medium_model.zip from Colab and extract it into your project folder on Windows. You should see files like adapter_model.safetensors and adapter_config.json.

### 6.2 Local Environment Setup

On your local machine/laptop, you will need a Python environment. Open your terminal (PowerShell or CMD) and run:

For Linux/Unix

In [None]:
# Linux/Unix: run in a terminal. Installs the packages the local script imports
# (torch, transformers, peft) together with accelerate and bitsandbytes.
pip install torch transformers peft accelerate bitsandbytes

For Windows:

In [None]:
# Windows: install the regular `bitsandbytes` package. The separate
# `bitsandbytes-windows` distribution is an unofficial, no longer maintained
# build that lags behind current transformers/peft releases.
pip install torch transformers peft accelerate bitsandbytes

Note: bitsandbytes on Windows can sometimes be tricky. If you don't have an NVIDIA GPU on your laptop, you can load the model in regular FP16 mode instead of 8-bit.

### 6.3 The Local Python Script

Here is the production-ready code to load your fine-tuned model for your application. This script loads the Base Model first, then overlays your LoRA Adapter.

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

# 1. Configuration
BASE_MODEL = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
LORA_ADAPTER_PATH = "./medium-article-generator"
# Half precision is meant for GPU execution. Without a CUDA device fall back
# to float32, since float16 inference on CPU is typically slow and may hit
# operators without a half-precision implementation.
MODEL_DTYPE = torch.float16 if torch.cuda.is_available() else torch.float32

# 2. Load Tokenizer
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)

# 3. Load Base Model
# Use 'device_map=None' if you have low VRAM to avoid the "meta device" error
# Use 'dtype' instead of the deprecated 'torch_dtype'
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    dtype=MODEL_DTYPE,
    device_map="auto"
)

# 4. CRITICAL: Load the adapter using the 'PeftModel' wrapper
# directly to avoid the lm_head key mismatch
try:
    model = PeftModel.from_pretrained(model, LORA_ADAPTER_PATH)
except Exception as e:
    print(f"Standard load failed, attempting fallback: {e}")
    # Fallback: Sometimes local paths need absolute references on Windows
    import os
    abs_path = os.path.abspath(LORA_ADAPTER_PATH)
    # A failure here propagates: without the adapter the script would silently
    # serve the un-tuned base model.
    model = PeftModel.from_pretrained(model, abs_path)
# Reached after either load path succeeded. The adapter is attached on top of
# the base model here, not merged into its weights.
print("Successfully loaded adapter weights.")

# Inference only: disable dropout
model.eval()


def generate_article(title):
    """Generate a Medium-style article for `title` with the adapter-augmented model.

    Args:
        title: Topic inserted into the "Write a Medium article about: ..."
            instruction.

    Returns:
        The generated article text only; the instruction/response scaffold of
        the prompt is not included.
    """
    prompt = f"### Instruction:\nWrite a Medium article about: {title}\n\n### Response:\n"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_length = inputs["input_ids"].shape[1]

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=256,
            temperature=0.7,
            do_sample=True,
            eos_token_id=tokenizer.eos_token_id,
            # State the padding id explicitly instead of relying on generate's
            # implicit fallback to EOS.
            pad_token_id=tokenizer.eos_token_id
        )
    # The generated sequence starts with the prompt tokens; decode only what
    # comes after them so callers receive the article without the scaffold.
    return tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)


if __name__ == "__main__":
    # Smoke test when run as a script: generate one article and print it
    sample_article = generate_article("The Future of Remote Work in 2026.")
    print(sample_article)
