In [None]:
!pip install -q transformers accelerate bitsandbytes

from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

model_id = "TheBloke/Mistral-7B-Instruct-v0.1-GPTQ"

# Load the tokenizer and the quantized model from the Hub.
# device_map="auto" leaves weight placement to accelerate.
tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    # Mistral is implemented natively in transformers, so there is no reason
    # to allow execution of Python code shipped inside the Hub repository.
    trust_remote_code=False,
    revision="main",
)

# Wrap the loaded model and tokenizer in a text-generation pipeline; `gen` is
# the callable used by write_blog() to produce completions.
gen = pipeline(
    task="text-generation",
    tokenizer=tokenizer,
    model=model,
    device_map="auto",
)

# Function to generate blog post
def write_blog(topic, tone="informative", max_new_tokens=600, *, generator=None):
    """Generate a blog post about ``topic``, print it, and return it.

    Args:
        topic: Subject of the post; interpolated verbatim into the prompt.
        tone: Writing style requested from the model.
        max_new_tokens: Upper bound on the number of tokens to generate.
        generator: Optional callable following the text-generation pipeline
            calling convention (``generator(prompt, **kwargs)`` returning a
            list of dicts with a ``"generated_text"`` key). Defaults to the
            module-level ``gen`` pipeline.

    Returns:
        The generated post with surrounding whitespace stripped.
    """
    if generator is None:
        generator = gen

    prompt = f"""[INST] Write a {tone} blog post about "{topic}". The blog should include an introduction, 2-3 main points, and a conclusion. [/INST]"""
    outputs = generator(
        prompt,
        max_new_tokens=max_new_tokens,
        temperature=0.7,
        do_sample=True,
        top_p=0.95,
        # Ask for the completion only. Stripping the prompt afterwards with
        # str.replace would also delete any copy of it echoed inside the post.
        return_full_text=False,
    )
    post = outputs[0]["generated_text"].strip()

    print("\nGenerated Blog Post:\n")
    print(post)
    return post

# CLI loop: keep asking for topics until the user types 'exit'.
print("🔥 GenAI Blog Post Writer - Mistral 7B")
print("Enter a blog topic (or type 'exit')")
while True:
    try:
        # Strip once so the exit check and the prompt both see the same text.
        topic = input("\nTopic: ").strip()
        if topic.lower() == "exit":
            break
        if not topic:
            # A blank topic would only produce a post about nothing; ask again.
            continue
        tone = input("Tone (e.g. persuasive, casual, default: informative): ").strip() or "informative"
    except (EOFError, KeyboardInterrupt):
        # Closed stdin or an interrupt at the prompt ends the session cleanly.
        break
    write_blog(topic, tone)
