In [None]:
import os

SRC_DIR = "/content/naruto_shippuden/src/data/subtitles/[Subtitlist] naruto-season-1-english-2206507"

print("Looking under:", SRC_DIR)
for root, dirs, files in os.walk(SRC_DIR):
    for f in files:
        if f.lower().endswith((".srt", ".ass", ".txt")):
            print(os.path.join(root, f))

Looking under: /content/naruto_shippuden/src/data/subtitles/[Subtitlist] naruto-season-1-english-2206507
/content/naruto_shippuden/src/data/subtitles/[Subtitlist] naruto-season-1-english-2206507/Naruto Season 2 - 33.ass
/content/naruto_shippuden/src/data/subtitles/[Subtitlist] naruto-season-1-english-2206507/Naruto Season 9 - 214.ass
/content/naruto_shippuden/src/data/subtitles/[Subtitlist] naruto-season-1-english-2206507/Naruto Season 7 - 169.ass
/content/naruto_shippuden/src/data/subtitles/[Subtitlist] naruto-season-1-english-2206507/Naruto Season 4 - 82.ass
/content/naruto_shippuden/src/data/subtitles/[Subtitlist] naruto-season-1-english-2206507/Naruto Season 1 - 09.ass
/content/naruto_shippuden/src/data/subtitles/[Subtitlist] naruto-season-1-english-2206507/Naruto Season 5 - 128.ass
/content/naruto_shippuden/src/data/subtitles/[Subtitlist] naruto-season-1-english-2206507/Naruto Season 6 - 150.ass
/content/naruto_shippuden/src/data/subtitles/[Subtitlist] naruto-season-1-english-2206

In [None]:
!pip install openai==0.28

Collecting openai==0.28
  Downloading openai-0.28.0-py3-none-any.whl.metadata (13 kB)
Downloading openai-0.28.0-py3-none-any.whl (76 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.5/76.5 kB[0m [31m7.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: openai
  Attempting uninstall: openai
    Found existing installation: openai 1.93.0
    Uninstalling openai-1.93.0:
      Successfully uninstalled openai-1.93.0
Successfully installed openai-0.28.0


In [2]:
import os
import re

# — CONFIGURATION —
SRC_DIR = "/content/naruto_shippuden/src/data/subtitles/[Subtitlist] naruto-season-1-english-2206507"
OUT_DIR = "/content/clean_subtitles_txt"

os.makedirs(OUT_DIR, exist_ok=True)

# Regexes to detect and remove formatting/timestamps
FORMAT_TAGS = re.compile(r"{[^}]+}|<[^>]+>")
SRT_TIMESTAMP = re.compile(r"\d{1,2}:\d{2}:\d{2}[,\.]\d{1,3}\s*-->\s*\d{1,2}:\d{2}:\d{2}[,\.]\d{1,3}")

def clean_ass(path):
    """
    For ASS files: read [Events] > Dialogue: lines,
    take the 10th comma‐separated field, strip tags.
    """
    lines = []
    in_events = False
    with open(path, encoding="utf-8", errors="ignore") as f:
        for raw in f:
            line = raw.rstrip("\n")
            if line.startswith("[Events]"):
                in_events = True
                continue
            if not in_events:
                continue
            if not line.startswith("Dialogue:"):
                continue
            # split into at most 10 parts, last part is text
            parts = line.split(",", 9)
            if len(parts) < 10:
                continue
            text = parts[9].strip()
            text = FORMAT_TAGS.sub("", text)         # remove {…} and <…>
            text = text.replace("\\N", " ")          # convert ASS newline
            if text:
                lines.append(text)
    return lines

def clean_srt(path):
    """
    For SRT files: skip block numbers and timestamps,
    collect all other lines (strip formatting tags).
    """
    lines = []
    with open(path, encoding="utf-8", errors="ignore") as f:
        for raw in f:
            line = raw.strip()
            if not line:
                continue
            if line.isdigit():
                continue
            if SRT_TIMESTAMP.match(line):
                continue
            # now it's subtitle text
            text = FORMAT_TAGS.sub("", line)
            if text:
                lines.append(text)
    return lines

def process_all():
    for root, _, files in os.walk(SRC_DIR):
        for fname in files:
            ext = os.path.splitext(fname)[1].lower()
            if ext not in {".ass", ".srt"}:
                continue

            src_path = os.path.join(root, fname)
            ep_id    = os.path.splitext(fname)[0]
            out_path = os.path.join(OUT_DIR, ep_id + ".txt")

            if ext == ".ass":
                cleaned = clean_ass(src_path)
            else:
                cleaned = clean_srt(src_path)

            # Write out only if we found dialogue
            if cleaned:
                with open(out_path, "w", encoding="utf-8") as outf:
                    outf.write("\n".join(cleaned))
                print(f"→ {ep_id}: {len(cleaned)} lines")
            else:
                print(f"→ {ep_id}: no dialogue found, skipped")

if __name__ == "__main__":
    process_all()


→ Naruto Season 2 - 50: 260 lines
→ Naruto Season 4 - 80: 210 lines
→ Naruto Season 4 - 99: 284 lines
→ Naruto Season 1 - 24: 293 lines
→ Naruto Season 2 - 34: 311 lines
→ Naruto Season 2 - 31: 299 lines
→ Naruto Season 4 - 89: 268 lines
→ Naruto Season 1 - 23: 336 lines
→ Naruto Season 7 - 166: 241 lines
→ Naruto Season 3 - 77: 267 lines
→ Naruto Season 6 - 138: 278 lines
→ Naruto Season 3 - 76: 221 lines
→ Naruto Season 3 - 58: 265 lines
→ Naruto Season 4 - 96: 224 lines
→ Naruto Season 4 - 87: 294 lines
→ Naruto Season 3 - 65: 245 lines
→ Naruto Season 1 - 16: 236 lines
→ Naruto Season 8 - 193: 359 lines
→ Naruto Season 3 - 70: 258 lines
→ Naruto Season 6 - 134: 155 lines
→ Naruto Season 3 - 69: 257 lines
→ Naruto Season 7 - 172: 228 lines
→ Naruto Season 6 - 154: 256 lines
→ Naruto Season 9 - 220: 280 lines
→ Naruto Season 1 - 05: 286 lines
→ Naruto Season 4 - 83: 305 lines
→ Naruto Season 1 - 15: 260 lines
→ Naruto Season 7 - 160: 226 lines
→ Naruto Season 4 - 92: 224 lines
→ Naru

In [4]:
# (Re)install the OpenAI SDK
!pip install --upgrade openai

import os
import re
import json
import time
from glob import glob
from google.colab import files
import openai

# —— CONFIG ——
CLEAN_DIR         = "/content/clean_subtitles_txt"
OUTPUT_JSON       = "/content/naruto_dataset.json"
OPENAI_API_KEY    = "api key"
MODEL_NAME        = "gpt-4o-mini"
MAX_RETRIES       = 3
PAUSE_BETWEEN_REQ = 1  # seconds

openai.api_key = OPENAI_API_KEY

PROMPT_TEMPLATE = """
You are an expert on Naruto Shippuden.
Given the full episode transcript below (dialogue only, no timestamps):

{transcript}

Please output exactly one JSON object with three fields:

1. "theme": a short label (or comma-separated labels) for the main theme(s) (e.g. "friendship", "sacrifice").
2. "summary": a concise (2–3 sentence) summary of the episode’s plot.
3. "naruto_role_personality_thinking": a short paragraph (2–3 sentences) describing Naruto’s role in the episode, quoting or paraphrasing the most important lines that showcase his personality or thought process.

Example output format (no extra text):
{{
  "theme": "friendship, perseverance",
  "summary": "Naruto and Team 7 are assigned a C-rank mission …",
  "naruto_role_personality_thinking": "Naruto steps up to protect … 'I'll never give up!' showing his determination."
}}
Now produce the JSON for the transcript above.
"""


def analyze_episode(transcript: str) -> dict | None:
    user_msg = PROMPT_TEMPLATE.format(transcript=transcript)
    for attempt in range(1, MAX_RETRIES + 1):
        try:
            resp = openai.chat.completions.create(
                model=MODEL_NAME,
                messages=[
                    {"role": "system", "content": "You are an expert Naruto analyst."},
                    {"role": "user",   "content": user_msg}
                ],
                temperature=0.2,
                max_tokens=500
            )
            text = resp.choices[0].message.content.strip()
            m = re.search(r"\{[\s\S]*\}", text)
            if not m:
                print("❗ Couldn't parse JSON:", text[:200])
                return None
            return json.loads(m.group(0))

        except Exception as e:
            s = str(e)
            # detect rate-limit by HTTP status 429 in message
            if "429" in s or "rate limit" in s.lower():
                wait = 2 ** attempt
                print(f"⚠️ Rate limit detected, retry {attempt}/{MAX_RETRIES} in {wait}s")
                time.sleep(wait)
                continue
            # other errors → abort this episode
            print("❌ Skipping due to error:", e)
            return None

    print("❌ Skipped after rate-limit retries")
    return None


def build_dataset():
    episodes = sorted(glob(os.path.join(CLEAN_DIR, "*.txt")))
    dataset = []

    for ep_path in episodes:
        ep_name = os.path.basename(ep_path).rsplit(".", 1)[0]
        print(f"→ {ep_name}", end=" ")

        with open(ep_path, encoding="utf-8") as f:
            transcript = f.read().strip()
        if not transcript:
            print("empty, skipped")
            continue

        record = analyze_episode(transcript)
        if record:
            dataset.append(record)
            print("ok")
        else:
            print("skipped")

        time.sleep(PAUSE_BETWEEN_REQ)

    with open(OUTPUT_JSON, "w", encoding="utf-8") as outf:
        json.dump(dataset, outf, ensure_ascii=False, indent=2)

    print(f"\n✅ Dataset written to {OUTPUT_JSON} ({len(dataset)} items)")
    files.download(OUTPUT_JSON)


if __name__ == "__main__":
    build_dataset()


→ Naruto Season 1 - 01 ok
→ Naruto Season 1 - 02 ok
→ Naruto Season 1 - 03 ok
→ Naruto Season 1 - 04 ok
→ Naruto Season 1 - 05 ok
→ Naruto Season 1 - 06 ok
→ Naruto Season 1 - 07 ok
→ Naruto Season 1 - 08 ok
→ Naruto Season 1 - 09 ok
→ Naruto Season 1 - 10 ok
→ Naruto Season 1 - 11 ok
→ Naruto Season 1 - 12 ok
→ Naruto Season 1 - 13 ok
→ Naruto Season 1 - 14 ok
→ Naruto Season 1 - 15 ok
→ Naruto Season 1 - 16 ok
→ Naruto Season 1 - 17 ok
→ Naruto Season 1 - 18 ok
→ Naruto Season 1 - 19 ok
→ Naruto Season 1 - 20 ok
→ Naruto Season 1 - 21 ok
→ Naruto Season 1 - 22 ok
→ Naruto Season 1 - 23 ok
→ Naruto Season 1 - 24 ok
→ Naruto Season 1 - 25 ok
→ Naruto Season 1 - 26 ok
→ Naruto Season 2 - 27 ok
→ Naruto Season 2 - 28 ok
→ Naruto Season 2 - 29 ok
→ Naruto Season 2 - 30 ok
→ Naruto Season 2 - 31 ok
→ Naruto Season 2 - 32 ok
→ Naruto Season 2 - 33 ok
→ Naruto Season 2 - 34 ok
→ Naruto Season 2 - 35 ok
→ Naruto Season 2 - 36 ok
→ Naruto Season 2 - 37 ok
→ Naruto Season 2 - 38 ok
→ Naruto Sea

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [7]:
import json

# Load your episode‑level data
with open("naruto_dataset.json", encoding="utf-8") as f:
    episodes = json.load(f)

scenarios = {
    "friendship":  "A friend feels alone and says “Nobody believes in me.”\nNaruto:",
    "perseverance": "Someone is about to give up and says “This is impossible.”\nNaruto:",
    "dreams":      "Someone doubts their dream and says “My dream is pointless.”\nNaruto:",
    "sacrifice":   "Someone is afraid to sacrifice and says “I don’t want to lose what’s important.”\nNaruto:",
    "hardwork" :   "Someone wants motivation to work hard, or they are said they are not destined to be the one they want to be or calls them failure and says“I am incapable of doing it or I am not destined to do it or all the society including my parents and friends call me a failure..”\nNaruto:",
}

persona = "Naruto Uzumaki is optimistic, determined, and fiercely loyal.\n"

with open("finetune.jsonl", "w", encoding="utf-8") as out:
    for ep in episodes:
        naruto_text = ep["naruto_role_personality_thinking"]
        for theme, prompt_body in scenarios.items():
            if theme in ep["theme"]:
                prompt = persona + prompt_body + "\n"
                # Use the core line(s) you extracted as the 'completion'
                completion = " " + naruto_text + " 🟢"  # special token
                record = {"prompt": prompt, "completion": completion}
                out.write(json.dumps(record, ensure_ascii=False) + "\n")


In [2]:
# 1) Install dependencies
!pip install --upgrade transformers bitsandbytes peft datasets

import os
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    DataCollatorForLanguageModeling,
    TrainingArguments,
    Trainer
)
from peft import LoraConfig, get_peft_model

# 2) Paths & config
FINETUNE_FILE = "/content/finetune.jsonl"
OUTPUT_DIR    = "/content/gpt2-8bit-lora1"
MODEL_ID      = "gpt2"            # or "gpt2-medium", etc.
BATCH_SIZE    = 4
EPOCHS        = 7
LEARNING_RATE = 2e-4

os.makedirs(OUTPUT_DIR, exist_ok=True)

# 3) Load the dataset
dataset = load_dataset("json", data_files=FINETUNE_FILE, split="train")

# 4) Initialize tokenizer and 8‑bit model
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    load_in_8bit=True,     # quantize weights to 8‑bit
    device_map="auto"      # automatically place layers on GPU/CPU
)

# 5) Add LoRA adapters
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["c_attn"],  # GPT‑2’s attention proj layers
    lora_dropout=0.05,
    bias="none"
)
model = get_peft_model(model, lora_config)

# 6) Tokenize examples: join prompt+completion
def tokenize_fn(ex):
    text = ex["prompt"] + ex["completion"]
    return tokenizer(text, truncation=True, max_length=512)

tokenized = dataset.map(tokenize_fn, remove_columns=["prompt","completion"])

# 7) Data collator
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False
)

# 8) Training setup
training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    per_device_train_batch_size=BATCH_SIZE,
    gradient_accumulation_steps=1,
    num_train_epochs=EPOCHS,
    learning_rate=LEARNING_RATE,
    fp16=True,                 # keeps FP16 for adapters & gradients
    logging_steps=50,
    save_total_limit=2,
    save_steps=200
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized,
    data_collator=data_collator
)

# 9) Fine‑tune!
trainer.train()

# 10) Save only the LoRA adapters and tokenizer
model.save_pretrained(OUTPUT_DIR)
tokenizer.save_pretrained(OUTPUT_DIR)

print(f"✅ 8‑bit + LoRA model saved to {OUTPUT_DIR}")




The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


Map:   0%|          | 0/258 [00:00<?, ? examples/s]

No label_names provided for model class `PeftModel`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss
50,3.8674
100,2.8164
150,2.1909
200,1.8706
250,1.798
300,1.7408
350,1.6785
400,1.6708
450,1.6383




✅ 8‑bit + LoRA model saved to /content/gpt2-8bit-lora1


In [12]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

# 1) Load the base 8‑bit model + LoRA adapters
base_model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    load_in_8bit=True,
    device_map="auto"
)
finetuned1="/content/gpt2-8bit-lora"
finetuned2="/content/gpt2-8bit-lora1"
model = PeftModel.from_pretrained(base_model, finetuned1)

tokenizer = AutoTokenizer.from_pretrained(finetuned1)
tokenizer.pad_token = tokenizer.eos_token

# 2) Generate function
def naruto_reply(user_input, max_new_tokens=300):
    persona = "Naruto Uzumaki is optimistic, determined, and fiercely loyal,gives hope to people and motivate people to never quit on your dreams and prove to the people his worth and believes hardwork, determination, frienship is the key for everything.He says Dattebayo at the very end and frequent conversations.His favourite attack jutsu is rasengan which he uses to tell to attack or fight on\n"
    prompt  = f"{persona}A friend says: \"{user_input}\"\nNaruto:"
    inputs  = tokenizer(prompt, return_tensors="pt").to(model.device)
    out     = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        top_p=0.9,
        temperature=0.8,
        pad_token_id=tokenizer.eos_token_id
    )
    text = tokenizer.decode(out[0], skip_special_tokens=True)
    return text[len(prompt):].strip()

print(naruto_reply("My teammates gave up on me.So what should I be doing now?"))


The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


Naruto: Naruto refuses to give up. He says, "I won't let this happen."
Naruto: Naruto shares his belief in perseverance and unwavering determination. He says, 'This is a team effort.'
Naruto: 💙 🙏 💙 🏏 🏙 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 🏏 �


In [16]:
import os
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    DataCollatorForLanguageModeling,
    TrainingArguments,
    Trainer
)
from peft import LoraConfig, get_peft_model
import torch

# 2) Config
MODEL_ID       = "gpt2-medium"                # or any HF model
FINETUNE_FILE  = "/content/finetune.jsonl"
OUTPUT_DIR     = "/content/qlora-gpt2-medium"
BATCH_SIZE     = 4
EPOCHS         = 3
LEARNING_RATE  = 2e-4
MAX_LENGTH     = 512

os.makedirs(OUTPUT_DIR, exist_ok=True)

# 3) Load dataset
dataset = load_dataset("json", data_files=FINETUNE_FILE, split="train")

# 4) Tokenizer
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
if tokenizer.pad_token_id is None:
    tokenizer.pad_token = tokenizer.eos_token  # ensure padding

# 5) Load 4‑bit quantized model
from transformers import BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4"
)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    quantization_config=bnb_config,
    device_map="auto"
)


# 6) Inject LoRA adapters
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["c_attn","c_proj"],  # GPT‑2 attention/query/key/value/projector names
    lora_dropout=0.1,
    bias="none"
)
model = get_peft_model(model, peft_config)

# 7) Tokenize function
def tokenize_fn(ex):
    text = ex["prompt"] + ex["completion"]
    return tokenizer(
        text,
        truncation=True,
        max_length=MAX_LENGTH,
        padding="max_length"
    )

tokenized = dataset.map(tokenize_fn, remove_columns=["prompt","completion"])

# 8) Data collator
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False
)

# 9) Training arguments
training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    per_device_train_batch_size=BATCH_SIZE,
    gradient_accumulation_steps=1,
    num_train_epochs=EPOCHS,
    learning_rate=LEARNING_RATE,
    fp16=True,
    logging_steps=50,
    save_steps=200,
    save_total_limit=2,
    optim="paged_adamw_8bit",      # 8‑bit optimizer for QLoRA
    report_to="none"
)

# 10) Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized,
    data_collator=data_collator
)

# 11) Run training
trainer.train()

# 12) Save LoRA adapters & tokenizer
model.save_pretrained(OUTPUT_DIR)
tokenizer.save_pretrained(OUTPUT_DIR)

print("✅ QLoRA‑fine­tuned model saved to", OUTPUT_DIR)


model.safetensors:   0%|          | 0.00/1.52G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

Map:   0%|          | 0/258 [00:00<?, ? examples/s]

No label_names provided for model class `PeftModel`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss
50,2.7116
100,1.5487
150,1.4126


✅ QLoRA‑fine­tuned model saved to /content/qlora-gpt2-medium


In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel

OUTPUT_DIR = "/content/qlora-gpt2-medium"
MODEL_ID   = "gpt2-medium"  # or whatever base you used

# 1) Reconstruct the 4‑bit quantized base model
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4"
)

base = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    quantization_config=bnb_config,
    device_map="auto"
)

# 2) Load your fine‑tuned LoRA adapters on top
model = PeftModel.from_pretrained(base, OUTPUT_DIR)

# 3) Reload tokenizer (with your added special tokens/pad token)
tokenizer = AutoTokenizer.from_pretrained(OUTPUT_DIR)
if tokenizer.pad_token_id is None:
    tokenizer.pad_token = tokenizer.eos_token

# 4) Inference helper
def naruto_reply(user_input: str, max_new_tokens: int = 500) -> str:
    persona = "Naruto Uzumaki is optimistic, determined, and fiercely loyal.\n"
    prompt  = f"{persona}A friend says: \"{user_input}\"\nNaruto:"
    inputs  = tokenizer(prompt, return_tensors="pt").to(model.device)
    out     = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        top_p=0.9,
        temperature=0.8,
        pad_token_id=tokenizer.eos_token_id
    )
    full = tokenizer.decode(out[0], skip_special_tokens=True)
    # Remove the prompt from the beginning
    return full[len(prompt):].strip()

# 5) Test it
print(naruto_reply("My teammates gave up on me.So what should I be doing now?"))


In [21]:
pip install flask transformers bitsandbytes peft torch



In [29]:
!wget -q -c https://bin.equinox.io/c/bNyj1mQVY4c/ngrok-v3-stable-linux-amd64.zip
!unzip -o ngrok-v3-stable-linux-amd64.zip
!chmod +x ngrok

Archive:  ngrok-v3-stable-linux-amd64.zip
  inflating: ngrok                   


In [30]:
!./ngrok config add-authtoken 2q9gHoz4I2P9TK9966QZ5snqfJ7_6gb71suopfXZf1Q3cENHe

ERROR:root:Unexpected exception finding object shape
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/google/colab/_debugpy_repr.py", line 54, in get_shape
    shape = getattr(obj, 'shape', None)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/werkzeug/local.py", line 318, in __get__
    obj = instance._get_current_object()
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/werkzeug/local.py", line 519, in _get_current_object
    raise RuntimeError(unbound_message) from None
RuntimeError: Working outside of request context.

This typically means that you attempted to use functionality that needed
an active HTTP request. Consult the documentation on testing for
information about how to avoid this problem.


Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [31]:
# 1) Install pyngrok
!pip install pyngrok flask torch transformers bitsandbytes peft accelerate




ERROR:root:Unexpected exception finding object shape
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/google/colab/_debugpy_repr.py", line 54, in get_shape
    shape = getattr(obj, 'shape', None)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/werkzeug/local.py", line 318, in __get__
    obj = instance._get_current_object()
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/werkzeug/local.py", line 519, in _get_current_object
    raise RuntimeError(unbound_message) from None
RuntimeError: Working outside of request context.

This typically means that you attempted to use functionality that needed
an active HTTP request. Consult the documentation on testing for
information about how to avoid this problem.




ERROR:root:Unexpected exception finding object shape
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/google/colab/_debugpy_repr.py", line 54, in get_shape
    shape = getattr(obj, 'shape', None)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/werkzeug/local.py", line 318, in __get__
    obj = instance._get_current_object()
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/werkzeug/local.py", line 519, in _get_current_object
    raise RuntimeError(unbound_message) from None
RuntimeError: Working outside of request context.

This typically means that you attempted to use functionality that needed
an active HTTP request. Consult the documentation on testing for
information about how to avoid this problem.


In [None]:
import os
from flask import Flask, request, jsonify
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel
from pyngrok import ngrok

# — Disable Colab debugpy repr noise —
os.environ["PYDEVD_DISABLE_FILE_OUTPUT"] = "1"

# —— CONFIGURATIONS ——
MODEL_ID    = "gpt2-medium"
ADAPTER_DIR = "/content/qlora-gpt2-medium"
NGROK_PORT  = 5000

# 4‑bit quantization config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4"
)

# 1) Load the quantized base model
base = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    quantization_config=bnb_config,
    device_map="auto"
)

# 2) Load your LoRA adapters
model = PeftModel.from_pretrained(base, ADAPTER_DIR)

# 3) Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(ADAPTER_DIR)
if tokenizer.pad_token_id is None:
    tokenizer.pad_token = tokenizer.eos_token

# 4) Create the Flask app
app = Flask(__name__)

@app.route("/generate", methods=["POST"])
def generate():
    data = request.get_json(force=True)
    user_input = data.get("user_input", "").strip()
    if not user_input:
        return jsonify({"error": "Missing 'user_input'"}), 400

    persona = "Naruto Uzumaki is optimistic, determined, and fiercely loyal.\n"
    prompt  = f"{persona}A friend says: \"{user_input}\"\nNaruto:"

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    out    = model.generate(
        **inputs,
        max_new_tokens=64,
        do_sample=True,
        top_p=0.9,
        temperature=0.8,
        pad_token_id=tokenizer.eos_token_id
    )

    full  = tokenizer.decode(out[0], skip_special_tokens=True)
    reply = full[len(prompt):].strip()
    return jsonify({"reply": reply})

# 5) Spin up ngrok tunnel
public_url = ngrok.connect(NGROK_PORT)
print(f"🔗  Public URL: {public_url}")

# 6) Run Flask (no reloader)
app.run(host="0.0.0.0", port=NGROK_PORT, debug=False, use_reloader=False)
