<a href="https://colab.research.google.com/github/sumanth-github/mini_transformer/blob/main/llm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install chromadb
!pip install tiktoken


In [None]:
!pip install pyngrok

In [6]:
import os
from getpass import getpass

from pyngrok import ngrok

# SECURITY: never paste the ngrok authtoken into the notebook -- it ends up in version
# control and in shared copies. Any token that was committed here before must be revoked
# in the ngrok dashboard. The token is taken from the NGROK_AUTHTOKEN environment variable
# when set, otherwise it is requested interactively (the input is not echoed or stored).
ngrok_authtoken = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok authtoken: ")
ngrok.set_auth_token(ngrok_authtoken)

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [7]:
!pip install streamlit

Collecting streamlit
  Downloading streamlit-1.47.1-py3-none-any.whl.metadata (9.0 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.47.1-py3-none-any.whl (9.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.9/9.9 MB[0m [31m24.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m29.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.1/79.1 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[?25hInst

In [190]:
from pyngrok import ngrok
import streamlit as st

# Write your app.py content
with open("app.py", "w") as f:
    f.write('''
import streamlit as st

st.title("Tiny LLM Chatbot")
user_input = st.text_input("You: ")

if user_input:
    # Dummy response logic, replace with real model
    st.write("Bot:", "This is a placeholder response.")
''')

# Start the Streamlit app
!streamlit run app.py &>/content/logs.txt &

# Expose it with ngrok
public_url = ngrok.connect(8501)
print("Your app is live at:", public_url)


Your app is live at: NgrokTunnel: "https://8531403103f4.ngrok-free.app" -> "http://localhost:8501"


In [173]:
%%writefile config.py
# config.py
# Reference hyperparameters for the tiny transformer. The model has to be rebuilt with the
# same architecture values at inference time as were used for training, so keep every
# place that sets them in sync with this file.
MAX_LEN    = 128   # sequence length (in tokens) examples are padded/truncated to
D_MODEL    = 64    # embedding / hidden width of the encoder
NUM_HEADS  = 4     # attention heads per encoder layer
D_FF       = 128   # hidden width of the position-wise feed-forward block
NUM_LAYERS = 3     # number of stacked encoder layers
BATCH_SIZE = 8     # examples per optimisation step
NUM_EPOCHS = 100   # passes over the training set
LR         = 5e-4  # initial learning rate


Overwriting config.py


In [187]:
%%writefile model.py
import torch
import torch.nn as nn

# Positional Encoding Layer
class PositionalEncoding(nn.Module):
    """Fixed sinusoidal positional encodings.

    A table of shape (1, max_len, d_model) is precomputed and stored as the buffer
    ``pe``: even feature columns hold sines, odd columns hold cosines.

    Args:
        d_model: feature width of the encoding (odd values are supported).
        max_len: number of positions to precompute.
    """

    def __init__(self, d_model, max_len=512):
        super().__init__()
        self.d_model = d_model
        self.max_len = max_len

        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-torch.log(torch.tensor(10000.0)) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        # For odd d_model there is one fewer odd column than even column, so the
        # frequency vector is trimmed to match (a no-op when d_model is even).
        pe[:, 1::2] = torch.cos(position * div_term[: d_model // 2])
        self.register_buffer("pe", pe.unsqueeze(0))

    def forward(self, x):
        """Return the encodings for the first ``x.size(1)`` positions.

        Only the sequence length and device of ``x`` are used; the result has shape
        (1, min(x.size(1), max_len), d_model) and is meant to be added to ``x`` by the caller.
        """
        return self.pe[:, :x.size(1), :].to(x.device)


# One Transformer Encoder Layer
class TransformerEncoderLayer(nn.Module):
    """One post-norm encoder block: self-attention and a feed-forward network.

    Each sub-layer is wrapped as ``LayerNorm(x + Dropout(sublayer(x)))``. No attention
    or padding mask is applied, so every position attends to every other position.

    Args:
        d_model: feature width of the inputs and outputs.
        num_heads: number of attention heads.
        d_ff: hidden width of the feed-forward network.
        dropout: dropout probability used in attention and on both residual branches.
    """

    def __init__(self, d_model, num_heads, d_ff, dropout=0.1):
        super().__init__()
        self.attn = nn.MultiheadAttention(
            embed_dim=d_model,
            num_heads=num_heads,
            dropout=dropout,
            batch_first=True,
        )
        feed_forward = [nn.Linear(d_model, d_ff), nn.ReLU(), nn.Linear(d_ff, d_model)]
        self.ff = nn.Sequential(*feed_forward)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Apply the block to ``x`` (batch-first) and return a tensor of the same shape."""
        # Self-attention: the same tensor is query, key and value; attention weights are discarded.
        attended = self.attn(x, x, x)[0]
        hidden = self.norm1(x + self.dropout(attended))
        projected = self.ff(hidden)
        return self.norm2(hidden + self.dropout(projected))

# Full Transformer Encoder (stack of layers)
class TransformerEncoder(nn.Module):
    """Token embedding plus positional encoding followed by a stack of encoder layers.

    Args:
        vocab_size: number of token ids; the embedding table is allocated with one extra row.
        d_model: embedding / hidden width.
        num_heads: attention heads per layer.
        d_ff: feed-forward hidden width per layer.
        num_layers: number of stacked ``TransformerEncoderLayer`` blocks.
        max_seq_len: number of positions the positional-encoding table covers.
    """

    def __init__(self, vocab_size, d_model, num_heads, d_ff, num_layers, max_seq_len):
        super().__init__()
        # Row 0 is the padding embedding (kept at zero by padding_idx).
        self.embedding = nn.Embedding(vocab_size + 1, d_model, padding_idx=0)
        self.pos_encoding = PositionalEncoding(d_model, max_seq_len)
        blocks = []
        for _ in range(num_layers):
            blocks.append(TransformerEncoderLayer(d_model, num_heads, d_ff))
        self.layers = nn.ModuleList(blocks)

    def forward(self, x):
        """Encode a batch of token ids and return one d_model-wide vector per position."""
        hidden = self.embedding(x)
        hidden = hidden + self.pos_encoding(hidden)
        for block in self.layers:
            hidden = block(hidden)
        return hidden

# TinyQA Model: encoder followed by a per-position projection onto the vocabulary
class TinyQAModel(nn.Module):
    """Wrap an encoder with a linear head that scores every vocabulary entry at every position.

    Args:
        encoder: module mapping token ids to hidden states whose last dimension is ``d_model``.
        d_model: width of the encoder output.
        vocab_size: number of output classes of the linear head.
    """

    def __init__(self, encoder, d_model, vocab_size):
        super().__init__()
        self.encoder = encoder
        # The head is applied independently to each position's hidden state.
        self.decoder = nn.Linear(in_features=d_model, out_features=vocab_size)

    def forward(self, x):
        """Return logits with the encoder output's leading dims and ``vocab_size`` as last dim."""
        hidden = self.encoder(x)
        token_logits = self.decoder(hidden)
        return token_logits


Overwriting model.py


In [90]:
%%writefile utils.py
import torch
from chromadb import Client
import chromadb.utils.embedding_functions as embedding_functions

from sentence_transformers import SentenceTransformer

# Initialize embedder
# NOTE: these three statements run at import time, so every `from utils import ...`
# instantiates the sentence-transformer model and a Chroma client as a side effect,
# even when only the tokenizer helpers below are needed.
embedder = SentenceTransformer('all-MiniLM-L6-v2')
chroma_client = Client()
# Collection that embed_chunks() writes document chunks into.
chroma_collection = chroma_client.get_or_create_collection("rag_memory")

def embed_chunks(chunks):
    """Embed text chunks and store them in the module-level ``rag_memory`` collection.

    Args:
        chunks: iterable of strings to embed.

    Returns:
        The number of chunks that were stored (0 for empty input).
    """
    import uuid

    chunks = list(chunks)
    if not chunks:
        # Nothing to embed; avoid calling the vector store with empty lists.
        return 0

    embeddings = embedder.encode(chunks, show_progress_bar=True)
    # Ids must be unique across calls: the previous "id_0..id_n" scheme restarted at 0
    # on every call, so a second batch reused the ids of the first one.
    ids = [f"id_{uuid.uuid4().hex}" for _ in chunks]
    chroma_collection.add(documents=chunks, embeddings=embeddings, ids=ids)
    return len(chunks)

def build_rag_prompt(user_query, top_k=3):
    """Build a "Context / Question / Answer" prompt from the most similar stored chunks.

    Reuses the module-level ``embedder`` and ``chroma_collection`` instead of loading a
    new SentenceTransformer and creating a new client on every call.

    Args:
        user_query: the question to answer.
        top_k: maximum number of chunks to retrieve.

    Returns:
        The prompt string; the context section is empty when nothing is stored
        or ``top_k`` is not positive.
    """
    # Never ask for more neighbours than there are stored documents.
    n_results = min(top_k, chroma_collection.count())

    context_chunks = []
    if n_results > 0:
        query_embedding = embedder.encode([user_query])[0]
        results = chroma_collection.query(
            query_embeddings=[query_embedding],
            n_results=n_results
        )
        # One result list per query embedding; only one query was sent.
        context_chunks = results["documents"][0]

    context = "\n\n".join(context_chunks)

    return f"Context:\n{context}\n\nQuestion: {user_query}\nAnswer:"



class SimpleTokenizer:
    """Whitespace word-level tokenizer with reserved ids <PAD>=0, <UNK>=1 and <EOS>=2."""

    def __init__(self):
        specials = ("<PAD>", "<UNK>", "<EOS>")
        self.word2idx = {token: index for index, token in enumerate(specials)}
        self.idx2word = dict(enumerate(specials))
        self.vocab_size = 3

    def fit(self, texts):
        """Add every unseen whitespace-separated word in ``texts`` to the vocabulary."""
        for text in texts:
            for word in text.strip().split():
                if word in self.word2idx:
                    continue
                new_id = len(self.word2idx)
                self.word2idx[word] = new_id
                self.idx2word[new_id] = word
        self.vocab_size = len(self.word2idx)

    def encode(self, text, max_len=None, eos=True):
        """Map ``text`` to ids (unknown words become 1).

        With ``eos`` the id 2 is appended; with a truthy ``max_len`` the result is cut
        to ``max_len`` ids and right-padded with 0 up to that length.
        """
        ids = []
        for word in text.strip().split():
            ids.append(self.word2idx.get(word, 1))
        if eos:
            ids.append(2)
        if max_len:
            padding = [0] * max(0, max_len - len(ids))
            ids = ids[:max_len] + padding
        return ids

    def decode(self, tokens):
        """Join the words for ``tokens``, skipping id 0 and using "<UNK>" for unknown ids."""
        words = []
        for token in tokens:
            if token == 0:
                continue
            words.append(self.idx2word.get(token, "<UNK>"))
        return " ".join(words)

    def get_pad_id(self):
        """Return the padding id (0)."""
        return 0

    def get_eos_id(self):
        """Return the end-of-sequence id (2)."""
        return 2

def prepare_data(pairs, tokenizer, max_len, pad_id=None):
    """Turn (question, answer) pairs into two aligned, padded id tensors.

    Each text is encoded, cut to ``max_len // 2`` ids, terminated with exactly one EOS id
    and right-padded with ``pad_id`` to ``max_len``.

    Args:
        pairs: iterable of (question, answer) strings.
        tokenizer: object with ``encode(text)``. EOS/PAD ids are read from ``tokenizer.sp``
            when present, otherwise from ``get_eos_id()`` / ``get_pad_id()`` when available,
            otherwise they default to 2 and 0.
        max_len: length of every row.
        pad_id: padding id; when omitted it is taken from the tokenizer.

    Returns:
        ``(X, y)`` long tensors of shape (len(pairs), max_len).
    """
    if hasattr(tokenizer, 'sp'):
        eos_id = tokenizer.sp.eos_id()
        tokenizer_pad = tokenizer.sp.pad_id()
    else:
        # The previous fallback used 1 here, which is <UNK> for SimpleTokenizer, not <EOS>.
        eos_id = tokenizer.get_eos_id() if hasattr(tokenizer, 'get_eos_id') else 2
        tokenizer_pad = tokenizer.get_pad_id() if hasattr(tokenizer, 'get_pad_id') else 0
    if pad_id is None:
        # A negative id means the tokenizer has no padding piece; fall back to 0.
        pad_id = tokenizer_pad if tokenizer_pad >= 0 else 0

    half = max_len // 2

    def _build_row(text):
        ids = list(tokenizer.encode(text))
        # Some tokenizers already append EOS; drop it so the row ends with a single EOS
        # and truncation cannot cut the terminator off.
        if ids and ids[-1] == eos_id:
            ids = ids[:-1]
        row = ids[:half] + [eos_id]
        return row + [pad_id] * (max_len - len(row))

    inputs, targets = [], []
    for q, a in pairs:
        inputs.append(_build_row(q))
        targets.append(_build_row(a))

    if not inputs:
        # Keep the 2-D shape for empty input so downstream code can still index columns.
        empty = torch.empty((0, max_len), dtype=torch.long)
        return empty, empty.clone()

    X = torch.tensor(inputs, dtype=torch.long)
    y = torch.tensor(targets, dtype=torch.long)
    return X, y




class BPETokenizer:
    """Thin wrapper around a SentencePiece processor loaded from ``model_file``."""

    def __init__(self, model_file="chatbot_bpe.model"):
        # Imported here so the module can be imported without sentencepiece installed.
        import sentencepiece as spm
        processor = spm.SentencePieceProcessor()
        processor.load(model_file)
        self.sp = processor

    def encode(self, text, max_len=None):
        """Encode ``text`` to ids; with a truthy ``max_len`` cut to it and right-pad with 0."""
        ids = self.sp.encode(text, out_type=int)
        if not max_len:
            return ids
        pad_count = max(0, max_len - len(ids))
        return ids[:max_len] + [0] * pad_count

    def decode(self, token_ids):
        """Decode ids to text, turning any remaining "▁" markers into spaces and trimming."""
        text = self.sp.decode(token_ids)
        return text.replace("▁", " ").strip()

    @property
    def vocab_size(self):
        """Number of pieces reported by the underlying processor."""
        return self.sp.get_piece_size()



Overwriting utils.py


In [150]:
import pickle
import json

In [151]:
# Colab-only: opens a browser upload dialog. The uploaded file is saved into the working
# directory; the next cell expects it to be named augmented_qa_dataset.json.
from google.colab import files
uploaded = files.upload()


Saving augmented_qa_dataset.json to augmented_qa_dataset.json


In [152]:
import json

# Read and write with an explicit UTF-8 encoding so non-ASCII text in the dataset is
# handled the same way regardless of the platform's default encoding.
with open("augmented_qa_dataset.json", "r", encoding="utf-8") as f:
    qa_pairs = json.load(f)

# Tokenizer training corpus: every question and every answer on its own line.
with open("bpe_train.txt", "w", encoding="utf-8") as f_out:
    for q, a in qa_pairs:
        f_out.write(q.strip() + "\n")
        f_out.write(a.strip() + "\n")


In [186]:
!pip install sentencepiece
import sentencepiece as spm

# Train the BPE tokenizer on the corpus written above. Output files use the
# "chatbot_bpe" prefix; BPETokenizer loads "chatbot_bpe.model" by default.
trainer_options = dict(
    input="bpe_train.txt",
    model_prefix="chatbot_bpe",
    vocab_size=800,
    model_type="bpe",
    character_coverage=1.0,
    pad_id=0,    # BPETokenizer.encode pads with 0, so PAD must be id 0
    unk_id=1,
    bos_id=-1,   # disables BOS
    eos_id=2,
    hard_vocab_limit=False,
)
spm.SentencePieceTrainer.train(**trainer_options)




In [182]:
%%writefile train.py
import pickle
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

from model import TransformerEncoder, TinyQAModel
from utils import BPETokenizer, prepare_data

# ==== Load the full dataset ====
import json
with open("augmented_qa_dataset.json", "r", encoding="utf-8") as f:
    data = json.load(f)


print(f"Loaded {len(data)} Q&A pairs")

# Add some basic conversational examples
conversational_data = [
    ("Hi", "Hello!"),
    ("Hello", "Hi there!"),
    ("Hey", "Hey! How can I help you?"),
    ("How are you?", "I'm doing well, thanks for asking!"),
    ("What's your name?", "I'm TinyBot, your AI assistant."),
    ("Thank you", "You're welcome!"),
    ("Thanks", "Happy to help!"),
    ("Bye", "Goodbye!"),
    ("Goodbye", "See you later!"),
]

# Combine datasets: the seed pairs above are prepended to the whole loaded dataset
data = conversational_data + data

# ==== Hyperparameters ====
# Taken from config.py so the values are defined in one place instead of being
# duplicated here.
from config import MAX_LEN, D_MODEL, NUM_HEADS, D_FF, NUM_LAYERS, BATCH_SIZE, NUM_EPOCHS, LR

max_len = MAX_LEN
d_model = D_MODEL
num_heads = NUM_HEADS
d_ff = D_FF
num_layers = NUM_LAYERS
batch_size = BATCH_SIZE
num_epochs = NUM_EPOCHS
learning_rate = LR

# Fixed seed so weight initialisation and batch shuffling repeat across runs
torch.manual_seed(42)

# ==== Tokenizer & Data ====
tokenizer = BPETokenizer("chatbot_bpe.model")
eos_id = tokenizer.sp.eos_id()
pad_id = tokenizer.sp.pad_id()
vocab_size = tokenizer.sp.get_piece_size()
print(f"🧠 Vocab size: {vocab_size}, PAD: {pad_id}, EOS: {eos_id}")

# Prepare data: X holds padded questions, y the padded answers, aligned position by position
X, y = prepare_data(data, tokenizer, max_len, pad_id)
dataset = TensorDataset(X, y)
loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# ==== Model ====
encoder = TransformerEncoder(vocab_size, d_model, num_heads, d_ff, num_layers, max_seq_len=max_len)
model = TinyQAModel(encoder, d_model, vocab_size)

# ==== Training ====
criterion = nn.CrossEntropyLoss(ignore_index=pad_id)  # Ignore padding
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.5)

loss_history = []
print("Starting training...")

for epoch in range(num_epochs):
    total_loss = 0
    model.train()

    for batch_x, batch_y in loader:
        logits = model(batch_x)  # shape: [batch, seq_len, vocab_size]
        logits = logits.view(-1, vocab_size)  # [batch * seq_len, vocab_size]
        targets = batch_y.view(-1)            # [batch * seq_len]

        # The logit at input position i is scored against answer token i
        loss = criterion(logits, targets)

        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)  # Gradient clipping
        optimizer.step()

        total_loss += loss.item()

    scheduler.step()
    avg_loss = total_loss / len(loader)
    loss_history.append(avg_loss)

    if (epoch + 1) % 10 == 0:
        print(f"Epoch {epoch+1}/{num_epochs}: Loss = {avg_loss:.4f}, LR = {scheduler.get_last_lr()[0]:.6f}")

# Save model and tokenizer
torch.save(model.state_dict(), "tinyqa_model.pth")
with open("tokenizer.pkl", "wb") as f:
    pickle.dump(tokenizer, f)

print("✅ Training complete. Model saved.")

# Test the model
print("\n🧪 Testing the model:")
test_questions = ["Hi", "What's your name?", "Tell me a joke", "Thanks"]
max_answer_tokens = 20
model.eval()

with torch.no_grad():
    for q in test_questions:
        # Format the question exactly like a training input (question + EOS + padding).
        # Training aligns output position i with answer token i, so the answer is read
        # position by position from a single forward pass; it is not generated by
        # appending tokens to the input.
        question_tensor, _ = prepare_data([(q, "")], tokenizer, max_len, pad_id)
        predicted_ids = model(question_tensor)[0].argmax(dim=-1).tolist()

        answer_ids = []
        for token_id in predicted_ids[:max_answer_tokens]:
            if token_id in (pad_id, eos_id):
                break
            answer_ids.append(token_id)

        answer = tokenizer.decode(answer_ids) if answer_ids else "[No response]"
        print(f"Q: {q} → A: {answer}")

Overwriting train.py


In [192]:
!python train.py


2025-08-02 15:17:16.234506: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1754147836.307637  100776 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1754147836.330150  100776 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
Loaded 44 Q&A pairs
🧠 Vocab size: 800, PAD: 0, EOS: 2
Starting training...
Epoch 10/100: Loss = 5.1903, LR = 0.000500
Epoch 20/100: Loss = 4.4705, LR = 0.000250
Epoch 30/100: Loss = 4.1436, LR = 0.000250
Epoch 40/100: Loss = 3.8452, LR = 0.000125
Epoch 50/100: Loss = 3.6576, LR = 0.000125
Epoch 60/100: Loss = 3.4903, LR = 0.000063
Epoch 70/100: Loss = 3.4326, LR = 0.000063
Epoch 80/100: Loss = 3.3602, LR = 0.000031
Epoch 90/100: Loss

In [189]:
%%writefile inference.py
import torch
import torch.nn.functional as F
import pickle
from model import TransformerEncoder, TinyQAModel
from utils import BPETokenizer

# ==== Load tokenizer & hyperparams ====
# NOTE(security): pickle.load can execute arbitrary code from the file. Only load a
# tokenizer.pkl that was produced by your own training run.
with open("tokenizer.pkl", "rb") as f:
    tokenizer = pickle.load(f)

# Must match training parameters
max_len = 128
d_model = 64
num_heads = 4
d_ff = 128
num_layers = 3

# ==== Load model ====
vocab_size = tokenizer.sp.get_piece_size()
encoder = TransformerEncoder(vocab_size, d_model, num_heads, d_ff, num_layers, max_seq_len=max_len)
model = TinyQAModel(encoder, d_model, vocab_size)

# Load state dict
state_dict = torch.load("tinyqa_model.pth", map_location=torch.device('cpu'))
for k, v in state_dict.items():
    if 'embedding.weight' in k:
        print(f"📏 From checkpoint: {k} = {v.shape}")
print(f"📏 Your model: embedding.weight = {model.encoder.embedding.weight.shape}")

# The positional table is a deterministic buffer rebuilt by the constructor, so it is
# dropped from the checkpoint; strict=False is needed only to tolerate that one key.
state_dict = {k: v for k, v in state_dict.items() if not k.startswith("encoder.pos_encoding.pe")}
load_result = model.load_state_dict(state_dict, strict=False)

# strict=False would otherwise hide real mismatches (weights left at random init),
# so report anything missing or unexpected besides the dropped buffer.
missing_keys = [k for k in load_result.missing_keys if not k.startswith("encoder.pos_encoding.pe")]
unexpected_keys = list(load_result.unexpected_keys)
if missing_keys or unexpected_keys:
    print(f"⚠️ Checkpoint/model mismatch. Missing: {missing_keys} | Unexpected: {unexpected_keys}")
model.eval()

def clean_decode(tokens, tokenizer):
    """Decode ``tokens`` and tidy the text.

    Drops the words "<PAD>", "<UNK>" and "NULL" and collapses a word that would
    directly repeat the previously kept word. Returns the remaining words joined by
    single spaces.
    """
    skipped = ("<PAD>", "<UNK>", "NULL")
    kept = []
    for word in tokenizer.decode(tokens).split():
        if word in skipped:
            continue
        if kept and kept[-1] == word:
            continue
        kept.append(word)
    return " ".join(kept).strip()

def answer_question(model, tokenizer, prompt: str, max_gen_len=50, temperature=0.7, max_len=128):
    """Produce an answer string for ``prompt``.

    The training script aligns the answer with the question position by position
    (output position i is trained to predict answer token i). Decoding therefore formats
    the prompt like a training input (ids cut to ``max_len // 2``, one EOS, padding to
    ``max_len``), runs a single forward pass and reads the answer tokens from positions
    0, 1, 2, ... until EOS/PAD. No persona text is prepended: the model never saw one in
    training and it would only shift the input.

    Args:
        model: the trained TinyQAModel.
        tokenizer: tokenizer exposing ``encode``, ``decode`` and ``sp`` (SentencePiece processor).
        prompt: the user's question.
        max_gen_len: maximum number of answer tokens to read.
        temperature: sampling temperature; ``0`` or less selects the most likely token.
        max_len: padded input length; must equal the length used for training.

    Returns:
        The cleaned answer text, the raw decoded text when cleaning removes everything
        meaningful, "[empty]" when both are blank, or "[no output]" when the first
        predicted token is already EOS/PAD.
    """
    eos_id = tokenizer.sp.eos_id()
    pad_id = tokenizer.sp.pad_id()
    if pad_id < 0:
        # Negative id: the tokenizer has no padding piece; 0 is the embedding's padding row.
        pad_id = 0

    question_ids = list(tokenizer.encode(prompt))[: max_len // 2]
    input_ids = question_ids + [eos_id]
    input_ids += [pad_id] * (max_len - len(input_ids))

    # Build the input on the model's device; load_model may have moved the model to CUDA.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")
    input_tensor = torch.tensor([input_ids], dtype=torch.long, device=device)

    with torch.no_grad():
        logits = model(input_tensor)[0]  # one row of vocabulary scores per position

    answer_ids = []
    for position in range(min(max_gen_len, logits.size(0))):
        position_logits = logits[position]
        if temperature and temperature > 0:
            probs = F.softmax(position_logits / temperature, dim=-1)
            next_token_id = torch.multinomial(probs, 1).item()
        else:
            # Greedy choice; also avoids dividing by a zero temperature.
            next_token_id = torch.argmax(position_logits).item()

        if next_token_id == eos_id or next_token_id == pad_id:
            break
        answer_ids.append(next_token_id)

    if not answer_ids:
        return "[no output]"

    raw_answer = tokenizer.decode(answer_ids).strip()
    cleaned = clean_decode(answer_ids, tokenizer)

    if not cleaned.strip():
        return raw_answer or "[empty]"
    if cleaned.lower() in ["null", "none", "pad", "<pad>", "<unk>"]:
        return raw_answer
    return cleaned

def load_model(model_path="tinyqa_model.pth", tokenizer_path="chatbot_bpe.model", max_len=128,
               d_model=64, num_heads=4, d_ff=128, num_layers=3, device="cpu"):
    """Build the model, load its weights and return ``(model, tokenizer)``.

    Args:
        model_path: checkpoint written by ``torch.save(model.state_dict(), ...)``.
        tokenizer_path: SentencePiece ``.model`` file, or a ``.pkl`` with a pickled
            tokenizer. When the path does not exist, ``tokenizer.pkl`` is used, which is
            what this function always loaded before.
        max_len, d_model, num_heads, d_ff, num_layers: architecture values; they must
            match the ones used when the checkpoint was trained.
        device: device the weights are mapped to and the model is moved to.

    Returns:
        The model in eval mode on ``device`` and the tokenizer.
    """
    import os

    # Honour tokenizer_path (it was previously accepted but ignored).
    use_pickle = (not tokenizer_path) or tokenizer_path.endswith(".pkl") or not os.path.exists(tokenizer_path)
    if use_pickle:
        pickle_path = tokenizer_path if tokenizer_path and tokenizer_path.endswith(".pkl") else "tokenizer.pkl"
        # NOTE(security): pickle.load can execute arbitrary code; only load trusted files.
        with open(pickle_path, "rb") as f:
            tokenizer = pickle.load(f)
    else:
        tokenizer = BPETokenizer(tokenizer_path)

    vocab_size = tokenizer.sp.get_piece_size()
    encoder = TransformerEncoder(vocab_size, d_model, num_heads, d_ff, num_layers, max_seq_len=max_len)
    model = TinyQAModel(encoder, d_model, vocab_size)

    # Use the requested device consistently; it was previously replaced by an
    # automatic CUDA check after the weights had been mapped.
    target_device = torch.device(device)
    state_dict = torch.load(model_path, map_location=target_device)
    # The positional table is rebuilt by the constructor, so the stored copy is dropped;
    # strict=False tolerates the resulting missing key.
    state_dict = {k: v for k, v in state_dict.items() if not k.startswith("encoder.pos_encoding.pe")}
    model.load_state_dict(state_dict, strict=False)
    model.to(target_device)
    model.eval()

    return model, tokenizer

if __name__ == "__main__":
    # Command-line entry point: print a few canned exchanges, then chat until an exit word.
    print("🤖 TinyBot is ready! Type 'exit' to quit.\n")
    print("Testing with examples:")
    for example in ("Hi", "What's your name?", "Thanks"):
        reply = answer_question(model, tokenizer, example, temperature=0.5)
        print(f"Q: {example}")
        print(f"A: {reply}\n")

    print("\n" + "="*50 + "\n")

    exit_words = ('exit', 'quit', 'bye')
    chatting = True
    while chatting:
        user_text = input("You: ")
        if user_text.lower() in exit_words:
            print("Bot: Goodbye!")
            chatting = False
        else:
            reply = answer_question(model, tokenizer, user_text, temperature=0.5)
            print(f"Bot: {reply}\n")


Overwriting inference.py


In [195]:
%%writefile app.py
import streamlit as st
import pickle
import json
import datetime
from inference import answer_question,load_model
from utils import BPETokenizer
import torch

device = "cuda" if torch.cuda.is_available() else "cpu"


# Streamlit re-executes this script on every interaction. Caching the loaded objects
# means the checkpoint is read once per server process instead of on every rerun.
# show_spinner=False keeps the call from drawing anything before set_page_config runs.
@st.cache_resource(show_spinner=False)
def _load_cached_model(device_name):
    """Load the model and tokenizer once and reuse them across reruns and sessions."""
    return load_model(
        model_path="tinyqa_model.pth",
        tokenizer_path="chatbot_bpe.model",
        max_len=128,
        d_model=64,
        num_heads=4,
        d_ff=128,
        num_layers=3,
        device=device_name
    )


# Load the model and tokenizer
model, tokenizer = _load_cached_model(device)


# ------------------ Session State ------------------
def init_session():
    """Create the per-session chat history list the first time the script runs."""
    if "history" in st.session_state:
        return
    st.session_state.history = []

# ------------------ Sidebar ------------------
def render_sidebar():
    """Draw the sidebar (branding, navigation, model upload) and return the chosen section.

    The upload widgets used to sit after the ``return`` statement and were never
    rendered; the selection is now stored first and returned at the end.

    Returns:
        The label of the selected navigation entry.
    """
    st.sidebar.title("🧠 Tiny Transformer")
    st.sidebar.markdown("Crafted by: **Sumanth Kadarla**")
    st.sidebar.markdown("🎓 B.Tech CSE (AIML), Tier-3")
    st.sidebar.markdown("📍 India | 🚀 FAANG-bound")
    st.sidebar.markdown("---")
    section = st.sidebar.radio("📂 Navigate", [
        "🏠 Home",
        "💬 QA Chat",
        "📜 History",
        "📚 Dataset",
        "📊 Training Monitor",
        "⚙️ Model Settings",
        "🧠 Architecture",
        "📘 About"
    ])

    st.sidebar.subheader("📂 Load Saved Model")
    model_file = st.sidebar.file_uploader("Upload Model (.pth)", type="pth")
    tokenizer_file = st.sidebar.file_uploader("Upload Tokenizer (.model)", type="model")

    if model_file and tokenizer_file:
        # getvalue() returns the whole upload regardless of the current read position.
        with open("uploaded_model.pth", "wb") as f:
            f.write(model_file.getvalue())
        with open("uploaded_tokenizer.model", "wb") as f:
            f.write(tokenizer_file.getvalue())

        # Only the paths are recorded here (the settings page displays them); this
        # function does not reload the model that answers questions.
        st.session_state.custom_model = "uploaded_model.pth"
        st.session_state.custom_tokenizer = "uploaded_tokenizer.model"
        st.sidebar.success("✅ Model & Tokenizer loaded!")

    return section


# ------------------ Home ------------------
def render_home():
    """Render the landing page: title, short project blurb and a navigation hint."""
    blurb = """
    A minimal transformer-based chatbot project to learn how GPT-like models work under the hood.

    - Built from scratch using PyTorch
    - Streamlit-powered dashboard
    - Simple token-based Q&A transformer
    """
    st.title("🏠 Welcome to TinyTransformer QA Playground")
    st.markdown(blurb)
    st.markdown("---")
    st.subheader("🧭 Navigate using the sidebar to explore features")

# ------------------ Chat ------------------
def render_qa_chat():
    """Render the chat page: a question box whose answers are appended to the session history."""
    st.title("💬 Ask a Question")
    ask_tab, explain_tab = st.tabs(["🧠 Ask", "🔍 Explanation (coming soon)"])

    with ask_tab:
        user_input = st.text_input("Type your question:")
        # The button is created unconditionally so it is always drawn; the answer is
        # produced only when it was clicked and the box is not empty.
        clicked = st.button("🎯 Get Answer")
        if clicked and user_input:
            answer = answer_question(model, tokenizer, user_input).strip()
            asked_at = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            entry = {
                "question": user_input,
                "answer": answer,
                "timestamp": asked_at,
            }
            st.session_state.history.append(entry)
            st.success(f"🤖 {answer}")

    with explain_tab:
        st.info("Model explanation and token-wise breakdown will go here in a future update!")


# ------------------ History ------------------
def render_history():
    """Render the history page: past exchanges, a save button and a JSON import.

    An imported file is merged into the history once. The widget keeps returning the
    file on every rerun, which previously appended the same entries again each time.
    """
    st.title("📜 Conversation History")
    if st.session_state.history:
        for item in st.session_state.history[::-1]:
            with st.expander(f"🕒 {item['timestamp']}"):
                st.markdown(f"**Q:** {item['question']}")
                st.markdown(f"**A:** {item['answer']}")
    else:
        st.info("No conversation history yet.")

    if st.button("💾 Save Chat"):
        with open("chat_history.json", "w") as f:
            json.dump(st.session_state.history, f)
        st.success("Saved to chat_history.json")

    uploaded_file = st.file_uploader("📂 Load Previous History", type="json")
    if not uploaded_file:
        # Uploader cleared: forget the marker so the same file can be imported again later.
        st.session_state.pop("_history_upload_marker", None)
        return

    marker = (uploaded_file.name, uploaded_file.size)
    if st.session_state.get("_history_upload_marker") == marker:
        return  # this upload has already been merged on an earlier rerun

    try:
        loaded = json.loads(uploaded_file.getvalue())
    except ValueError as exc:  # covers malformed JSON and undecodable bytes
        st.error(f"Could not read history file: {exc}")
        return

    # The loop above indexes these three keys, so reject anything else up front.
    required_keys = {"question", "answer", "timestamp"}
    is_valid = isinstance(loaded, list) and all(
        isinstance(entry, dict) and required_keys <= entry.keys() for entry in loaded
    )
    if not is_valid:
        st.error("History file must be a list of objects with 'question', 'answer' and 'timestamp'.")
        return

    st.session_state.history.extend(loaded)
    st.session_state["_history_upload_marker"] = marker
    st.success("History loaded.")

# ------------------ Dataset ------------------
def render_dataset():
    """Render the dataset page: a preview tab and a tab for uploading a Q/A table.

    An uploaded file is parsed as tab-separated text and must contain the columns
    'Question' and 'Answer'; valid rows are stored in ``st.session_state.uploaded_df``.
    """
    import pandas as pd

    st.title("📚 Dataset Viewer")
    tab1, tab2 = st.tabs(["🔍 Preview", "📥 Upload"])

    with tab1:
        st.markdown("### 📄 Dataset Preview")

        if "uploaded_df" in st.session_state:
            st.success("Showing uploaded dataset:")
            st.dataframe(st.session_state.uploaded_df)
        else:
            st.info("No dataset uploaded yet. Showing static preview instead.")
            st.table([
                {"Question": "What is AI?", "Answer": "Artificial Intelligence"},
                {"Question": "Who is Elon Musk?", "Answer": "Entrepreneur"},
                {"Question": "What is Python?", "Answer": "Programming"},
            ])

    with tab2:
        import chromadb

        st.header("📂 Upload & Embed Your Knowledge")

        if "doc_chunks" not in st.session_state:
            st.session_state.doc_chunks = []

        # Make sure the collection exists. get_or_create_collection replaces the manual
        # list_collections()/.name lookup, whose element type differs between Chroma
        # releases. The sentence-transformer that was instantiated here on every rerun
        # was never used, so it is no longer loaded.
        chroma_client = chromadb.Client()
        chroma_client.get_or_create_collection("rag_memory")

        data_file = st.file_uploader("Upload a .txt file for knowledge base", type=["txt"])
        if data_file:
            try:
                data_file.seek(0)  # parse from the start regardless of earlier reads
                df = pd.read_csv(data_file, sep="\t")

                # Basic validation
                if "Question" in df.columns and "Answer" in df.columns:
                    st.session_state.uploaded_df = df[["Question", "Answer"]].dropna()
                    st.success(f"✅ Uploaded dataset with {len(st.session_state.uploaded_df)} valid QA pairs.")
                else:
                    st.error("❌ The uploaded TSV must contain 'Question' and 'Answer' columns.")
            except Exception as e:
                st.error(f"❌ Failed to read TSV file: {e}")




# ------------------ Training Monitor ------------------
def render_training():
    """Render the training page: configure, train and optionally save a model in-app.

    Requires a dataset in ``st.session_state.uploaded_df``. Training uses the
    SentencePiece model in ``chatbot_bpe.model``, writes ``tinyqa_model.pth`` and
    ``tokenizer.pkl`` when it finishes and keeps the trained weights in the session so
    the separate "Save Model" button still works after Streamlit reruns the script.
    """
    import pandas as pd
    import torch
    import torch.nn as nn
    import torch.optim as optim
    import pickle
    from torch.utils.data import DataLoader, TensorDataset
    from utils import BPETokenizer, prepare_data
    from model import TransformerEncoder, TinyQAModel
    from sklearn.model_selection import train_test_split

    st.title("📊 Train Model Inside Streamlit")

    if "uploaded_df" not in st.session_state:
        st.warning("Please upload a dataset first in the 📚 Dataset tab.")
        return

    # Work on a renamed copy: assigning to .columns would also change the dataframe
    # stored in the session (and shown on the Dataset page).
    df = st.session_state.uploaded_df.rename(columns=str.lower)

    # Detect columns
    if "question" in df.columns and "answer" in df.columns:
        q_col, a_col = "question", "answer"
    elif "text" in df.columns and "response" in df.columns:
        q_col, a_col = "text", "response"
    else:
        st.error("Dataset must contain either ['question', 'answer'] or ['text', 'response']")
        return

    st.subheader("⚙️ Training Configuration")
    epochs = st.slider("Epochs", 1, 20, 5)
    batch_size = st.selectbox("Batch Size", [2, 4, 8, 16], index=1)
    lr = st.number_input("Learning Rate", value=1e-3, format="%.5f")
    st.subheader("⚙️ Transformer Hyperparameters")

    d_model = st.slider("Model Size (d_model)", 16, 128, 32, step=16)
    num_heads = st.slider("Number of Attention Heads", 1, 8, 2)
    num_layers = st.slider("Number of Encoder Layers", 1, 6, 2)
    d_ff = st.slider("Feedforward Hidden Dim (d_ff)", 32, 256, 64, step=32)
    # Single length control; a second slider used to set the same variable and was
    # silently overridden by this one.
    max_len = st.slider("Max Sequence Length", 10, 100, 50)

    # Optional model summary live preview
    st.code(f"""
    TinyQAModel(
      encoder = TransformerEncoder(
        layers = {num_layers},
        heads = {num_heads},
        d_model = {d_model},
        d_ff = {d_ff},
        max_len = {max_len}
      ),
      decoder = nn.Linear({d_model} → vocab)
    )
    """, language="python")

    if st.button("🚀 Train Model"):
        if d_model % num_heads != 0:
            # Multi-head attention splits d_model evenly across heads.
            st.error(f"d_model ({d_model}) must be divisible by the number of heads ({num_heads}).")
            return

        df = df[df[q_col].notna() & df[a_col].notna()]
        df = df.astype({q_col: str, a_col: str})
        uploaded_data = list(zip(df[q_col], df[a_col]))

        chat_data = [
            ("Hi", "Hello!"),
            ("Hey there", "Hi!"),
            ("How are you?", "I'm doing well, thanks!"),
            ("What's your name?", "I'm your chatbot."),
            ("What can you do?", "I answer questions."),
            ("Who made you?", "A student learning AI."),
            ("Tell me a joke", "Why did the computer get cold? It left its Windows open!"),
            ("Nice joke", "Glad you liked it!"),
            ("Do you like pizza?", "I love data... but pizza sounds good too."),
            ("What is AI?", "Artificial Intelligence."),
            ("What is ML?", "Machine Learning."),
            ("Tell me something cool", "Transformers power modern AI like ChatGPT!"),
            ("Can you help me?", "Of course. What do you need?"),
            ("Are you alive?", "Not yet 😄"),
            ("Bye", "Goodbye!"),
            ("Thanks", "You're welcome!"),
            ("What’s 2 + 2?", "It’s 4."),
            ("What's the capital of India?", "New Delhi."),
            ("Can you sing?", "I can rhyme in binary."),
            ("What is Python?", "A programming language.")
        ]

        # Combine base and uploaded dataset
        combined_data = chat_data + uploaded_data

        # ⛔ Clean
        combined_data = [(q, a) for q, a in combined_data if isinstance(q, str) and isinstance(a, str)]

        # ✅ Split
        train_data, val_data = train_test_split(combined_data, test_size=0.2, random_state=42)

        # The tokenizer is the pre-trained SentencePiece model; it is not refitted here
        tokenizer = BPETokenizer("chatbot_bpe.model")
        pad_id = tokenizer.sp.pad_id()
        if pad_id < 0:
            pad_id = 0  # tokenizer without a padding piece: use the embedding's padding row

        # Prepare tensors (pad_id is passed explicitly; it used to be missing from the call)
        X_train, y_train = prepare_data(train_data, tokenizer, max_len, pad_id)
        X_val, y_val = prepare_data(val_data, tokenizer, max_len, pad_id)

        train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=batch_size, shuffle=True)
        val_loader = DataLoader(TensorDataset(X_val, y_val), batch_size=batch_size)

        vocab_size = tokenizer.sp.get_piece_size()

        encoder = TransformerEncoder(vocab_size, d_model, num_heads, d_ff, num_layers, max_seq_len=max_len)
        model = TinyQAModel(encoder, d_model, vocab_size)

        # Padding positions are excluded from the loss; otherwise they dominate it.
        criterion = nn.CrossEntropyLoss(ignore_index=pad_id)
        optimizer = optim.Adam(model.parameters(), lr=lr)

        progress_bar = st.progress(0)
        status_text = st.empty()
        train_loss_history = []
        val_loss_history = []
        val_acc_history = []

        for epoch in range(epochs):
            model.train()
            total_loss = 0
            for batch_x, batch_y in train_loader:
                logits = model(batch_x).view(-1, vocab_size)
                targets = batch_y.view(-1)

                loss = criterion(logits, targets)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                total_loss += loss.item()

            avg_train_loss = total_loss / max(len(train_loader), 1)
            train_loss_history.append(avg_train_loss)

            # 🔍 VALIDATION
            model.eval()
            val_loss = 0
            correct = 0
            total = 0
            with torch.no_grad():
                for val_x, val_y in val_loader:
                    logits = model(val_x).view(-1, vocab_size)
                    targets = val_y.view(-1)
                    val_loss += criterion(logits, targets).item()

                    # Token accuracy over real (non-padding) target positions only.
                    preds = torch.argmax(logits, dim=1)
                    real = targets != pad_id
                    correct += ((preds == targets) & real).sum().item()
                    total += real.sum().item()

            avg_val_loss = val_loss / max(len(val_loader), 1)
            val_acc = correct / total if total else 0.0

            val_loss_history.append(avg_val_loss)
            val_acc_history.append(val_acc)

            progress_bar.progress((epoch + 1) / epochs)
            status_text.text(
                f"Epoch {epoch + 1}/{epochs} | "
                f"Train Loss: {avg_train_loss:.4f} | "
                f"Val Loss: {avg_val_loss:.4f} | "
                f"Val Acc: {val_acc:.2%}"
            )

        # NOTE: this overwrites the checkpoint the app loads at start-up. If the sliders
        # above differ from the architecture the app is started with, that checkpoint
        # will no longer fit the model built at load time.
        torch.save(model.state_dict(), "tinyqa_model.pth")
        with open("tokenizer.pkl", "wb") as f:
            pickle.dump(tokenizer, f)

        # Keep the weights in the session: clicking another button reruns the script,
        # and on that rerun this "Train" branch is not entered again.
        st.session_state.trained_state_dict = model.state_dict()

        # Collect validation examples whose predicted answer differs from the target.
        misclassified = []
        with torch.no_grad():
            for val_x, val_y in val_loader:
                preds = torch.argmax(model(val_x), dim=-1)  # one predicted id per position
                for x, y_true, y_pred in zip(val_x, val_y, preds):
                    real = y_true != pad_id
                    true_ids = y_true[real].tolist()
                    pred_ids = y_pred[real].tolist()
                    if true_ids != pred_ids:
                        question = tokenizer.decode([t for t in x.tolist() if t != pad_id])
                        answer = tokenizer.decode(true_ids)
                        predicted = tokenizer.decode(pred_ids)
                        misclassified.append((question, answer, predicted))

        if misclassified:
            st.subheader("❌ Misclassified Examples")
            for q, true_a, pred_a in misclassified[:5]:  # limit to 5 for speed
                st.markdown(f"**Q:** {q}")
                st.markdown(f"**True A:** {true_a} | **Pred A:** {pred_a}")
                st.markdown("---")

        st.success("✅ Model trained and saved!")
        st.subheader("📉 Training Progress")

        chart_data = {
            "Train Loss": train_loss_history,
            "Val Loss": val_loss_history,
            "Val Accuracy": val_acc_history
        }
        st.line_chart(chart_data)

    # Rendered outside the "Train" branch: a button nested inside another button's branch
    # can never fire, because its click triggers a rerun in which the outer button is False.
    if "trained_state_dict" in st.session_state:
        model_name = st.text_input("Model name to save (no extension)", "tinyqa_bpe")
        if st.button("💾 Save Model"):
            torch.save(st.session_state.trained_state_dict, f"{model_name}.pth")
            with open("chatbot_bpe.model", "rb") as src, open(f"{model_name}_tokenizer.model", "wb") as dst:
                dst.write(src.read())
            st.success(f"Model and tokenizer saved as {model_name}.pth and {model_name}_tokenizer.model")





# ------------------ Settings ------------------
def render_settings():
    """Render the "Model Settings" page.

    Shows which model and tokenizer files are currently selected (read from
    the ``custom_model`` / ``custom_tokenizer`` keys of ``st.session_state``)
    and offers a button that resets both keys to the built-in defaults.
    """
    default_model = "tinyqa_bpe_model.pth"
    default_tokenizer = "chatbot_bpe.model"

    st.title("⚙️ Model Settings")

    # Reserve the banner's position above the button, but fill it only after
    # the click has been handled. Reading the session state before the button
    # would show the *old* paths next to the "Reset" confirmation.
    banner = st.empty()

    reset_clicked = st.button("🔁 Reset to Default")
    if reset_clicked:
        st.session_state.custom_model = default_model
        st.session_state.custom_tokenizer = default_tokenizer

    model_path = st.session_state.get("custom_model", default_model)
    tokenizer_path = st.session_state.get("custom_tokenizer", default_tokenizer)

    # "  \n" is a Markdown hard line break; a bare newline would fold both
    # entries into a single paragraph line.
    banner.info(
        f"**Current Model**: `{model_path}`  \n"
        f"**Tokenizer**: `{tokenizer_path}`"
    )

    if reset_clicked:
        st.success("Reset to default model.")
# ------------------ Architecture ------------------
def render_architecture():
    """Render the static "Architecture" page: a schematic and a hyper-parameter line."""
    # The schematic is assembled line by line; the empty first entry and the
    # whitespace-only last entry reproduce the leading/trailing lines of the
    # text that is handed to st.code.
    schematic_lines = (
        "",
        "TinyQAModel(",
        "  encoder = TransformerEncoder(...),",
        "  decoder = nn.Linear(d_model → vocab)",
        ")",
        "    ",
    )
    hyperparameters = " | ".join(
        ("Heads: 2", "Layers: 2", "d_model: 32", "max_len: 5")
    )

    st.title("🧠 Model Architecture")
    st.code("\n".join(schematic_lines), language="python")
    st.markdown(hyperparameters)

# ------------------ About ------------------
def render_about():
    """Render the "About" page: project blurb, author details and credits."""
    # Repository referenced by the notebook's "Open in Colab" badge; replaces
    # the dead "#" placeholder the footer link used to point at.
    repo_url = "https://github.com/sumanth-github/mini_transformer"

    intro = "This app is a **mini Transformer QA bot** built from scratch using PyTorch."
    author_facts = (
        "**Built by:** Sumanth",
        "**Degree:** B.Tech CSE (AIML), Tier-3",
        "**Current Focus:** ML Engineering | Strategic AI Roles",
    )
    # "  \n" is a Markdown hard line break. With bare newlines the three facts
    # are folded into one paragraph and rendered on a single line.
    about_text = intro + "\n\n" + "  \n".join(author_facts)

    st.title("📘 About This App")
    st.markdown(about_text)
    st.info("Built for learning and showcasing LLM mechanics. Updates coming soon!")
    st.markdown("---")
    st.markdown(
        f"💡 [GitHub Repo]({repo_url}) | 🧠 Powered by PyTorch + Streamlit + SentencePiece"
    )

# ------------------ Main ------------------
def main():
    """Entry point of the Streamlit app.

    Configures the page, calls ``init_session()``, asks ``render_sidebar()``
    for the selected section label and renders the matching page. A label
    that matches none of the known sections produces a visible warning
    instead of a silently empty page.
    """
    st.set_page_config(page_title="TinyQA Bot", layout="wide", initial_sidebar_state="collapsed")
    init_session()
    section = render_sidebar()

    if section == "🏠 Home":
        render_home()
    elif section == "💬 QA Chat":
        render_qa_chat()
    elif section == "📜 History":
        render_history()
    elif section == "📚 Dataset":
        render_dataset()
    elif section == "📊 Training Monitor":
        render_training()
    elif section == "⚙️ Model Settings":
        render_settings()
    elif section == "🧠 Architecture":
        render_architecture()
    elif section == "📘 About":
        render_about()
    else:
        # The labels must match the sidebar's strings exactly (emoji included);
        # surface a mismatch rather than leaving the page blank.
        st.warning(f"Unknown section: {section!r}")

# Build the UI only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()


Overwriting app.py


In [29]:
!pip install sentencepiece



In [177]:
%%writefile diagnostic_script.py
import torch
import pickle
from model import TransformerEncoder, TinyQAModel
from utils import BPETokenizer

print("🔍 Running diagnostics...")

# --- Tokenizer: load the pickled wrapper and report its key IDs -------------
# NOTE(review): unpickling can execute code embedded in the file, so only
# point this at a tokenizer.pkl you produced yourself.
with open("tokenizer.pkl", "rb") as tokenizer_file:
    tokenizer = pickle.load(tokenizer_file)

# Separate prints on purpose: earlier lines still appear if a later attribute
# lookup fails.
print("✓ Tokenizer loaded")
print(f"  Vocab size: {tokenizer.vocab_size}")
print(f"  EOS token ID: {tokenizer.sp.eos_id()}")
# NOTE(review): this line reports vocab_size as the PAD id rather than a value
# queried from the SentencePiece model — confirm that matches training.
print(f"  PAD token ID: {tokenizer.vocab_size}")

# --- Tokenizer: encode/decode round trip on a sample string -----------------
sample_text = "Hello world"
sample_ids = tokenizer.encode(sample_text)
round_trip = tokenizer.decode(sample_ids)
print(
    "\n✓ Tokenizer test:",
    f"  Original: '{sample_text}'",
    f"  Encoded: {sample_ids}",
    f"  Decoded: '{round_trip}'",
    sep="\n",
)

# Load model
# NOTE(review): these hyper-parameters presumably have to match the ones used
# when tinyqa_model.pth was trained — confirm against the training cell.
# (A hard-coded `pad_id=800` used to sit here; it was overwritten before any
# read, so it has been dropped. The PAD id is set in the forward-pass section.)
max_len = 128
d_model = 64
num_heads = 4
d_ff = 128
num_layers = 2
vocab_size = tokenizer.sp.get_piece_size()

encoder = TransformerEncoder(vocab_size, d_model, num_heads, d_ff, num_layers, max_seq_len=max_len)
model = TinyQAModel(encoder, d_model, vocab_size)

print("\n✓ Model created")
print(f"  Total parameters: {sum(p.numel() for p in model.parameters()):,}")

# Load weights
try:
    state_dict = torch.load("tinyqa_model.pth", map_location=torch.device('cpu'))
    # strict=False tolerates checkpoint/model key mismatches, so a diagnostic
    # has to report them; otherwise partially initialised weights would still
    # be announced as a successful load.
    load_result = model.load_state_dict(state_dict, strict=False)
    model.eval()
    if load_result.missing_keys or load_result.unexpected_keys:
        print("⚠ Model weights loaded with key mismatches:")
        print(f"  Missing keys: {list(load_result.missing_keys)}")
        print(f"  Unexpected keys: {list(load_result.unexpected_keys)}")
    else:
        print("✓ Model weights loaded successfully")
except Exception as e:
    print(f"✗ Error loading model weights: {e}")
    # `exit` is injected by the `site` module for interactive use and is not
    # guaranteed to exist; raising SystemExit works everywhere.
    raise SystemExit(1)

# Test forward pass
print("\n🧪 Testing model forward pass...")
test_input = "Hi"
# NOTE(review): other parts of this script derive the PAD id differently
# (the header prints vocab_size, generation asks the SentencePiece model) —
# confirm which id the model was trained with.
pad_id = tokenizer.sp.get_piece_size() - 1
input_ids = tokenizer.encode(test_input)[:25] + [tokenizer.sp.eos_id()]
padded = input_ids + [pad_id] * (max_len - len(input_ids))
input_tensor = torch.tensor([padded], dtype=torch.long)

print(f"  Input text: '{test_input}'")
print(f"  Input IDs: {input_ids}")
print(f"  Input shape: {input_tensor.shape}")

with torch.no_grad():
    output = model(input_tensor)

    print(f"  Output shape: {output.shape}")

    # Check predictions at each (non-padding) input position
    print("\n  Token predictions:")
    for i in range(len(input_ids)):
        # Normalise over the WHOLE vocabulary first and only then take the
        # top entries. A softmax over just the top-k logits renormalises
        # them to sum to 1 and overstates the model's confidence.
        token_probs = torch.softmax(output[0, i, :], dim=-1)
        top_tokens = torch.topk(token_probs, min(5, token_probs.numel()))

        print(f"    Position {i}: {tokenizer.decode([padded[i]])} ->")
        for tid, prob in zip(top_tokens.indices.tolist(), top_tokens.values.tolist()):
            piece = tokenizer.decode([tid])
            print(f"      {piece}: {prob:.3f}")

# Test generation
print("\n🧪 Testing generation...")

def debug_generate(question, max_new_tokens=10):
    """Greedy-decode an answer for ``question`` and print every step.

    Uses the module-level ``tokenizer``, ``model`` and ``max_len`` (and
    ``pad_id`` as a fallback, see below). Nothing is returned; the question
    ids, each decoding step and the final answer are printed.

    Args:
        question: Text to encode; only its first 25 token ids are used.
        max_new_tokens: Upper bound on the number of generated tokens.
    """
    eos_id = tokenizer.sp.eos_id()
    # SentencePiece reports -1 when the model defines no PAD piece. A negative
    # id cannot be used to pad the model input, so fall back to the PAD id the
    # forward-pass section of this script uses.
    sp_pad_id = tokenizer.sp.pad_id()
    fill_id = sp_pad_id if sp_pad_id >= 0 else pad_id

    q_ids = tokenizer.encode(question)[:25]
    input_ids = q_ids + [eos_id]

    print(f"  Question: '{question}'")
    print(f"  Question IDs: {q_ids}")

    generated = []
    with torch.no_grad():
        for step in range(max_new_tokens):
            current_seq = input_ids + generated
            # The logits at the last real token predict the next token.
            next_pos = len(current_seq) - 1

            # Context window is full: stop before spending a forward pass.
            if next_pos >= max_len - 1:
                break

            padded = current_seq + [fill_id] * (max_len - len(current_seq))
            input_tensor = torch.tensor([padded], dtype=torch.long)
            output = model(input_tensor)

            next_token = torch.argmax(output[0, next_pos, :]).item()

            print(f"    Step {step}: pos={next_pos}, next_token={next_token} ('{tokenizer.decode([next_token])}')")

            if next_token in (eos_id, fill_id):
                print("    Stopping: hit EOS/PAD")
                break

            generated.append(next_token)

    if generated:
        answer = tokenizer.decode(generated)
        print(f"  Generated tokens: {generated}")
        print(f"  Answer: '{answer}'")
    else:
        print("  No tokens generated!")

# Probe questions shared by the step-by-step generation check and the
# end-to-end inference check below.
sample_questions = ("Hi", "Hello", "What's your name?")
separator = "=" * 50

# Step-by-step generation for every probe question.
for question in sample_questions:
    print("\n" + separator)
    debug_generate(question)

# Imported at this point of the script rather than at the top of the file.
from inference import answer_question

print("\n🚨 Testing live inference:")
for question in sample_questions:
    print(f"Q: {question}")
    # Called only after the question line is printed, so anything the helper
    # itself prints appears below "Q: ...".
    reply = answer_question(model, tokenizer, question)
    print(f"A: {reply}\n")

print("\n✅ Diagnostics complete!")

Overwriting diagnostic_script.py


In [156]:
# Shell escape: run the diagnostic script in a separate Python process.
!python diagnostic_script.py


2025-08-02 14:19:18.565658: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1754144358.590390   86646 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1754144358.597695   86646 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
🔍 Running diagnostics...
✓ Tokenizer loaded
  Vocab size: 800
  EOS token ID: 2
  PAD token ID: 800

✓ Tokenizer test:
  Original: 'Hello world'
  Encoded: [335, 30, 283]
  Decoded: 'Hello world'

✓ Model created
  Total parameters: 170,337
✓ Model weights loaded successfully

🧪 Testing model forward pass...
  Input text: 'Hi'
  Input IDs: [251, 2]
  Input shape: torch.Size([1, 128])
  Output shape: torch.Size([1, 128, 801])

  Token

In [130]:
!rm chatbot_bpe.*  # Delete old model if needed
