# AI Paper Summarizer & Q&A System



## Step 1: Install Dependencies

In [1]:
# Install required packages
!pip install -q transformers datasets accelerate peft bitsandbytes
!pip install -q rouge-score nltk evaluate
!pip install -q fastapi uvicorn pyngrok
!pip install -q PyPDF2 pypdf pyyaml python-dotenv

print("✓ All packages installed successfully!")

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.1/59.1 MB[0m [31m32.1 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m232.6/232.6 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25h✓ All packages installed successfully!


## Step 2: Setup Project Structure

In [2]:
import os
import sys

# Create directory structure
directories = [
    'config',
    'src',
    'data/raw',
    'data/processed',
    'data/datasets',
    'models',
    'outputs',
    'logs',
    'cache'
]

for directory in directories:
    os.makedirs(directory, exist_ok=True)
    
print("✓ Directory structure created!")

✓ Directory structure created!


## Step 3: Create Configuration File

In [3]:
%%writefile config.yaml
# Configuration for AI Paper Summarizer & Q&A System

model:
  summarizer:
    base_model: "sshleifer/distilbart-cnn-12-6"
    max_length: 512
    min_length: 100
    num_beams: 4
    
  qa:
    base_model: "distilbert-base-uncased-distilled-squad"
    max_length: 384
    doc_stride: 128
    
  lora:
    r: 16
    lora_alpha: 32
    lora_dropout: 0.1
    bias: "none"
    task_type: "SEQ_2_SEQ_LM"

training:
  summarizer:
    num_epochs: 3
    batch_size: 4
    gradient_accumulation_steps: 4
    learning_rate: 3e-4
    warmup_steps: 500
    weight_decay: 0.01
    max_grad_norm: 1.0
    fp16: true
    
  qa:
    num_epochs: 2
    batch_size: 8
    gradient_accumulation_steps: 2
    learning_rate: 5e-5
    warmup_steps: 300
    weight_decay: 0.01
    max_grad_norm: 1.0
    fp16: true

data:
  max_samples: 5000
  train_split: 0.9
  val_split: 0.1
  max_input_length: 1024
  max_target_length: 256
  
paths:
  data_dir: "/kaggle/input"
  output_dir: "./outputs"
  model_save_dir: "./models"
  logs_dir: "./logs"
  cache_dir: "./cache"

deployment:
  port: 8000
  host: "0.0.0.0"
  ngrok_token: ""
  
evaluation:
  rouge_types: ["rouge1", "rouge2", "rougeL"]
  save_predictions: true

Writing config.yaml


## Step 4: Create Python Modules

Copy the utils.py, data_preparation.py, model_training.py, inference.py, and deployment.py files into the working directory or use the %%writefile magic to create them.

In [4]:
# Copy all Python files from input to working directory
!cp /kaggle/input/allscripts/*.py /kaggle/working/
!cp /kaggle/input/allscripts/*.yaml /kaggle/working/
!cp /kaggle/input/allscripts/*.txt /kaggle/working/
!cp /kaggle/input/allscripts/*.md /kaggle/working/

# Verify files are copied
!ls -lh /kaggle/working/

print("✓ Files copied successfully!")

total 156K
drwxr-xr-x 2 root root 4.0K Feb 13 01:00 cache
-rw-r--r-- 1 root root  15K Feb 13 01:00 COMPLETE_DOCUMENTATION.md
drwxr-xr-x 2 root root 4.0K Feb 13 01:00 config
-rw-r--r-- 1 root root 1.5K Feb 13 01:00 config.yaml
drwxr-xr-x 5 root root 4.0K Feb 13 01:00 data
-rw-r--r-- 1 root root  15K Feb 13 01:00 data_preparation.py
-rw-r--r-- 1 root root 8.5K Feb 13 01:00 deployment.py
-rw-r--r-- 1 root root  13K Feb 13 01:00 inference.py
drwxr-xr-x 2 root root 4.0K Feb 13 01:00 logs
drwxr-xr-x 2 root root 4.0K Feb 13 01:00 models
-rw-r--r-- 1 root root  14K Feb 13 01:00 model_training.py
drwxr-xr-x 2 root root 4.0K Feb 13 01:00 outputs
-rw-r--r-- 1 root root 6.4K Feb 13 01:00 QUICKSTART.md
-rw-r--r-- 1 root root 2.6K Feb 13 01:00 README.md
-rw-r--r-- 1 root root  502 Feb 13 01:00 requirements.txt
-rw-r--r-- 1 root root 7.0K Feb 13 01:00 setup.py
drwxr-xr-x 2 root root 4.0K Feb 13 01:00 src
-rw-r--r-- 1 root root 9.8K Feb 13 01:00 test_system.py
-rw-r--r-- 1 root root 8.8K Feb 13 01:00 

In [5]:
# Import the modules we'll need
import utils
import data_preparation
import model_training
import inference
import deployment

print("✓ Modules imported successfully!")

2026-02-13 01:00:16.728299: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1770944416.908114     103 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1770944416.960217     103 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1770944417.394756     103 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1770944417.394803     103 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1770944417.394806     103 computation_placer.cc:177] computation placer alr

✓ Modules imported successfully!


## Step 5: Prepare Data

In [6]:
from data_preparation import ArxivDatasetProcessor
from utils import load_config, setup_logging

logger = setup_logging()
config = load_config()

# Initialize processor
processor = ArxivDatasetProcessor(config)

# Load dataset
print("Loading arXiv dataset...")
df = processor.load_arxiv_dataset()
print(f"Loaded {len(df)} papers")

# Preview data
df.head()

2026-02-13 01:00:33,889 - utils - INFO - Loading arXiv dataset...
2026-02-13 01:00:33,890 - utils - INFO - Loading from HuggingFace datasets...


Loading arXiv dataset...


README.md: 0.00B [00:00, ?B/s]

scientific_papers.py: 0.00B [00:00, ?B/s]

2026-02-13 01:00:34,291 - utils - ERROR - Error loading dataset: Dataset scripts are no longer supported, but found scientific_papers.py
2026-02-13 01:00:34,291 - utils - INFO - Creating sample dataset for testing...


Loaded 3 papers


Unnamed: 0,title,abstract,article
0,Attention Is All You Need,"We propose a new simple network architecture, ...",The dominant sequence transduction models are ...
1,BERT: Pre-training of Deep Bidirectional Trans...,We introduce a new language representation mod...,Language model pre-training has been shown to ...
2,GPT-3: Language Models are Few-Shot Learners,Recent work has demonstrated substantial gains...,Language models have recently been shown to be...


In [7]:
# Prepare summarization dataset
print("Preparing summarization dataset...")
summ_dataset = processor.prepare_summarization_data(df)
summ_dataset.save_to_disk("./data/datasets/summarization")

print(f"Train samples: {len(summ_dataset['train'])}")
print(f"Validation samples: {len(summ_dataset['validation'])}")

# Preview sample
print("\nSample:")
print(summ_dataset['train'][0])

2026-02-13 01:00:34,319 - utils - INFO - Preparing summarization dataset...


Preparing summarization dataset...


Processing papers: 100%|██████████| 3/3 [00:00<00:00, 4056.39it/s]
2026-02-13 01:00:34,346 - utils - INFO - Train samples: 2
2026-02-13 01:00:34,346 - utils - INFO - Validation samples: 1


Saving the dataset (0/1 shards):   0%|          | 0/2 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/1 [00:00<?, ? examples/s]

Train samples: 2
Validation samples: 1

Sample:
{'article': 'Language model pre-training has been shown to be effective for improving many natural language processing tasks. These include sentence-level tasks such as natural language inference and paraphrasing, which aim to predict the relationships between sentences by analyzing them holistically, as well as token-level tasks such as named entity recognition and question answering, where models are required to produce fine-grained output at the token level.Language model pre-training has been shown to be effective for improving many natural language processing tasks. These include sentence-level tasks such as natural language inference and paraphrasing, which aim to predict the relationships between sentences by analyzing them holistically, as well as token-level tasks such as named entity recognition and question answering, where models are required to produce fine-grained output at the token level.Language model pre-training has bee

In [8]:
# Prepare Q&A dataset
print("Preparing Q&A dataset...")
qa_dataset = processor.prepare_qa_data(df)
qa_dataset.save_to_disk("./data/datasets/qa")

print(f"Train Q&A pairs: {len(qa_dataset['train'])}")
print(f"Validation Q&A pairs: {len(qa_dataset['validation'])}")

# Preview sample
print("\nSample:")
print(qa_dataset['train'][0])

2026-02-13 01:00:34,390 - utils - INFO - Preparing Q&A dataset...


Preparing Q&A dataset...


Generating Q&A pairs: 100%|██████████| 3/3 [00:00<00:00, 4742.90it/s]
2026-02-13 01:00:34,408 - utils - INFO - Train Q&A pairs: 8
2026-02-13 01:00:34,409 - utils - INFO - Validation Q&A pairs: 1


Saving the dataset (0/1 shards):   0%|          | 0/8 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/1 [00:00<?, ? examples/s]

Train Q&A pairs: 8
Validation Q&A pairs: 1

Sample:
{'question': 'What is the main contribution of this paper?', 'context': 'We propose a new simple network architecture, the Transformer, based solely on attention mechanisms, dispensing with recurrence and convolutions entirely. Experiments on two machine translation tasks show these models to be superior in quality while being more parallelizable and requiring significantly less time to train.', 'answers': {'answer_start': [155], 'text': ['Experiments on two machine translation tasks show these models to be superior in quality while being more parallelizable and requiring significantly less time to train']}}


## Step 6: Train Summarization Model

In [9]:
# ===================================
# CELL: Train Summarization Model
# ===================================

from model_training import SummarizationTrainer
from utils import load_config
import yaml

print("="*50)
print("TRAINING SUMMARIZATION MODEL")
print("="*50)

# Load and fix config
config = load_config()

# Ensure learning rates are floats
if isinstance(config['training']['summarizer']['learning_rate'], str):
    config['training']['summarizer']['learning_rate'] = float(config['training']['summarizer']['learning_rate'])
if isinstance(config['training']['qa']['learning_rate'], str):
    config['training']['qa']['learning_rate'] = float(config['training']['qa']['learning_rate'])

print(f"Learning rate: {config['training']['summarizer']['learning_rate']}")

# Initialize trainer
summ_trainer = SummarizationTrainer(config)

# Setup model
summ_trainer.setup_model_and_tokenizer()

# Load data
summ_trainer.load_and_tokenize_data()

# Train
summ_trainer.train()

print("✓ Summarization model training complete!")

2026-02-13 01:00:34,462 - utils - INFO - Loading model: sshleifer/distilbart-cnn-12-6


TRAINING SUMMARIZATION MODEL
Learning rate: 0.0003
Using GPU: Tesla T4
GPU Memory: 15.64 GB


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

2026-02-13 01:00:49,457 - utils - INFO - Model memory: 1228.53 MB
2026-02-13 01:00:49,458 - utils - INFO - Loading summarization dataset...
2026-02-13 01:00:49,468 - utils - INFO - Tokenizing summarization dataset...


trainable params: 1,572,864 || all params: 307,083,264 || trainable%: 0.5122


Tokenizing:   0%|          | 0/2 [00:00<?, ? examples/s]

Tokenizing:   0%|          | 0/1 [00:00<?, ? examples/s]

2026-02-13 01:00:49,581 - utils - INFO - Training samples: 2
2026-02-13 01:00:49,583 - utils - INFO - Validation samples: 1
2026-02-13 01:00:49,584 - utils - INFO - Starting summarization model training...
  trainer = Seq2SeqTrainer(


Step,Training Loss,Validation Loss


config.json: 0.00B [00:00, ?B/s]

2026-02-13 01:00:54,726 - utils - INFO - Model saved to ./models/summarizer_final
2026-02-13 01:00:54,727 - utils - INFO - Training metrics: {'train_runtime': 3.7334, 'train_samples_per_second': 1.607, 'train_steps_per_second': 0.804, 'total_flos': 9345454571520.0, 'train_loss': 12.915903727213541, 'epoch': 3.0}


2026-02-13 01:00:56,425 - absl - INFO - Using default tokenizer.
2026-02-13 01:00:56,428 - utils - INFO - Evaluation results: {'eval_loss': 12.905929565429688, 'eval_rouge1': 0.21153846153846154, 'eval_rouge2': 0.0392156862745098, 'eval_rougeL': 0.1346153846153846, 'eval_runtime': 1.6951, 'eval_samples_per_second': 0.59, 'eval_steps_per_second': 0.59, 'epoch': 3.0}


✓ Summarization model training complete!


## Step 7: Train Q&A Model

In [10]:
from model_training import QATrainer
from utils import clear_cuda_cache

# Clear memory from previous training
del summ_trainer
clear_cuda_cache()

print("="*50)
print("TRAINING Q&A MODEL")
print("="*50)

# Initialize trainer
qa_trainer = QATrainer(config)

# Setup model
qa_trainer.setup_model_and_tokenizer()

# Load data
qa_trainer.load_and_tokenize_data()

# Train
qa_trainer.train()

print("✓ Q&A model training complete!")

2026-02-13 01:00:56,668 - utils - INFO - Loading Q&A model: distilbert-base-uncased-distilled-squad


TRAINING Q&A MODEL
Using GPU: Tesla T4
GPU Memory: 15.64 GB


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/451 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/265M [00:00<?, ?B/s]

2026-02-13 01:00:59,291 - utils - INFO - Model memory: 265.46 MB
2026-02-13 01:00:59,292 - utils - INFO - Loading Q&A dataset...
2026-02-13 01:00:59,300 - utils - INFO - Tokenizing Q&A dataset...


Tokenizing Q&A:   0%|          | 0/8 [00:00<?, ? examples/s]

Tokenizing Q&A:   0%|          | 0/1 [00:00<?, ? examples/s]

2026-02-13 01:00:59,355 - utils - INFO - Training samples: 8
2026-02-13 01:00:59,356 - utils - INFO - Validation samples: 1
2026-02-13 01:00:59,357 - utils - INFO - Starting Q&A model training...
  trainer = Trainer(


Step,Training Loss,Validation Loss


2026-02-13 01:01:01,713 - utils - INFO - Q&A model saved to ./models/qa_final
2026-02-13 01:01:01,714 - utils - INFO - Training metrics: {'train_runtime': 1.461, 'train_samples_per_second': 10.951, 'train_steps_per_second': 1.369, 'total_flos': 1567837200384.0, 'train_loss': 3.5042238235473633, 'epoch': 2.0}


2026-02-13 01:01:01,737 - utils - INFO - Evaluation results: {'eval_loss': 3.149193525314331, 'eval_exact_match': 0.0, 'eval_f1': 0.0, 'eval_runtime': 0.0191, 'eval_samples_per_second': 52.377, 'eval_steps_per_second': 52.377, 'epoch': 2.0}


✓ Q&A model training complete!


## Step 8: Test Models with Sample Paper

In [11]:
from inference import PaperQASystem

# Sample paper for testing
sample_paper = """
Attention Is All You Need

Abstract: We propose a new simple network architecture, the Transformer, 
based solely on attention mechanisms, dispensing with recurrence and 
convolutions entirely. Experiments on two machine translation tasks show 
these models to be superior in quality while being more parallelizable 
and requiring significantly less time to train.

Introduction: The dominant sequence transduction models are based on 
complex recurrent or convolutional neural networks that include an 
encoder and a decoder. The best performing models also connect the 
encoder and decoder through an attention mechanism. We propose a new 
simple network architecture, the Transformer, based solely on attention 
mechanisms.

The Transformer follows the overall architecture using stacked self-attention 
and point-wise, fully connected layers for both the encoder and decoder. 
In this work we propose the Transformer, a model architecture eschewing 
recurrence and instead relying entirely on an attention mechanism to draw 
global dependencies between input and output.

Methods: Our model uses multi-head attention mechanisms which allow the model 
to jointly attend to information from different representation subspaces. 
We use eight parallel attention layers, or heads. For each of these we use 
dk=dv=dmodel/h=64 dimensions.
"""

print("Loading trained models...")
system = PaperQASystem(
    summarizer_path="./models/summarizer_final",
    qa_path="./models/qa_final"
)

print("✓ Models loaded successfully!")

2026-02-13 01:01:01,784 - utils - INFO - Initializing Paper Q&A System...
2026-02-13 01:01:01,791 - utils - INFO - Loading summarization model from ./models/summarizer_final


Loading trained models...
Using GPU: Tesla T4
GPU Memory: 15.64 GB


pytorch_model.bin:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

2026-02-13 01:01:07,209 - utils - INFO - Summarization model loaded successfully
2026-02-13 01:01:07,229 - utils - INFO - Loading Q&A model from ./models/qa_final


Using GPU: Tesla T4
GPU Memory: 15.64 GB


Device set to use cuda:0
2026-02-13 01:01:07,617 - utils - INFO - Q&A model loaded successfully
2026-02-13 01:01:07,621 - utils - INFO - System initialized successfully


✓ Models loaded successfully!


In [12]:
# Test summarization
print("\n" + "="*60)
print("TESTING SUMMARIZATION")
print("="*60)

summary = system.summarizer.summarize(sample_paper)

print(f"\nOriginal length: {len(sample_paper.split())} words")
print(f"Summary length: {len(summary.split())} words")
print(f"\nSUMMARY:\n{summary}")


TESTING SUMMARIZATION

Original length: 185 words
Summary length: 86 words

SUMMARY:
 We propose a new simple network architecture, the Transformer, based solely on attention mechanisms. Experiments on two machine translation tasks show these models to be superior in quality while being more parallelizable and requiring significantly less time to train. The Transformer follows the overall architecture using stacked self-attention and point-wise, fully connected layers for both the encoder and decoder. Our model uses multi-head attention mechanisms which allow the model to jointly attend to information from different representation subspaces. For each of these we use dk=dv=dmodel/h=64 dimensions.


In [13]:
# Test Q&A
print("\n" + "="*60)
print("TESTING Q&A")
print("="*60)

test_questions = [
    "What is the main contribution?",
    "What architecture is proposed?",
    "How many attention heads are used?",
    "What does the Transformer replace?"
]

for question in test_questions:
    result = system.qa.answer_question(question, sample_paper)
    print(f"\nQ: {question}")
    print(f"A: {result['answer']}")
    print(f"Confidence: {result['score']:.2%}")


TESTING Q&A

Q: What is the main contribution?
A: draw global dependencies between input and output
Confidence: 13.59%

Q: What architecture is proposed?
A: the Transformer
Confidence: 28.04%

Q: How many attention heads are used?
A: eight
Confidence: 91.05%

Q: What does the Transformer replace?
A: an attention mechanism to draw global dependencies between input and output
Confidence: 9.93%


In [14]:
# Test complete pipeline
print("\n" + "="*60)
print("TESTING COMPLETE PIPELINE")
print("="*60)

results = system.process_paper(
    paper_text=sample_paper,
    questions=test_questions
)

print(f"\nSUMMARY:\n{results['summary']}")
print("\nQ&A RESULTS:")
for qa in results['qa_results']:
    print(f"\nQ: {qa['question']}")
    print(f"A: {qa['answer']}")
    print(f"Confidence: {qa['score']:.2%}")

print(f"\nTotal processing time: {results['processing_time']:.2f}s")

2026-02-13 01:01:21,375 - utils - INFO - Generating summary...
2026-02-13 01:01:21,376 - utils - INFO - Processing 1 chunks...
2026-02-13 01:01:21,377 - utils - INFO - Summarizing chunk 1/1



TESTING COMPLETE PIPELINE


2026-02-13 01:01:23,010 - utils - INFO - Answering 4 questions...



SUMMARY:
 We propose a new simple network architecture, the Transformer, based solely on attention mechanisms. Experiments on two machine translation tasks show these models to be superior in quality while being more parallelizable and requiring significantly less time to train. The Transformer follows the overall architecture using stacked self-attention and point-wise, fully connected layers for both the encoder and decoder. Our model uses multi-head attention mechanisms which allow the model to jointly attend to information from different representation subspaces. For each of these we use dk=dv=dmodel/h=64 dimensions.

Q&A RESULTS:

Q: What is the main contribution?
A: draw global dependencies between input and output
Confidence: 13.59%

Q: What architecture is proposed?
A: the Transformer
Confidence: 28.04%

Q: How many attention heads are used?
A: eight
Confidence: 91.05%

Q: What does the Transformer replace?
A: an attention mechanism to draw global dependencies between input an

## Step 9: Deploy with FastAPI and Ngrok

In [15]:

NGROK_TOKEN = "2sV7t8ID8Ezdy2IJEB3yngswQf7_81vvYkxHYuNe7z6AYBC2A"  

# Update config with ngrok token
if NGROK_TOKEN:
    config['deployment']['ngrok_token'] = NGROK_TOKEN
    import yaml
    with open('config.yaml', 'w') as f:
        yaml.dump(config, f)

In [16]:
import os
os.makedirs('static', exist_ok=True)

In [17]:
# ===================================
# CELL: HTML CODE
# ===================================

import os

html_better_errors = '''<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>AI Paper Analyzer</title>
    <link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;600;700&display=swap" rel="stylesheet">
    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
    <style>
        * { margin: 0; padding: 0; box-sizing: border-box; }
        body {
            font-family: 'Inter', sans-serif;
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            min-height: 100vh;
            padding: 20px;
            color: #333;
        }
        .container { max-width: 1400px; margin: 0 auto; }
        .header {
            text-align: center;
            color: white;
            margin-bottom: 40px;
            animation: fadeInDown 0.8s ease;
        }
        .app-title {
            font-size: 3.5rem;
            font-weight: 700;
            margin-bottom: 10px;
            text-shadow: 2px 2px 4px rgba(0,0,0,0.2);
        }
        .app-subtitle {
            font-size: 1.3rem;
            font-weight: 300;
            opacity: 0.95;
        }
        .main-content {
            display: grid;
            grid-template-columns: 1fr 1fr;
            gap: 30px;
            animation: fadeInUp 0.8s ease;
        }
        .panel {
            background: white;
            border-radius: 20px;
            padding: 35px;
            box-shadow: 0 20px 60px rgba(0,0,0,0.3);
            transition: transform 0.3s ease;
        }
        .panel:hover { transform: translateY(-5px); }
        .panel-title {
            font-size: 1.8rem;
            font-weight: 600;
            color: #667eea;
            margin-bottom: 25px;
            display: flex;
            align-items: center;
            gap: 12px;
        }
        .upload-area {
            border: 3px dashed #667eea;
            border-radius: 15px;
            padding: 50px 30px;
            text-align: center;
            cursor: pointer;
            transition: all 0.3s ease;
            background: #f8f9ff;
            margin-bottom: 25px;
        }
        .upload-area:hover { background: #f0f2ff; border-color: #764ba2; }
        .upload-icon { font-size: 3.5rem; color: #667eea; margin-bottom: 15px; }
        .upload-text { font-size: 1.1rem; color: #666; margin-bottom: 8px; }
        .upload-hint { font-size: 0.9rem; color: #999; }
        .file-input { display: none; }
        .file-name {
            display: none;
            margin-top: 15px;
            padding: 12px;
            background: #e8f5e9;
            border-radius: 8px;
            color: #2e7d32;
            font-weight: 500;
        }
        .mode-selection {
            display: grid;
            grid-template-columns: 1fr 1fr;
            gap: 15px;
            margin-bottom: 25px;
        }
        .mode-btn {
            padding: 15px;
            border: 2px solid #e0e0e0;
            border-radius: 12px;
            background: white;
            cursor: pointer;
            transition: all 0.3s ease;
            display: flex;
            align-items: center;
            justify-content: center;
            gap: 10px;
            font-size: 1rem;
            font-weight: 600;
            color: #666;
        }
        .mode-btn:hover { border-color: #667eea; background: #f8f9ff; }
        .mode-btn.active {
            border-color: #667eea;
            background: linear-gradient(135deg, #f8f9ff 0%, #f0f2ff 100%);
            color: #667eea;
            box-shadow: 0 4px 15px rgba(102, 126, 234, 0.2);
        }
        .questions-section { display: none; }
        .questions-section.active { display: block; }
        .question-item { display: flex; gap: 10px; margin-bottom: 12px; }
        .question-input {
            flex: 1;
            padding: 14px 18px;
            border: 2px solid #e0e0e0;
            border-radius: 10px;
            font-size: 1rem;
            font-family: 'Inter', sans-serif;
        }
        .question-input:focus {
            outline: none;
            border-color: #667eea;
            box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.1);
        }
        .remove-question {
            background: #ff5252;
            color: white;
            border: none;
            width: 45px;
            border-radius: 10px;
            cursor: pointer;
            font-size: 1.2rem;
        }
        .add-question {
            width: 100%;
            padding: 14px;
            background: #f0f2ff;
            border: 2px dashed #667eea;
            border-radius: 10px;
            color: #667eea;
            font-weight: 600;
            cursor: pointer;
            font-size: 1rem;
        }
        .action-buttons {
            display: grid;
            grid-template-columns: 1fr 2fr;
            gap: 15px;
            margin-top: 25px;
        }
        .btn {
            padding: 16px 32px;
            border: none;
            border-radius: 12px;
            font-size: 1.1rem;
            font-weight: 600;
            cursor: pointer;
            display: flex;
            align-items: center;
            justify-content: center;
            gap: 10px;
            transition: all 0.3s ease;
        }
        .btn-primary {
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            color: white;
            box-shadow: 0 10px 30px rgba(102, 126, 234, 0.4);
        }
        .btn-primary:hover {
            transform: translateY(-2px);
            box-shadow: 0 15px 40px rgba(102, 126, 234, 0.5);
        }
        .btn-primary:disabled { opacity: 0.5; cursor: not-allowed; }
        .btn-secondary { background: white; color: #667eea; border: 2px solid #667eea; }
        .btn-secondary:hover { background: #f0f2ff; }
        .results-container { display: none; }
        .summary-box {
            background: linear-gradient(135deg, #f8f9ff 0%, #f0f2ff 100%);
            padding: 25px;
            border-radius: 15px;
            border-left: 5px solid #667eea;
            line-height: 1.8;
            font-size: 1.05rem;
        }
        .answer-card {
            background: white;
            border: 2px solid #e0e0e0;
            border-radius: 15px;
            padding: 20px;
            margin-bottom: 15px;
            transition: all 0.3s ease;
        }
        .answer-card:hover {
            border-color: #667eea;
            box-shadow: 0 5px 20px rgba(102, 126, 234, 0.15);
        }
        .answer-question {
            font-weight: 600;
            color: #667eea;
            font-size: 1.1rem;
            margin-bottom: 12px;
        }
        .answer-text {
            color: #333;
            line-height: 1.7;
            margin-bottom: 12px;
            padding-left: 30px;
        }
        .confidence-bar {
            display: flex;
            align-items: center;
            gap: 12px;
            padding-left: 30px;
        }
        .progress-bar {
            flex: 1;
            height: 10px;
            background: #e0e0e0;
            border-radius: 10px;
            overflow: hidden;
        }
        .progress-fill {
            height: 100%;
            background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
        }
        .loading { display: none; text-align: center; padding: 40px; }
        .spinner {
            border: 4px solid #f3f3f3;
            border-top: 4px solid #667eea;
            border-radius: 50%;
            width: 50px;
            height: 50px;
            animation: spin 1s linear infinite;
            margin: 0 auto 20px;
        }
        .error-display {
            background: #ffebee;
            border: 2px solid #f44336;
            border-radius: 10px;
            padding: 20px;
            margin: 20px 0;
            color: #c62828;
        }
        .error-display h4 {
            margin-bottom: 10px;
            display: flex;
            align-items: center;
            gap: 10px;
        }
        @keyframes spin { 100% { transform: rotate(360deg); } }
        @keyframes fadeInDown { from { opacity: 0; transform: translateY(-30px); } to { opacity: 1; transform: translateY(0); } }
        @keyframes fadeInUp { from { opacity: 0; transform: translateY(30px); } to { opacity: 1; transform: translateY(0); } }
        @media (max-width: 968px) {
            .main-content { grid-template-columns: 1fr; }
            .app-title { font-size: 2.5rem; }
            .mode-selection { grid-template-columns: 1fr; }
        }
        .toast {
            position: fixed;
            bottom: 30px;
            right: 30px;
            background: white;
            padding: 20px 25px;
            border-radius: 12px;
            box-shadow: 0 10px 40px rgba(0,0,0,0.3);
            display: none;
            z-index: 1000;
        }
    </style>
</head>
<body>
    <div class="container">
        <div class="header">
            <h1 class="app-title"><i class="fas fa-brain"></i> AI Paper Analyzer</h1>
            <p class="app-subtitle">Professional Research Assistant powered by Advanced AI</p>
        </div>
        <div class="main-content">
            <div class="panel">
                <h2 class="panel-title"><i class="fas fa-upload"></i> Upload Paper</h2>
                <div class="upload-area" id="uploadArea">
                    <div class="upload-icon"><i class="fas fa-cloud-upload-alt"></i></div>
                    <div class="upload-text">Drag & Drop your paper here</div>
                    <div class="upload-hint">or click to browse (PDF, TXT)</div>
                    <input type="file" id="fileInput" class="file-input" accept=".pdf,.txt">
                </div>
                <div class="file-name" id="fileName"></div>
                <h2 class="panel-title" style="margin-top: 30px;"><i class="fas fa-tasks"></i> Select Mode</h2>
                <div class="mode-selection">
                    <button class="mode-btn" id="summarizeBtn" onclick="selectMode('summarize')">
                        <i class="fas fa-file-alt"></i> Summarize Only
                    </button>
                    <button class="mode-btn" id="qaBtn" onclick="selectMode('qa')">
                        <i class="fas fa-question-circle"></i> Q&A Only
                    </button>
                </div>
                <div class="questions-section" id="questionsSection">
                    <h2 class="panel-title"><i class="fas fa-comments"></i> Your Questions</h2>
                    <div id="questionsContainer">
                        <div class="question-item">
                            <input type="text" class="question-input" placeholder="Enter your question...">
                            <button class="remove-question" onclick="removeQuestion(this)"><i class="fas fa-times"></i></button>
                        </div>
                    </div>
                    <button class="add-question" onclick="addQuestion()"><i class="fas fa-plus"></i> Add Another Question</button>
                </div>
                <div class="action-buttons">
                    <button class="btn btn-secondary" onclick="clearAll()"><i class="fas fa-redo"></i> Reset</button>
                    <button class="btn btn-primary" id="processBtn" onclick="processDocument()"><i class="fas fa-magic"></i> Process</button>
                </div>
            </div>
            <div class="panel">
                <h2 class="panel-title"><i class="fas fa-chart-line"></i> Results</h2>
                <div class="loading" id="loadingState">
                    <div class="spinner"></div>
                    <div>Processing your paper...</div>
                </div>
                <div class="results-container" id="resultsContainer">
                    <div id="summarySection" style="display: none;">
                        <h3 style="color: #667eea; margin-bottom: 15px;"><i class="fas fa-file-alt"></i> Summary</h3>
                        <div class="summary-box" id="summaryBox"></div>
                    </div>
                    <div id="qaSection" style="display: none;">
                        <h3 style="color: #667eea; margin-bottom: 15px;"><i class="fas fa-comments"></i> Answers</h3>
                        <div id="answersContainer"></div>
                    </div>
                    <div id="processingTime" style="text-align: center; margin-top: 20px; color: #667eea;"></div>
                </div>
                <div id="errorDisplay" style="display: none;"></div>
                <div id="initialState" style="text-align: center; color: #999; padding: 60px 20px;">
                    <i class="fas fa-lightbulb" style="font-size: 4rem; opacity: 0.3; margin-bottom: 20px;"></i>
                    <p style="font-size: 1.2rem;">Upload a paper and select a mode to get started</p>
                    <p style="font-size: 0.9rem; margin-top: 10px; opacity: 0.7;">Choose "Summarize" for quick overview or "Q&A" to ask specific questions</p>
                </div>
            </div>
        </div>
    </div>
    <div class="toast" id="toast"></div>
    <script>
        const API_BASE_URL = window.location.origin;
        let uploadedFile = null;
        let selectedMode = null;

        document.getElementById('uploadArea').addEventListener('click', () => document.getElementById('fileInput').click());
        document.getElementById('fileInput').addEventListener('change', (e) => {
            const file = e.target.files[0];
            if (file && (file.type === 'application/pdf' || file.type === 'text/plain' || file.name.endsWith('.pdf') || file.name.endsWith('.txt'))) {
                uploadedFile = file;
                document.getElementById('fileName').textContent = `✓ ${file.name} (${(file.size / 1024).toFixed(1)} KB)`;
                document.getElementById('fileName').style.display = 'block';
                showToast('File uploaded successfully!', 'success');
            } else {
                showToast('Please upload PDF or TXT file', 'error');
            }
        });

        function selectMode(mode) {
            selectedMode = mode;
            document.getElementById('summarizeBtn').classList.remove('active');
            document.getElementById('qaBtn').classList.remove('active');
            if (mode === 'summarize') {
                document.getElementById('summarizeBtn').classList.add('active');
                document.getElementById('questionsSection').classList.remove('active');
            } else if (mode === 'qa') {
                document.getElementById('qaBtn').classList.add('active');
                document.getElementById('questionsSection').classList.add('active');
            }
        }

        function addQuestion() {
            const container = document.getElementById('questionsContainer');
            const div = document.createElement('div');
            div.className = 'question-item';
            div.innerHTML = `
                <input type="text" class="question-input" placeholder="Enter your question...">
                <button class="remove-question" onclick="removeQuestion(this)"><i class="fas fa-times"></i></button>
            `;
            container.appendChild(div);
        }

        function removeQuestion(btn) {
            if (document.getElementById('questionsContainer').children.length > 1) {
                btn.parentElement.remove();
            }
        }

        function clearAll() {
            uploadedFile = null;
            selectedMode = null;
            document.getElementById('fileInput').value = '';
            document.getElementById('fileName').style.display = 'none';
            document.getElementById('summarizeBtn').classList.remove('active');
            document.getElementById('qaBtn').classList.remove('active');
            document.getElementById('questionsSection').classList.remove('active');
            document.getElementById('questionsContainer').innerHTML = `
                <div class="question-item">
                    <input type="text" class="question-input" placeholder="Enter your question...">
                    <button class="remove-question" onclick="removeQuestion(this)"><i class="fas fa-times"></i></button>
                </div>
            `;
            document.getElementById('resultsContainer').style.display = 'none';
            document.getElementById('errorDisplay').style.display = 'none';
            document.getElementById('initialState').style.display = 'block';
            showToast('Form reset', 'success');
        }

        async function processDocument() {
            if (!uploadedFile) {
                showToast('Please upload a file first', 'error');
                return;
            }
            if (!selectedMode) {
                showToast('Please select a mode (Summarize or Q&A)', 'error');
                return;
            }

            const inputs = document.querySelectorAll('.question-input');
            const questions = Array.from(inputs).map(i => i.value.trim()).filter(q => q);
            
            if (selectedMode === 'qa' && questions.length === 0) {
                showToast('Please enter at least one question for Q&A mode', 'error');
                return;
            }

            document.getElementById('loadingState').style.display = 'block';
            document.getElementById('resultsContainer').style.display = 'none';
            document.getElementById('errorDisplay').style.display = 'none';
            document.getElementById('initialState').style.display = 'none';
            document.getElementById('processBtn').disabled = true;

            try {
                const formData = new FormData();
                formData.append('file', uploadedFile);
                formData.append('questions', selectedMode === 'qa' ? questions.join(',') : '');

                console.log('=== REQUEST ===');
                console.log('File:', uploadedFile.name, uploadedFile.type);
                console.log('Mode:', selectedMode);
                console.log('Questions:', questions);

                const response = await fetch(`${API_BASE_URL}/upload-pdf`, {
                    method: 'POST',
                    body: formData
                });

                console.log('=== RESPONSE ===');
                console.log('Status:', response.status);
                console.log('Status Text:', response.statusText);
                
                const responseText = await response.text();
                console.log('Raw Response:', responseText);
                
                let data;
                try {
                    data = JSON.parse(responseText);
                } catch (e) {
                    throw new Error(`Invalid JSON response: ${responseText}`);
                }
                
                console.log('Parsed Data:', data);

                if (!response.ok) {
                    throw new Error(data.detail || 'Processing failed');
                }
                
                document.getElementById('summarySection').style.display = 'none';
                document.getElementById('qaSection').style.display = 'none';
                
                if (selectedMode === 'summarize') {
                    if (data.summary && data.summary.trim()) {
                        document.getElementById('summaryBox').textContent = data.summary;
                        document.getElementById('summarySection').style.display = 'block';
                    } else {
                        throw new Error('No summary received from server');
                    }
                } else if (selectedMode === 'qa') {
                    if (data.qa_results && Array.isArray(data.qa_results) && data.qa_results.length > 0) {
                        const answersContainer = document.getElementById('answersContainer');
                        answersContainer.innerHTML = '';
                        
                        data.qa_results.forEach(qa => {
                            const card = document.createElement('div');
                            card.className = 'answer-card';
                            const confidence = (qa.score * 100).toFixed(1);
                            card.innerHTML = `
                                <div class="answer-question"><i class="fas fa-question-circle"></i> ${qa.question}</div>
                                <div class="answer-text"><i class="fas fa-comment-dots" style="color: #667eea;"></i> ${qa.answer}</div>
                                <div class="confidence-bar">
                                    <span style="color: #666; font-size: 0.9rem;">Confidence:</span>
                                    <div class="progress-bar">
                                        <div class="progress-fill" style="width: ${confidence}%"></div>
                                    </div>
                                    <span style="color: #667eea; font-weight: 600;">${confidence}%</span>
                                </div>
                            `;
                            answersContainer.appendChild(card);
                        });
                        
                        document.getElementById('qaSection').style.display = 'block';
                    } else {
                        throw new Error('No Q&A results received from server');
                    }
                }

                document.getElementById('processingTime').innerHTML = 
                    `<i class="fas fa-clock"></i> Processing completed in ${data.processing_time.toFixed(2)} seconds`;
                
                document.getElementById('resultsContainer').style.display = 'block';
                showToast('Processing complete!', 'success');

            } catch (error) {
                console.error('=== ERROR ===');
                console.error(error);
                
                const errorDiv = document.getElementById('errorDisplay');
                errorDiv.className = 'error-display';
                errorDiv.innerHTML = `
                    <h4><i class="fas fa-exclamation-triangle"></i> Error Processing Document</h4>
                    <p><strong>Error:</strong> ${error.message}</p>
                    <p><strong>File:</strong> ${uploadedFile.name}</p>
                    <p><strong>Mode:</strong> ${selectedMode}</p>
                    <p style="margin-top: 10px; font-size: 0.9rem;">Check the browser console (F12) for detailed logs.</p>
                `;
                errorDiv.style.display = 'block';
                
                showToast(error.message, 'error');
            } finally {
                document.getElementById('loadingState').style.display = 'none';
                document.getElementById('processBtn').disabled = false;
            }
        }

        function showToast(message, type) {
            const toast = document.getElementById('toast');
            toast.textContent = message;
            toast.style.display = 'block';
            toast.style.borderLeft = type === 'success' ? '5px solid #4caf50' : '5px solid #f44336';
            setTimeout(() => toast.style.display = 'none', 3000);
        }
    </script>
</body>
</html>'''

with open('static/index.html', 'w', encoding='utf-8') as f:
    f.write(html_better_errors)



In [18]:
# ===================================
# CELL: Create Bulletproof Backend
# ===================================

deployment_fixed = '''import os
from fastapi import FastAPI, HTTPException, UploadFile, File, Form
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel
from typing import Optional, List
import PyPDF2
from io import BytesIO
import time
import traceback

from inference import PaperQASystem
from utils import setup_logging, load_config

logger = setup_logging()

class SummarizeRequest(BaseModel):
    text: str
    max_length: Optional[int] = None
    min_length: Optional[int] = None

class QuestionRequest(BaseModel):
    question: str
    context: str

class MultiQuestionRequest(BaseModel):
    questions: List[str]
    context: str

app = FastAPI(
    title="AI Paper Analyzer",
    description="Professional AI Research Assistant",
    version="1.0.0"
)

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

os.makedirs("static", exist_ok=True)
app.mount("/static", StaticFiles(directory="static"), name="static")

system = None

@app.on_event("startup")
async def startup_event():
    global system
    logger.info("Loading models...")
    try:
        system = PaperQASystem(
            summarizer_path="./models/summarizer_final",
            qa_path="./models/qa_final"
        )
        logger.info("Models loaded successfully")
    except Exception as e:
        logger.error(f"Failed to load models: {e}")
        try:
            system = PaperQASystem(
                summarizer_path="sshleifer/distilbart-cnn-12-6",
                qa_path="distilbert-base-uncased-distilled-squad"
            )
        except Exception as e2:
            logger.error(f"Failed to load base models: {e2}")
            raise

@app.get("/")
async def root():
    return FileResponse("static/index.html")

@app.get("/health")
async def health_check():
    return {"status": "healthy", "models_loaded": system is not None}

@app.post("/summarize")
async def summarize_text(request: SummarizeRequest):
    try:
        if not system:
            raise HTTPException(status_code=503, detail="Models not loaded")
        
        summary = system.summarizer.summarize(
            text=request.text,
            max_length=request.max_length,
            min_length=request.min_length
        )
        
        return {
            "summary": summary,
            "input_length": len(request.text.split()),
            "summary_length": len(summary.split())
        }
    except Exception as e:
        logger.error(f"Summarization error: {e}")
        raise HTTPException(status_code=500, detail=str(e))

@app.post("/ask")
async def answer_question(request: QuestionRequest):
    try:
        if not system:
            raise HTTPException(status_code=503, detail="Models not loaded")
        
        result = system.qa.answer_question(
            question=request.question,
            context=request.context
        )
        return result
    except Exception as e:
        logger.error(f"Q&A error: {e}")
        raise HTTPException(status_code=500, detail=str(e))

@app.post("/upload-pdf")
async def upload_file(
    file: UploadFile = File(...),
    questions: str = Form(default="")
):
    """Upload and process a file (PDF or TXT)"""
    try:
        if not system:
            raise HTTPException(status_code=503, detail="Models not loaded")
        
        logger.info(f"Received file: {file.filename}, content_type: {file.content_type}")
        logger.info(f"Questions parameter: {questions}")
        
        # Read file contents
        contents = await file.read()
        logger.info(f"Read {len(contents)} bytes")
        
        # Extract text based on file type
        text = ""
        filename_lower = file.filename.lower()
        
        try:
            if filename_lower.endswith('.pdf'):
                logger.info("Processing as PDF...")
                pdf_file = BytesIO(contents)
                pdf_reader = PyPDF2.PdfReader(pdf_file)
                logger.info(f"PDF has {len(pdf_reader.pages)} pages")
                
                for i, page in enumerate(pdf_reader.pages):
                    page_text = page.extract_text()
                    text += page_text + "\\n"
                    logger.info(f"Page {i+1}: extracted {len(page_text)} chars")
                    
            elif filename_lower.endswith('.txt'):
                logger.info("Processing as TXT...")
                try:
                    text = contents.decode('utf-8')
                except UnicodeDecodeError:
                    text = contents.decode('latin-1')
            else:
                raise HTTPException(
                    status_code=400,
                    detail=f"Unsupported file type: {file.filename}. Please upload PDF or TXT."
                )
            
            logger.info(f"Total extracted text: {len(text)} characters")
            
            if not text.strip():
                raise HTTPException(
                    status_code=400,
                    detail="No text could be extracted from the file"
                )
        
        except HTTPException:
            raise
        except Exception as e:
            logger.error(f"Text extraction failed: {e}")
            logger.error(traceback.format_exc())
            raise HTTPException(
                status_code=500,
                detail=f"Failed to extract text: {str(e)}"
            )
        
        # Process based on whether questions were provided
        start_time = time.time()
        
        if questions and questions.strip():
            # Q&A Mode
            question_list = [q.strip() for q in questions.split(',') if q.strip()]
            logger.info(f"Q&A mode: processing {len(question_list)} questions")
            
            try:
                qa_results = system.qa.answer_multiple_questions(question_list, text)
                processing_time = time.time() - start_time
                
                logger.info(f"Q&A completed: {len(qa_results)} answers in {processing_time:.2f}s")
                
                return {
                    "summary": "",
                    "qa_results": qa_results,
                    "processing_time": processing_time,
                    "filename": file.filename,
                    "text_length": len(text.split()),
                    "mode": "qa"
                }
            except Exception as e:
                logger.error(f"Q&A processing failed: {e}")
                logger.error(traceback.format_exc())
                raise HTTPException(status_code=500, detail=f"Q&A failed: {str(e)}")
        else:
            # Summarize Mode
            logger.info("Summarize mode: generating summary")
            
            try:
                summary = system.summarizer.summarize(text)
                processing_time = time.time() - start_time
                
                logger.info(f"Summary completed in {processing_time:.2f}s")
                
                return {
                    "summary": summary,
                    "qa_results": [],
                    "processing_time": processing_time,
                    "filename": file.filename,
                    "text_length": len(text.split()),
                    "mode": "summarize"
                }
            except Exception as e:
                logger.error(f"Summarization failed: {e}")
                logger.error(traceback.format_exc())
                raise HTTPException(status_code=500, detail=f"Summarization failed: {str(e)}")
        
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Unexpected error: {e}")
        logger.error(traceback.format_exc())
        raise HTTPException(status_code=500, detail=f"Server error: {str(e)}")

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000, log_level="info")
'''

with open('deployment_ui.py', 'w') as f:
    f.write(deployment_fixed)

print("✓ Created bulletproof backend with:")
print("  - Extensive logging at every step")
print("  - Better error handling")
print("  - Form data instead of query params")
print("  - Detailed error messages")

✓ Created bulletproof backend with:
  - Extensive logging at every step
  - Better error handling
  - Form data instead of query params
  - Detailed error messages


In [19]:
# Restart server
import nest_asyncio
nest_asyncio.apply()

import threading, time
from pyngrok import ngrok
import uvicorn
from deployment_ui import app

PORT = 8000
NGROK_TOKEN = "2sV7t8ID8Ezdy2IJEB3yngswQf7_81vvYkxHYuNe7z6AYBC2A"

ngrok.set_auth_token(NGROK_TOKEN) if NGROK_TOKEN != "YOUR_TOKEN" else None

class ServerRunner:
    def __init__(self, app, port):
        self.app, self.port = app, port
    def run(self):
        uvicorn.Server(uvicorn.Config(self.app, host="0.0.0.0", port=self.port, log_level="info")).run()

threading.Thread(target=ServerRunner(app, PORT).run, daemon=True).start()
time.sleep(5)

public_url = ngrok.connect(PORT, bind_tls=True)
print(f"\n✅ LIVE AT: {public_url}\n")

                                                                                                    

2026-02-13 01:01:24,071 - pyngrok.process - INFO - Updating authtoken for default "config_path" of "ngrok_path": /root/.config/ngrok/ngrok
INFO:     Started server process [103]
INFO:     Waiting for application startup.
2026-02-13 01:01:24,232 - utils - INFO - Loading models...
2026-02-13 01:01:24,237 - utils - INFO - Initializing Paper Q&A System...
2026-02-13 01:01:24,242 - utils - INFO - Loading summarization model from ./models/summarizer_final


Using GPU: Tesla T4
GPU Memory: 15.64 GB


2026-02-13 01:01:26,340 - utils - INFO - Summarization model loaded successfully
2026-02-13 01:01:26,345 - utils - INFO - Loading Q&A model from ./models/qa_final
Device set to use cuda:0
2026-02-13 01:01:26,501 - utils - INFO - Q&A model loaded successfully
2026-02-13 01:01:26,501 - utils - INFO - System initialized successfully
2026-02-13 01:01:26,502 - utils - INFO - Models loaded successfully
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)


Using GPU: Tesla T4
GPU Memory: 15.64 GB


2026-02-13 01:01:29,097 - pyngrok.ngrok - INFO - Opening tunnel named: http-8000-0bac63ad-bc93-4cc4-8e3c-ee453c3b45dc
2026-02-13 01:01:29,115 - pyngrok.process.ngrok - INFO - t=2026-02-13T01:01:29+0000 lvl=info msg="no configuration paths supplied"
2026-02-13 01:01:29,116 - pyngrok.process.ngrok - INFO - t=2026-02-13T01:01:29+0000 lvl=info msg="using configuration at default config path" path=/root/.config/ngrok/ngrok.yml
2026-02-13 01:01:29,118 - pyngrok.process.ngrok - INFO - t=2026-02-13T01:01:29+0000 lvl=info msg="open config file" path=/root/.config/ngrok/ngrok.yml err=nil
2026-02-13 01:01:29,135 - pyngrok.process.ngrok - INFO - t=2026-02-13T01:01:29+0000 lvl=info msg="FIPS 140 mode" enabled=false
2026-02-13 01:01:29,142 - pyngrok.process.ngrok - INFO - t=2026-02-13T01:01:29+0000 lvl=info msg="starting web service" obj=web addr=127.0.0.1:4040 allow_hosts=[]
2026-02-13 01:01:29,289 - pyngrok.process.ngrok - INFO - t=2026-02-13T01:01:29+0000 lvl=info msg="client session established"


✅ LIVE AT: NgrokTunnel: "https://2c31-34-61-103-96.ngrok-free.app" -> "http://localhost:8000"



2026-02-13 01:01:29,330 - pyngrok.process.ngrok - INFO - t=2026-02-13T01:01:29+0000 lvl=info msg=end pg=/api/tunnels id=28817ae3c099c4e9 status=201 dur=30.232789ms


In [20]:
# ===================================
# CELL: Direct Test - Upload a Real File
# ===================================

import requests
import json

BASE_URL = "http://localhost:8000"

# Create a simple test PDF or TXT
test_content = """
The Transformer model uses attention mechanisms.
It was introduced in 2017 by Vaswani et al.
The model achieves excellent results on translation tasks.
"""

# Save as text file
with open('simple_test.txt', 'w') as f:
    f.write(test_content)

print("Testing with TXT file...")
with open('simple_test.txt', 'rb') as f:
    files = {'file': ('simple_test.txt', f, 'text/plain')}
    data = {'questions': 'What is the Transformer?'}
    
    response = requests.post(f"{BASE_URL}/upload-pdf", files=files, data=data)
    
    print(f"\nStatus: {response.status_code}")
    if response.status_code == 200:
        result = response.json()
        print(f"Success!")
        print(f"Mode: {result.get('mode')}")
        print(f"QA Results: {len(result.get('qa_results', []))} answers")
        if result.get('qa_results'):
            for qa in result['qa_results']:
                print(f"\nQ: {qa['question']}")
                print(f"A: {qa['answer']}")
    else:
        print(f"Error: {response.text}")

# Now check what the frontend is sending
print("\n" + "="*60)
print("Now test from the UI and paste the console logs here")
print("="*60)

2026-02-13 01:01:29,345 - utils - INFO - Received file: simple_test.txt, content_type: text/plain
2026-02-13 01:01:29,346 - utils - INFO - Questions parameter: What is the Transformer?
2026-02-13 01:01:29,346 - utils - INFO - Read 153 bytes
2026-02-13 01:01:29,347 - utils - INFO - Processing as TXT...
2026-02-13 01:01:29,348 - utils - INFO - Total extracted text: 153 characters
2026-02-13 01:01:29,349 - utils - INFO - Q&A mode: processing 1 questions
2026-02-13 01:01:29,359 - utils - INFO - Q&A completed: 1 answers in 0.01s


Testing with TXT file...
INFO:     127.0.0.1:59578 - "POST /upload-pdf HTTP/1.1" 200 OK

Status: 200
Success!
Mode: qa
QA Results: 1 answers

Q: What is the Transformer?
A: attention mechanisms

Now test from the UI and paste the console logs here


## Step 10: Test API Endpoints

Once the server is running, you can test the endpoints:

In [21]:
import requests

BASE_URL = "http://localhost:8000"  

# Test summarization
response = requests.post(
    f"{BASE_URL}/summarize",
    json={"text": sample_paper}
)
print("Summarization result:")
print(response.json())

INFO:     127.0.0.1:59592 - "POST /summarize HTTP/1.1" 200 OK
Summarization result:
{'summary': ' We propose a new simple network architecture, the Transformer, based solely on attention mechanisms. Experiments on two machine translation tasks show these models to be superior in quality while being more parallelizable and requiring significantly less time to train. The Transformer follows the overall architecture using stacked self-attention and point-wise, fully connected layers for both the encoder and decoder. Our model uses multi-head attention mechanisms which allow the model to jointly attend to information from different representation subspaces. For each of these we use dk=dv=dmodel/h=64 dimensions.', 'input_length': 185, 'summary_length': 86}


In [22]:
# Test Q&A
response = requests.post(
    f"{BASE_URL}/ask",
    json={
        "question": "What is the main contribution?",
        "context": sample_paper
    }
)
print("Q&A result:")
print(response.json())

INFO:     127.0.0.1:59594 - "POST /ask HTTP/1.1" 200 OK
Q&A result:
{'answer': 'draw global dependencies between input and output', 'score': 0.13593682646751404, 'start': 1008, 'end': 1057}


## Step 11: Save Models for Later Use

In [23]:
# Models are already saved in ./models/
# You can download them or upload to Kaggle Datasets

import shutil

# Create archive of models
shutil.make_archive('trained_models', 'zip', './models')
print("✓ Models archived as trained_models.zip")
print("Download this file to save your trained models!")

✓ Models archived as trained_models.zip
Download this file to save your trained models!
