In [1]:
import sys, torch
print("Python:", sys.version)
print("Torch CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("GPU:", torch.cuda.get_device_name(0))
else:
    print("Running on CPU.")


Python: 3.11.13 (main, Jun  4 2025, 08:57:29) [GCC 11.4.0]
Torch CUDA available: False
Running on CPU.


In [2]:
!pip install --quiet fastapi uvicorn nest-asyncio pyngrok transformers arxiv streamlit
# If you hit model-related bugs, uncomment next line to upgrade transformers from GitHub head:
# !pip install --quiet -U git+https://github.com/huggingface/transformers.git


[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/44.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.9/9.9 MB[0m [31m65.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.3/81.3 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m83.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.1/79.1 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for sgmllib3k (setup.py) ... [?25l[?25hdone


In [3]:
import nest_asyncio
from pyngrok import ngrok
import os, threading, textwrap

# Allow nested event loops (needed in notebooks)
nest_asyncio.apply()

# Kill ngrok processes from previous runs (ignore errors)
!pkill -f ngrok || echo "No ngrok process to kill."


^C


In [4]:
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM

NUM_PAPERS = 5
SUMMARIZER_MODEL = "sshleifer/distilbart-cnn-12-6"
LLM_MODEL = "MBZUAI/LaMini-Flan-T5-783M"

print("Loading summarizer...")
summarizer = pipeline("summarization", model=SUMMARIZER_MODEL)

print("Loading LLM (tokenizer + model)...")
tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL)
model = AutoModelForSeq2SeqLM.from_pretrained(LLM_MODEL)

print("Models loaded.")


Loading summarizer...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

Device set to use cpu


Loading LLM (tokenizer + model)...


tokenizer_config.json: 0.00B [00:00, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/860 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/3.13G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/3.13G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

Models loaded.


In [5]:
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import arxiv

app = FastAPI(title="AI Researcher API (Colab)")

class TopicRequest(BaseModel):
    topic: str

SECTION_PROMPTS = {
    "Introduction": """Write a detailed academic introduction (~300 words) for a literature review on the use of AI in healthcare, based on the following paper summaries. Explain why the topic is important, current challenges, and the motivation for reviewing recent research.

Paper summaries:
{summaries}
""",
    "Key Approaches and Findings": """Describe in detail (~400 words) the most important technical approaches, methodologies, and key findings from the following summaries. Emphasize specific models, datasets, innovations, and how they differ from each other.

Paper summaries:
{summaries}
""",
    "Comparative Analysis": """Write a thorough comparative analysis (~400 words) of the research papers summarized below. Focus on comparing strengths, limitations, performance, explainability, and generalizability of the approaches used.

Paper summaries:
{summaries}
""",
    "Gaps and Future Directions": """Write a comprehensive discussion (~300 words) about the gaps, limitations, and unanswered questions based on these paper summaries. Then suggest concrete future directions for research in AI for healthcare.

Paper summaries:
{summaries}
"""
}

def fetch_and_summarize_papers(topic: str):
    search = arxiv.Search(query=topic, max_results=NUM_PAPERS, sort_by=arxiv.SortCriterion.Relevance)
    results = list(search.results())
    if not results:
        return None, None

    summaries, bib_entries = [], []

    for i, result in enumerate(results):
        abstract = result.summary.strip().replace("\n", " ")
        try:
            summary = summarizer(abstract, max_length=150, min_length=60, do_sample=False)[0]['summary_text']
        except Exception:
            summary = abstract[:300]
        summaries.append(f"- {summary}")

        authors = ", ".join(a.name for a in result.authors)
        year = result.published.year
        title = result.title.strip()
        url = result.entry_id
        bib_entries.append(f"{i+1}. {authors} ({year}). *{title}*. arXiv. {url}")

    return "\n".join(summaries), bib_entries

def generate_section(prompt_text: str):
    inputs = tokenizer(prompt_text, return_tensors="pt", truncation=True, max_length=1024)
    outputs = model.generate(
        **inputs,
        max_new_tokens=512,
        do_sample=False,
        temperature=0.7,
        pad_token_id=tokenizer.eos_token_id
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True).strip()

def build_lit_review(topic: str):
    summaries_text, bib_entries = fetch_and_summarize_papers(topic)
    if not summaries_text:
        raise HTTPException(status_code=404, detail="No papers found for this topic.")

    full_review = ""
    for section, template in SECTION_PROMPTS.items():
        prompt = template.format(summaries=summaries_text)
        section_text = generate_section(prompt)
        full_review += f"### {section}\n{section_text}\n\n"

    return {"review": full_review, "bibliography": bib_entries}

@app.post("/generate_review")
def generate_review(req: TopicRequest):
    return build_lit_review(req.topic)


In [6]:
import uvicorn

def run_backend():
    uvicorn.run(app, host="0.0.0.0", port=8000)

backend_thread = threading.Thread(target=run_backend, daemon=True)
backend_thread.start()
print("FastAPI backend starting on port 8000...")


FastAPI backend starting on port 8000...


In [12]:
streamlit_app_code = """\
import streamlit as st
import requests

API_URL = "http://localhost:8000/generate_review"

st.set_page_config(page_title="AI Researcher", layout="wide")
st.title("🤖📚 AI Researcher")

st.markdown(
    "Enter a research topic below. This app fetches arXiv papers, summarizes them, and builds a structured literature review."
)

topic = st.text_input("Enter Research Topic", "AI in Healthcare")

if st.button("Generate Literature Review") and topic:
    with st.spinner("Generating... please wait (models running)..."):
        try:
            res = requests.post(API_URL, json={"topic": topic})
            res.raise_for_status()
            data = res.json()

            st.markdown("## 📄 Literature Review")
            st.markdown(data["review"])

            st.markdown("## 📚 Bibliography")
            for entry in data["bibliography"]:
                st.markdown(entry)

        except requests.exceptions.RequestException as e:
            st.error(f"Request failed: {e}")
else:
    st.info("Enter a topic and click the button above.")
"""
with open("app.py", "w") as f:
    f.write(streamlit_app_code)

print("Streamlit app.py written.")


Streamlit app.py written.


In [13]:
!ngrok config add-authtoken 30CLp1Wz4rtzJSzrvTplxZjVQiZ_6cMtxKK97o3e4MDMVRAoa

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [14]:
# Start Streamlit (background); suppress noisy output into log file
!nohup streamlit run app.py --server.port 8501 --server.headless true > streamlit.log 2>&1 &

# Open ngrok tunnel to Streamlit
streamlit_public_url = ngrok.connect(addr=8501)
print("🌐 Streamlit public URL:", streamlit_public_url.public_url)

print("\nYou can open that URL in your browser. Backend + frontend are running inside Colab.")


🌐 Streamlit public URL: https://daf52d8c5ab9.ngrok-free.app

You can open that URL in your browser. Backend + frontend are running inside Colab.


In [11]:
import requests, json
test_topic = "AI in healthcare"
r = requests.post("http://localhost:8000/generate_review", json={"topic": test_topic})
print("Status:", r.status_code)
if r.ok:
    js = r.json()
    print("Keys:", js.keys())
    print("Bibliography entries:", len(js.get("bibliography", [])))
else:
    print("Error body:", r.text)


  results = list(search.results())
Your max_length is set to 150, but your input_length is only 136. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=68)
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


INFO:     127.0.0.1:37522 - "POST /generate_review HTTP/1.1" 200 OK
Status: 200
Keys: dict_keys(['review', 'bibliography'])
Bibliography entries: 5
