In [1]:
%%capture
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere under the input directory.
input_root = '/kaggle/input'
for folder, _, names in os.walk(input_root):
    for full_path in (os.path.join(folder, name) for name in names):
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
%%capture
# installing offline dependencies
# Every install in this cell points at a local path under /kaggle/input or /kaggle/working.
# FAISS GPU wheel.
# NOTE(review): the dataset folder is named "faiss-gpu-173" but the wheel file is version 1.7.2 — confirm which version is intended.
!pip install -U /kaggle/input/faiss-gpu-173-python310/faiss_gpu-1.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
# sentence-transformers is copied into the working directory and installed from that copy.
# NOTE(review): presumably because the input directory is read-only and the install needs a writable source tree — confirm.
!cp -rf /kaggle/input/sentence-transformers-222/sentence-transformers /kaggle/working/sentence-transformers
!pip install -U /kaggle/working/sentence-transformers
# blingfire wheel (imported later as `bf`).
!pip install -U /kaggle/input/blingfire-018/blingfire-0.1.8-py3-none-any.whl

# --no-index: do not consult a package index; --no-deps: install only the named wheel, not its dependencies.
!pip install --no-index --no-deps /kaggle/input/llm-whls/transformers-4.31.0-py3-none-any.whl
!pip install --no-index --no-deps /kaggle/input/llm-whls/peft-0.4.0-py3-none-any.whl
!pip install --no-index --no-deps /kaggle/input/llm-whls/datasets-2.14.3-py3-none-any.whl
!pip install --no-index --no-deps /kaggle/input/llm-whls/trl-0.5.0-py3-none-any.whl

In [3]:
%%capture
# Packages for serving the API: the FastAPI app, the uvicorn server, and ngrok/nest-asyncio helpers.
# NOTE(review): unlike the offline installs above, these are given by package name with no
# version pin, so this cell needs network access and may resolve different versions over time.
!pip install fastapi nest-asyncio pyngrok uvicorn
# NOTE(review): accelerate and bitsandbytes are not imported by any cell shown in this notebook — confirm they are still needed.
!pip install accelerate bitsandbytes pydantic

In [4]:
# Future statements must be the first statement of a module; keeping this at the top
# means the cell stays valid if it is exported or run as a plain .py file.
from __future__ import annotations

import os
import gc
import pandas as pd
import numpy as np
import re
from tqdm.auto import tqdm
import blingfire as bf

from collections.abc import Iterable

import faiss
from faiss import write_index, read_index

from sentence_transformers import SentenceTransformer

import torch
import ctypes

# Handle to the C library loaded by its Linux shared-object name.
# NOTE(review): `libc` is not used by any cell shown in this notebook — confirm it is still needed.
libc = ctypes.CDLL("libc.so.6")



In [35]:
# Kernel-side configuration.
# These values duplicate the constants written into main.py by the %%writefile cell below.
# The API is launched as a separate `uvicorn main:app` process, so the server reads
# main.py's copy rather than these variables.
# NOTE(review): no cell shown in this notebook reads these kernel-side values — confirm this cell is still needed.

# Local directory of the sentence-transformers all-MiniLM-L6-v2 model
SIM_MODEL = '/kaggle/input/sentencetransformers-allminilml6v2/sentence-transformers_all-MiniLM-L6-v2'
# Device identifier (presumably CUDA device index 0 — confirm)
DEVICE = 0
# Maximum sequence length for the sentence encoder
MAX_LENGTH = 384
# Batch size for encoding
BATCH_SIZE = 16

# Directory of the Wikipedia dump; listing it raises if the dataset is not attached
WIKI_PATH = "/kaggle/input/wikipedia-20230701"
wiki_files = os.listdir(WIKI_PATH)

# Directory of a DeBERTa-v3-large checkpoint (going by the path name)
model_dir = "/kaggle/input/llm-se-debertav3-large"

# Fast API

In [40]:
%%writefile main.py
from __future__ import annotations
from fastapi import FastAPI
from pydantic import BaseModel, validator
from fastapi.middleware.cors import CORSMiddleware

import os
import gc
import re
from tqdm.auto import tqdm
import blingfire as bf

from collections.abc import Iterable

import faiss
from faiss import write_index, read_index

from sentence_transformers import SentenceTransformer

import ctypes

from peft import LoraConfig, get_peft_model

from dataclasses import dataclass
from typing import Optional, Union

import torch
import numpy as np
import pandas as pd
from datasets import Dataset
from transformers import AutoTokenizer
from transformers import AutoModelForMultipleChoice, TrainingArguments, Trainer
from transformers.tokenization_utils_base import PreTrainedTokenizerBase, PaddingStrategy

# Path handed to SentenceTransformer(...) in startup_event
SIM_MODEL = '/kaggle/input/sentencetransformers-allminilml6v2/sentence-transformers_all-MiniLM-L6-v2'
# Passed as `device` to sentence_model.encode in ask_mcq (presumably CUDA device index 0 — confirm)
DEVICE = 0
# Assigned to sentence_model.max_seq_length in startup_event
MAX_LENGTH = 384
# Passed as `batch_size` to sentence_model.encode in ask_mcq
BATCH_SIZE = 16

# Directory of the Wikipedia dump. The listing runs at import time, so importing this
# module fails if the directory does not exist.
# NOTE(review): neither WIKI_PATH nor wiki_files is used elsewhere in this module — confirm they are needed.
WIKI_PATH = "/kaggle/input/wikipedia-20230701"
wiki_files = os.listdir(WIKI_PATH)

# NOTE(review): model_dir is not referenced elsewhere in this module — confirm it is needed.
model_dir = "/kaggle/input/llm-se-debertav3-large"

# Request body for a multiple-choice question.
#   question: the question text, 1 to 512 characters long
#   choices:  the answer options, 1 to 5 entries
class MCQ(BaseModel):
    question: str
    choices: list[str]

    @validator("choices")
    def validate_choices_length(cls, choices):
        """Return ``choices`` unchanged, or raise ValueError if it has fewer than 1 or more than 5 entries."""
        fewest, most = 1, 5
        count = len(choices)

        if count < fewest:
            raise ValueError(f"Choices list must have at least {fewest} item(s)")
        if count > most:
            raise ValueError(f"Choices list can have at most {most} items")
        return choices

    @validator("question")
    def validate_question_length(cls, question):
        """Return ``question`` unchanged, or raise ValueError if it has fewer than 1 or more than 512 characters."""
        shortest, longest = 1, 512
        size = len(question)

        if size < shortest:
            raise ValueError(f"Question must have at least {shortest} character(s)")
        if size > longest:
            raise ValueError(f"Question can have at most {longest} characters")
        return question

    
# class SearchResult(BaseModel):
#     search_score: list[float]
#     search_index: list[int]


# The FastAPI application object that uvicorn serves as `main:app`.
# docs_url / redoc_url set the paths of the generated documentation pages.
app = FastAPI(
    title="Scique API",
    description="An API to answer any MCQ from wikipedia without context and other questions with context.",
    version="0.1.0",
    docs_url="/docs",
    redoc_url="/redoc",
)

# Origins accepted by the CORS middleware registered below.
# NOTE(review): "localhost:8000" has no scheme, so it presumably never matches a browser's
# Origin header — confirm whether it can be dropped.
# NOTE(review): the notebook exposes this app through an ngrok domain that is not listed
# here — confirm whether browser clients on other origins need to be allowed.
origins = ["http://localhost:8000", "localhost:8000"]

app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# eluwa_model: AutoModelForCausalLM | None = None
# tokenizer: AutoTokenizer | None = None

# Module-level slots for the encoder and the FAISS index. They start as None and are
# filled in by startup_event; ask_mcq reads them on every request.
sentence_model: SentenceTransformer | None = None
sentence_index: faiss.Index | None = None


# load model artifacts on startup of the application to reduce latency
@app.on_event("startup")
async def startup_event():
    """Load the sentence encoder and the FAISS index into the module-level globals.

    Registered as the application's startup handler so the artifacts are loaded
    once, before requests are handled, rather than on each request.
    """
    global sentence_model, sentence_index

    # Build the encoder on the GPU, cap its input length, then switch to half precision.
    encoder = SentenceTransformer(SIM_MODEL, device="cuda")
    encoder.max_seq_length = MAX_LENGTH
    sentence_model = encoder.half()

    index_path = "/kaggle/input/wikipedia-2023-07-faiss-index/wikipedia_202307.index"
    sentence_index = read_index(index_path)


# Landing route: answers GET / with a fixed greeting string.
@app.get("/", tags=["root"])
async def root() -> str:
    greeting = "Welcome to Scique API!"
    return greeting


@app.put("/search_wiki/", tags=["search_wiki"])
async def ask_mcq(mcq: MCQ | None):
    """Search the Wikipedia FAISS index for the 3 entries nearest to the question.

    Only the question text is embedded; the choices are validated by the request
    model but are not used for retrieval. The response is a pair of lists: the
    scores returned by the index and the matching index positions, each holding
    one inner list for the single query.
    """
    # Imported here so the handler is self-contained with respect to the module's import block.
    from fastapi import HTTPException

    # The annotation admits None; reject it explicitly instead of failing on attribute access.
    if mcq is None:
        raise HTTPException(status_code=422, detail="Request body must be an MCQ object, not null")

    # Both artifacts are populated by startup_event; refuse to serve if they are missing.
    if sentence_model is None or sentence_index is None:
        raise HTTPException(status_code=503, detail="Search models are not loaded")

    prompt_embeddings = sentence_model.encode(
        [mcq.question],
        batch_size=BATCH_SIZE,
        device=DEVICE,
        show_progress_bar=False,  # a per-request progress bar only adds noise to the server log
        convert_to_tensor=True,
        normalize_embeddings=True,
    )

    # Move the embedding off the GPU and into NumPy, which is what the index search takes.
    prompt_embeddings = prompt_embeddings.detach().cpu().numpy()
    _ = gc.collect()

    ## Get the top 3 pages that are likely to contain the topic of interest
    search_score, search_index = sentence_index.search(prompt_embeddings, 3)

    return search_score.tolist(), search_index.tolist()

Overwriting main.py


In [36]:
import os
import subprocess

# The ngrok authtoken is a credential, so it is read from the environment instead of
# being written into the notebook.
# NOTE(review): the token that used to be hardcoded in this cell has been exposed and should be revoked.
ngrok_authtoken = os.environ.get("NGROK_AUTHTOKEN")
if not ngrok_authtoken:
    raise RuntimeError("Set the NGROK_AUTHTOKEN environment variable before running this cell")

# Passing an argument list avoids shell interpolation; check=True surfaces a failed registration.
subprocess.run(["ngrok", "authtoken", ngrok_authtoken], check=True)

Authtoken saved to configuration file: /root/.ngrok2/ngrok.yml


In [18]:
# !ngrok http --domain=hardy-lasting-ghost.ngrok-free.app 8000

In [None]:
import nest_asyncio
import subprocess
import time
import uvicorn


# specify a port
port = 8000

# Set your custom subdomain here
custom_subdomain = "hardy-lasting-ghost.ngrok-free.app"

# ngrok's log goes to a file rather than a pipe: nothing in this cell reads a pipe
# continuously, and an undrained pipe can stall the child once its buffer fills.
ngrok_log_path = "ngrok.log"
# How long to watch ngrok for an immediate failure (bad token, domain in use, ...)
ngrok_startup_grace_seconds = 5

# Start the FastAPI app using uvicorn
uvicorn_command = ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", str(port)]
uvicorn_process = subprocess.Popen(uvicorn_command)

# Run the ngrok command
ngrok_command = ["ngrok", "http", f"--domain={custom_subdomain}", "--log=stdout", str(port)]
ngrok_log = open(ngrok_log_path, "w")
ngrok_process = subprocess.Popen(ngrok_command, stdout=ngrok_log, stderr=subprocess.STDOUT)

nest_asyncio.apply()

try:
    # Fail fast with ngrok's own output if the tunnel process dies during startup,
    # instead of looping forever on output that never arrives.
    deadline = time.monotonic() + ngrok_startup_grace_seconds
    while time.monotonic() < deadline:
        if ngrok_process.poll() is not None:
            with open(ngrok_log_path) as log_file:
                raise RuntimeError(
                    f"ngrok exited with code {ngrok_process.returncode}:\n{log_file.read()}"
                )
        time.sleep(0.2)

    # The tunnel is bound to a fixed domain, so the public URL is known without parsing ngrok output
    print("Custom subdomain URL:", f"https://{custom_subdomain}")

    # Block until the FastAPI app exits
    uvicorn_process.wait()
except KeyboardInterrupt:
    # Interrupting the cell is the normal way to stop the server; cleanup happens below
    pass
finally:
    # Always stop both children, whichever way this cell ends
    for process in (uvicorn_process, ngrok_process):
        if process.poll() is None:
            process.terminate()
    for process in (uvicorn_process, ngrok_process):
        try:
            process.wait(timeout=10)
        except subprocess.TimeoutExpired:
            process.kill()
    ngrok_log.close()

In [17]:
# import nest_asyncio
# from pyngrok import ngrok
# import uvicorn

# # specify a port
# port = 8000

# # Set your custom subdomain here
# custom_subdomain = "scique"

# # Start ngrok with the custom subdomain
# ngrok_tunnel = ngrok.connect(addr="8000", proto="http", hostname=custom_subdomain)

# nest_asyncio.apply()

# # where we can visit our FastAPI app
# print('Custom subdomain URL:', ngrok_tunnel.public_url)

# # finally run the app
# uvicorn.run(app, port=port)


In [None]:
# import nest_asyncio
# from pyngrok import ngrok
# import uvicorn

# # specify a port
# port = 8000
# ngrok_tunnel = ngrok.connect(port)

# nest_asyncio.apply()

# # where we can visit our fastAPI app
# print('Public URL:', ngrok_tunnel.public_url)

# # finally run the app
# uvicorn.run(app, port=port)
