In [1]:
!pip install fastapi uvicorn transformers sentence-transformers pyngrok


Collecting fastapi
  Downloading fastapi-0.115.6-py3-none-any.whl.metadata (27 kB)
Collecting uvicorn
  Downloading uvicorn-0.34.0-py3-none-any.whl.metadata (6.5 kB)
Collecting pyngrok
  Downloading pyngrok-7.2.2-py3-none-any.whl.metadata (8.4 kB)
Collecting starlette<0.42.0,>=0.40.0 (from fastapi)
  Downloading starlette-0.41.3-py3-none-any.whl.metadata (6.0 kB)
Downloading fastapi-0.115.6-py3-none-any.whl (94 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m94.8/94.8 kB[0m [31m9.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading uvicorn-0.34.0-py3-none-any.whl (62 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.3/62.3 kB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyngrok-7.2.2-py3-none-any.whl (22 kB)
Downloading starlette-0.41.3-py3-none-any.whl (73 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m73.2/73.2 kB[0m [31m7.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: uvicorn, pyngrok, sta

!ngrok authtoken YOUR_NGROK_AUTH_TOKEN

In [2]:
# Store the ngrok auth token in the ngrok configuration file.
# YOUR_NGROK_AUTH_TOKEN is a placeholder: substitute a real token before running.
!ngrok authtoken YOUR_NGROK_AUTH_TOKEN

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [3]:
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModelForQuestionAnswering
from sentence_transformers import SentenceTransformer, util
from fastapi.responses import HTMLResponse
import torch
import json

# Application object; the routes and middleware defined below are registered on it.
app = FastAPI()

@app.get("/", response_class=HTMLResponse)
def serve_html():
    """Serve the front-end page: read index.html from its fixed path and return its text."""
    with open("/content/drive/MyDrive/Colab Notebooks/index.html", "r", encoding="utf-8") as html_file:
        page_markup = html_file.read()
    return page_markup

@app.get("/ngrok-url")
def get_ngrok_url():
    """Report the public ngrok URL of this server.

    Returns:
        dict: ``{"url": <value of the module-level ngrok_url>}``. The value is
        ``None`` while ``ngrok_url`` has not been assigned yet.
    """
    # ngrok_url is assigned elsewhere (by the tunnel start-up code), possibly
    # after this endpoint is already reachable. Reading it through globals()
    # avoids a NameError (HTTP 500) in that window.
    return {"url": globals().get("ngrok_url")}

# Locations of the QA model checkpoint and of the JSON dataset.
model_path = "/content/drive/MyDrive/qa_model"
dataset_path = "/content/drive/MyDrive/qa_dataset.json"

# Dataset is parsed once at start-up and kept in memory.
with open(dataset_path, "r", encoding="utf-8") as dataset_file:
    dataset = json.load(dataset_file)

# Tokenizer and extractive QA model are both loaded from the same checkpoint directory.
tokenizer = AutoTokenizer.from_pretrained(model_path)
qa_model = AutoModelForQuestionAnswering.from_pretrained(model_path)

# Sentence-embedding model used for the similarity search.
embedding_model = SentenceTransformer("emrecan/bert-base-turkish-cased-mean-nli-stsb-tr")

# CORS: accept any origin, method and header. Credentials are disabled because
# a wildcard origin must not be combined with credentialed requests, and none
# of the endpoints visible in this notebook rely on cookies or auth headers.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

class QuestionRequest(BaseModel):
    """Request body accepted by the /ask endpoint."""
    # Question text sent by the client.
    question: str

# Pull the question and context strings out of the dataset.
questions = [item["question"] for item in dataset]
contexts = [item["context"] for item in dataset]

# Embed the stored questions on their own.
question_embeddings = embedding_model.encode(questions, convert_to_tensor=True)

# Embed each entry as "<question> <context>" text.
# NOTE: despite its name, context_embeddings holds embeddings of this combined
# text, not of the context alone. A separate context-only encoding pass would be
# overwritten by this assignment, so it is not computed.
combined_texts = [f"{item['question']} {item['context']}" for item in dataset]
context_embeddings = embedding_model.encode(combined_texts, convert_to_tensor=True)

def get_top_contexts(similarities, n=2, min_score=0.2):
    """
    Pick the contexts of the N highest-scoring dataset entries and join them.

    Args:
        similarities: 1-D tensor of similarity scores, indexed like ``dataset``.
        n: Maximum number of contexts to return.
        min_score: A context is kept only if its score is strictly greater
            than this threshold.

    Returns:
        The kept contexts joined by a single space, in descending score order,
        or None when no score exceeds ``min_score`` (or there is nothing to
        pick from).
    """
    k = min(n, len(similarities))
    if k <= 0:
        # Empty score vector or non-positive n: nothing can be selected.
        return None

    top_scores, top_indices = torch.topk(similarities, k=k)

    # Compare on the tensor elements so the threshold check happens in the
    # tensor's own dtype.
    selected = [
        dataset[idx.item()]["context"]
        for score, idx in zip(top_scores, top_indices)
        if score > min_score
    ]

    return " ".join(selected) if selected else None

def get_combined_similarity(question_sim, context_sim, alpha=0.6):
    """
    Blend two similarity scores into one weighted sum.

    Args:
        question_sim: Similarity scores against the stored questions
        context_sim: Similarity scores against the stored contexts
        alpha: Share given to question_sim (intended range 0-1);
            context_sim receives the remaining 1 - alpha
    """
    context_weight = 1 - alpha
    question_part = alpha * question_sim
    context_part = context_weight * context_sim
    return question_part + context_part

@app.post("/ask")
def ask_question(request: QuestionRequest):
    """
    Main endpoint for question answering:
    1. Find relevant contexts using semantic search
    2. Use QA model to extract answer from contexts

    Returns:
        On success, a dict with "question", "context" and "answer".
        When the question is blank, no context passes the similarity
        threshold, or no usable answer span is found, a dict with only
        "answer" holding a fallback message.
    """
    question = request.question.strip()

    # A blank question has nothing to search with; reply with the
    # "no suitable answer" message instead of running the models on "".
    if not question:
        return {"answer": "Bu soru için uygun bir cevap bulamadım."}

    # Embed the incoming question
    question_embedding = embedding_model.encode(question, convert_to_tensor=True)

    # Cosine similarity against both precomputed embedding sets; [0] takes the
    # single result row
    question_similarities = util.pytorch_cos_sim(question_embedding, question_embeddings)[0]
    context_similarities = util.pytorch_cos_sim(question_embedding, context_embeddings)[0]

    # Weighted merge of the two score vectors
    combined_similarities = get_combined_similarity(question_similarities, context_similarities)

    # Joined text of the best-scoring contexts, or None if none qualifies
    best_context = get_top_contexts(combined_similarities)

    if best_context is None:
        return {"answer": "Bu soru için uygun bir cevap bulamadım."}

    # Run the extractive QA model on (question, context)
    inputs = tokenizer(question, best_context, return_tensors="pt", truncation=True)
    with torch.no_grad():
        outputs = qa_model(**inputs)

    # Most likely start / end token positions as plain ints
    start_idx = int(torch.argmax(outputs.start_logits))
    end_idx = int(torch.argmax(outputs.end_logits))

    if end_idx < start_idx:
        return {"answer": "Anlaşılır bir cevap bulunamadı."}

    # Decode the selected token span back to text
    answer_tokens = inputs["input_ids"][0][start_idx : end_idx + 1]
    answer = tokenizer.decode(answer_tokens, skip_special_tokens=True).strip()

    # With skip_special_tokens=True a span made up only of special tokens
    # decodes to an empty string; report that as "no clear answer" rather
    # than returning a blank answer.
    if not answer:
        return {"answer": "Anlaşılır bir cevap bulunamadı."}

    return {
        "question": question,
        "context": best_context,
        "answer": answer
    }

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/123 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/5.89k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/693 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/443M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/431 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/251k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/498k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [4]:
from pyngrok import ngrok
import threading
import uvicorn

# Start Ngrok Connection
def start_ngrok(port=8010):
    """Open an ngrok tunnel to a local port and publish its public URL.

    Args:
        port: Local port to expose. Defaults to 8010, the port the FastAPI
            app is served on in this notebook.

    Side effects:
        Assigns the tunnel's public URL to the module-level ``ngrok_url``
        and prints it.
    """
    global ngrok_url
    tunnel = ngrok.connect(port)
    ngrok_url = tunnel.public_url
    print(f"Ngrok Public URL: {ngrok_url}")

# Launch Uvicorn
def start_uvicorn(host="0.0.0.0", port=8010):
    """Serve the FastAPI ``app`` with uvicorn; this call runs the server.

    Args:
        host: Interface to bind. Defaults to "0.0.0.0".
        port: Port to listen on. Defaults to 8010.
    """
    uvicorn.run(app, host=host, port=port)

# Run the tunnel set-up and the web server on their own threads so the
# notebook cell returns instead of waiting on uvicorn.
ngrok_thread = threading.Thread(target=start_ngrok)
ngrok_thread.start()

uvicorn_thread = threading.Thread(target=start_uvicorn)
uvicorn_thread.start()


INFO:     Started server process [1951]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8010 (Press CTRL+C to quit)


# Cleanup: signal processes matching "uvicorn" and "ngrok".
# NOTE(review): uvicorn.run() is started on a thread inside the notebook process,
# so there may be no separate process for `pkill uvicorn` to match. Verify that
# this actually stops the server.
!pkill uvicorn
!pkill ngrok