In [1]:
!pip install -q fastapi uvicorn transformers torch pyngrok nest-asyncio


In [None]:
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import uvicorn
from pyngrok import ngrok
import nest_asyncio
import threading
from typing import Optional

nest_asyncio.apply()

app = FastAPI(title="Metin Oluşturma API", docs_url="/docs", redoc_url=None)

class PromptRequest(BaseModel):
    prompt: str
    max_length: Optional[int] = 100
    temperature: Optional[float] = 0.7
    top_p: Optional[float] = 0.9
    top_k: Optional[int] = 50
    repetition_penalty: Optional[float] = 1.0

MODEL_NAME = "gpt2"
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME).to(DEVICE)

@app.post("/generate", response_model=dict)
async def generate_text(request: PromptRequest):
    inputs = tokenizer(request.prompt, return_tensors="pt").to(DEVICE)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=request.max_length,
            temperature=request.temperature,
            top_p=request.top_p,
            top_k=request.top_k,
            repetition_penalty=request.repetition_penalty,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id
        )
    full_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    generated_text = full_text[len(request.prompt):].strip()
    return {
        "generated_text": generated_text,
        "original_prompt": request.prompt,
        "parameters": request.dict(exclude={"prompt"})
    }

def start_server():
    uvicorn.run(app, host="0.0.0.0", port=8000, log_level="info", access_log=False)

def setup_ngrok():
    NGROK_AUTH_TOKEN = "YOUR TOKEN"
    ngrok.set_auth_token(NGROK_AUTH_TOKEN)
    tunnel = ngrok.connect(8000, bind_tls=True)
    print(f"Ngrok tüneli: {tunnel.public_url}")
    print(f"API docs: {tunnel.public_url}/docs")
    return tunnel

if __name__ == "__main__":
    import nest_asyncio
    nest_asyncio.apply()

    server_thread = threading.Thread(target=start_server, daemon=True)
    server_thread.start()

    setup_ngrok()

    server_thread.join()


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

INFO:     Started server process [7201]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)


Ngrok tüneli: https://eda1-35-233-250-126.ngrok-free.app
API docs: https://eda1-35-233-250-126.ngrok-free.app/docs


<ipython-input-2-c568a018809f>:48: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
  "parameters": request.dict(exclude={"prompt"})
