In [1]:
# backend.py
# --- Step 1: Install necessary libraries ---
!pip install fastapi uvicorn pyngrok diffusers transformers accelerate torch Pillow

import torch
from fastapi import FastAPI
from pydantic import BaseModel
from diffusers import StableDiffusionPipeline
import base64
from io import BytesIO
from PIL import Image
import uvicorn
from pyngrok import ngrok, conf
import threading
import os

# --- Optional: Add your ngrok authtoken if you have one ---
# Replace "YOUR_NGROK_AUTHTOKEN" with your actual token from https://dashboard.ngrok.com/get-started/your-authtoken
conf.get_default().auth_token = "31QAivzbppt5jb8ozM3TkemWwlN_6BKgSbrv5qYALrgK7pZ3Q"

print("Starting backend server...")

# ---- Step 2: Load the Stable Diffusion model AND your LoRA ----
print("Loading Stable Diffusion model...")
# Define the base model and your LoRA model from the Hugging Face Hub
base_model_id = "runwayml/stable-diffusion-v1-5"
lora_model_id = "nikhiljose7/lora-sdv1-5-minnal-murali"

# Use float16 for faster inference and less memory usage.
pipe = StableDiffusionPipeline.from_pretrained(base_model_id, torch_dtype=torch.float16)

print(f"Loading LoRA weights from: {lora_model_id}")
# Load the LoRA weights into the base model
pipe.load_lora_weights(lora_model_id)

# Move the model to the GPU if available.
if torch.cuda.is_available():
    print("Moving model to GPU...")
    pipe = pipe.to("cuda")
else:
    print("CUDA not available. Using CPU (this will be very slow).")
    pipe = pipe.to("cpu")

# ---- Step 3: Set up the FastAPI app ----
app = FastAPI()

# Define the request body structure.
# The key must be "caption".
class Prompt(BaseModel):
    caption: str

@app.get("/")
def read_root():
    return {"status": "ok"}

# Define the image generation endpoint.
@app.post("/generate")
def generate_image(data: Prompt):
    """
    Generates an image from a text caption.
    """
    # --- MODIFIED: Add the trigger word to the prompt ---
    # This is crucial to activate your LoRA model's concept.
    trigger_word = "Minnal Murali"
    prompt_with_trigger = f"{trigger_word}, {data.caption}"

    print(f"Received caption: {data.caption}")
    print(f"Using full prompt: {prompt_with_trigger}")

    try:
        # Generate the image using the modified prompt.
        image = pipe(prompt_with_trigger).images[0]

        # Convert the PIL image to a base64 string to send via JSON.
        buffered = BytesIO()
        image.save(buffered, format="PNG")
        img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")

        print("Image generated successfully.")
        return {"image_base64": img_str}
    except Exception as e:
        print(f"An error occurred: {e}")
        return {"error": str(e)}, 500


# ---- Step 4: Run the server using Uvicorn and Ngrok ----
# We run Uvicorn in a separate thread so that we can start ngrok in the main thread.
def run_app():
    uvicorn.run(app, host="0.0.0.0", port=8000)

# Open a public tunnel to the local server
public_url = ngrok.connect(8000)
print("------------------------------------------------")
print(f"✅ Public API URL: {public_url}")
print("------------------------------------------------")
print("Copy this URL and paste it into the API_URL variable in your frontend.py script.")


# Start the FastAPI server in the background.
thread = threading.Thread(target=run_app, daemon=True)
thread.start()

# Keep the main thread alive to keep ngrok running.
try:
    while True:
        pass
except KeyboardInterrupt:
    print("Shutting down server and ngrok tunnel.")
    ngrok.disconnect(public_url)

Collecting pyngrok
  Downloading pyngrok-7.3.0-py3-none-any.whl.metadata (8.1 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting n

Error while fetching `HF_TOKEN` secret value from your vault: 'Requesting secret HF_TOKEN timed out. Secrets can only be fetched when running from the Colab UI.'.
You are not authenticated with the Hugging Face Hub in this notebook.
If the error persists, please let us know by opening an issue on GitHub (https://github.com/huggingface/huggingface_hub/issues/new).


model_index.json:   0%|          | 0.00/541 [00:00<?, ?B/s]

Fetching 15 files:   0%|          | 0/15 [00:00<?, ?it/s]

preprocessor_config.json:   0%|          | 0.00/342 [00:00<?, ?B/s]

scheduler_config.json:   0%|          | 0.00/308 [00:00<?, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/472 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/617 [00:00<?, ?B/s]

text_encoder/model.safetensors:   0%|          | 0.00/492M [00:00<?, ?B/s]

safety_checker/model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/806 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/743 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/547 [00:00<?, ?B/s]

unet/diffusion_pytorch_model.safetensors:   0%|          | 0.00/3.44G [00:00<?, ?B/s]

vae/diffusion_pytorch_model.safetensors:   0%|          | 0.00/335M [00:00<?, ?B/s]

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

Loading LoRA weights from: nikhiljose7/lora-sdv1-5-minnal-murali


pytorch_lora_weights.safetensors:   0%|          | 0.00/3.23M [00:00<?, ?B/s]



Moving model to GPU...
------------------------------------------------
✅ Public API URL: NgrokTunnel: "https://9b1c0b2ee9d7.ngrok-free.app" -> "http://localhost:8000"
------------------------------------------------
Copy this URL and paste it into the API_URL variable in your frontend.py script.


INFO:     Started server process [281]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)


Received caption: Minnal Murali super hero landing
Using full prompt: Minnal Murali, Minnal Murali super hero landing


  0%|          | 0/50 [00:00<?, ?it/s]

Image generated successfully.
INFO:     34.127.33.101:0 - "POST /generate HTTP/1.1" 200 OK
Received caption: Minnal Murali super hero red costume
Using full prompt: Minnal Murali, Minnal Murali super hero red costume


  0%|          | 0/50 [00:00<?, ?it/s]

Image generated successfully.
INFO:     34.127.33.101:0 - "POST /generate HTTP/1.1" 200 OK
Shutting down server and ngrok tunnel.


PyngrokNgrokURLError: ngrok client exception, URLError: [Errno 111] Connection refused