import os
import re
import json
import sqlite3
from datetime import datetime
from typing import List
from fastapi import FastAPI, UploadFile, File, Depends, HTTPException
from fastapi.security import HTTPBearer
from passlib.context import CryptContext
from jose import jwt
from PIL import Image
import pytesseract
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
# ==========================
# CONFIG
# ==========================
SECRET_KEY = "SUPREME_SECRET"  # hardcoded secret; move to an environment variable in a real deployment
ALGORITHM = "HS256"
DB_FILE = "supreme.db"
BASE_DIR = "data"
os.makedirs(BASE_DIR, exist_ok=True)

app = FastAPI(title="Supreme Production Engine")
pwd_context = CryptContext(schemes=["bcrypt"])   # password hashing context (not used by the endpoints below)
security = HTTPBearer()                          # bearer-token scheme (not used by the endpoints below)
model = SentenceTransformer("all-MiniLM-L6-v2")  # produces 384-dimensional sentence embeddings
# ==========================
# DATABASE
# ==========================
def init_db():
    conn = sqlite3.connect(DB_FILE)
    c = conn.cursor()
    c.execute("""
        CREATE TABLE IF NOT EXISTS documents (
            id INTEGER PRIMARY KEY,
            filename TEXT,
            content TEXT,
            risk TEXT,
            created_at TEXT
        )
    """)
    conn.commit()
    conn.close()

init_db()
# ==========================
# OCR
# ==========================
def ocr_image(path):
    # Requires the Tesseract binary plus the Thai ("tha") and English ("eng") language data packs.
    img = Image.open(path)
    return pytesseract.image_to_string(img, lang="tha+eng")
# ==========================
# RISK CLASSIFIER
# ==========================
def classify(text):
    # "ไม่มี" = "missing", "ตัด" = "cut/terminated"; any of these keywords flags the document as high risk.
    if any(x in text for x in ["ไม่มี", "ตัด", "fail", "error"]):
        return "RED"
    # "รอ" = "waiting/pending"
    elif "รอ" in text:
        return "YELLOW"
    return "GREEN"
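# Illustrative calls (example strings assumed, not from the original):
#   classify("ผลตรวจ fail ที่ขั้นตอนสุดท้าย")  -> "RED"     (contains "fail")
#   classify("เอกสารรอการอนุมัติ")            -> "YELLOW"  (contains "รอ")
#   classify("ผ่านการตรวจครบถ้วน")            -> "GREEN"   (no keyword match)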
# ==========================
# VECTOR INDEX
# ==========================
dimension = 384  # embedding size of all-MiniLM-L6-v2
index = faiss.IndexFlatL2(dimension)
doc_texts = []   # raw text for each indexed vector, in insertion order

def add_to_vector(text):
    embedding = model.encode([text])
    index.add(np.asarray(embedding, dtype="float32"))  # FAISS expects float32
    doc_texts.append(text)
# ==========================
# API
# ==========================
@app.post("/upload")
async def upload(file: UploadFile = File(...)):
    # basename() guards against path traversal via the client-supplied filename
    filename = os.path.basename(file.filename)
    path = os.path.join(BASE_DIR, filename)
    with open(path, "wb") as f:
        f.write(await file.read())
    if filename.lower().endswith((".png", ".jpg", ".jpeg")):
        content = ocr_image(path)
    else:
        content = ""
    risk = classify(content)
    conn = sqlite3.connect(DB_FILE)
    c = conn.cursor()
    c.execute("INSERT INTO documents (filename, content, risk, created_at) VALUES (?, ?, ?, ?)",
              (filename, content, risk, datetime.utcnow().isoformat()))
    conn.commit()
    conn.close()
    add_to_vector(content)
    return {"status": "stored", "risk": risk}
@app.get("/search")
def semantic_search(q: str):
    q_embed = model.encode([q])
    D, I = index.search(np.asarray(q_embed, dtype="float32"), 5)
    # FAISS pads the result with -1 when fewer than 5 vectors are indexed, so filter those out
    results = [doc_texts[i] for i in I[0] if 0 <= i < len(doc_texts)]
    return {"results": results}
@app.get("/stats")
def stats():
    conn = sqlite3.connect(DB_FILE)
    c = conn.cursor()
    c.execute("SELECT risk, COUNT(*) FROM documents GROUP BY risk")
    rows = c.fetchall()
    conn.close()
    # rows are (risk, count) pairs, e.g. [("GREEN", 3), ("RED", 1)]
    return {"distribution": dict(rows)}
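For reference, here is a minimal sketch of how the endpoints could be exercised with FastAPI's TestClient. It assumes the code above is saved as main.py and that a sample image sample.png exists; both names are placeholders, and recent Starlette versions need the httpx package for TestClient.

from fastapi.testclient import TestClient
from main import app  # assumed module name

client = TestClient(app)

# Upload an image; the response reports the assigned risk level.
with open("sample.png", "rb") as fh:
    r = client.post("/upload", files={"file": ("sample.png", fh, "image/png")})
print(r.json())  # e.g. {"status": "stored", "risk": "GREEN"}

# Semantic search over everything indexed so far.
print(client.get("/search", params={"q": "invoice cancelled"}).json())

# Risk distribution across stored documents.
print(client.get("/stats").json())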