In [1]:
from datetime import datetime, timedelta
from sqlalchemy import create_engine, text, Table, MetaData
from urllib.parse import quote_plus
from dotenv import load_dotenv
from sqlalchemy.orm import sessionmaker
import os

class DBConnector:
    """Build a SQLAlchemy engine for a local PostgreSQL or MySQL database.

    Host, port, database name and user are fixed per backend. Only the
    password comes from the environment (after ``load_dotenv()`` has run):
    ``POSTGRES_SQL_DB_PASSWORD`` for PostgreSQL and ``MYSQL_DB_PASSWORD``
    for MySQL.
    """

    # Per-backend settings: SQLAlchemy driver prefix, fixed connection
    # parameters, and the environment variable that holds the password.
    _BACKENDS = {
        "postgresql": {
            "driver": "postgresql+psycopg2",
            "host": "127.0.0.1",
            "port": 5432,
            "db": "postgres",
            "user": "postgres",
            "passwd_env": "POSTGRES_SQL_DB_PASSWORD",
        },
        "mysql": {
            "driver": "mysql+pymysql",
            "host": "127.0.0.1",
            "port": 3306,
            "db": "teamDB",
            "user": "root",
            "passwd_env": "MYSQL_DB_PASSWORD",
        },
    }

    def __init__(self, db_type="postgresql"):
        """Create the engine for ``db_type``.

        Args:
            db_type: ``"postgresql"`` or ``"mysql"`` (case-insensitive).

        Raises:
            ValueError: if ``db_type`` is not a supported backend.
            RuntimeError: if the backend's password environment variable
                is not set.
        """
        backend = db_type.lower()
        # Fail fast: previously an unknown backend produced an object without
        # an engine, which only blew up later with an AttributeError.
        if backend not in self._BACKENDS:
            supported = ", ".join(sorted(self._BACKENDS))
            raise ValueError(
                f"Unsupported db_type {db_type!r}; expected one of: {supported}"
            )
        settings = self._BACKENDS[backend]

        load_dotenv()

        password = os.getenv(settings["passwd_env"])
        # quote_plus(None) raises an unhelpful TypeError, so name the missing
        # variable explicitly instead.
        if password is None:
            raise RuntimeError(
                f"Environment variable {settings['passwd_env']} is not set"
            )

        self.__DB_INFO = {
            "host": settings["host"],
            "port": settings["port"],
            "db": settings["db"],
            "user": settings["user"],
            # URL-escape the password so special characters cannot break the DSN.
            "passwd": quote_plus(password),
        }

        info = self.__DB_INFO
        self.__engine = create_engine(
            f"{settings['driver']}://{info['user']}:{info['passwd']}@{info['host']}:{info['port']}/{info['db']}"
        )

    @property
    def engine(self):
        """The SQLAlchemy engine created in ``__init__`` (read-only)."""
        return self.__engine

    def session_connecter(self):
        """Return a ``sessionmaker`` factory bound to this connector's engine.

        The factory is configured with ``autoflush=False`` and
        ``autocommit=False``. Call the returned object to open a session.
        """
        Session = sessionmaker(bind=self.__engine, autoflush=False, autocommit=False)
        return Session


In [2]:
from sqlalchemy import create_engine, select
from sqlalchemy.orm import Session
from pgvector.sqlalchemy import Vector
from sqlalchemy import Column, String
from sqlalchemy.orm import declarative_base
Base = declarative_base()

# ORM mapping
class CityRegion(Base):
    """ORM mapping for the ``city_regions`` table."""
    __tablename__ = "city_regions"

    # city_code1 and city_code2 together form the composite primary key.
    city_code1 = Column(String, primary_key=True)
    city_code2 = Column(String, primary_key=True)
    # Region name string (presumably the text the embedding was built from — TODO confirm).
    city_str = Column(String)
    # pgvector column declared with 1536 dimensions.
    embedding = Column(Vector(1536))


# ---- DB connection ----
# NOTE(review): this reads the engine through the name-mangled form of
# DBConnector's private ``__engine`` attribute, so it is coupled to the class
# name and to that attribute's spelling.
engine = DBConnector("postgresql")._DBConnector__engine


# ---- Search function ----
def get_similar_city_region(q_embedding, top_k=3, metric="l2"):
    """Return the ``city_regions`` rows whose embedding is closest to a query vector.

    Args:
        q_embedding: Query vector. Objects exposing ``tolist()`` (e.g. NumPy
            arrays) are converted to a plain list first.
        top_k: Maximum number of rows to return. ``None`` applies no limit.
        metric: Distance used for ranking: ``"l2"`` (default) or ``"cosine"``.

    Returns:
        A list of rows with ``city_code1``, ``city_code2``, ``city_str`` and
        ``distance``, ordered by ascending distance.

    Raises:
        ValueError: if ``metric`` is unknown or ``top_k`` is negative.
    """
    # Map the public metric name to the pgvector comparator method.
    distance_methods = {"l2": "l2_distance", "cosine": "cosine_distance"}
    if metric not in distance_methods:
        supported = ", ".join(sorted(distance_methods))
        raise ValueError(f"Unknown metric {metric!r}; expected one of: {supported}")
    # Reject a negative limit up front, before opening a database session.
    if top_k is not None and top_k < 0:
        raise ValueError(f"top_k must be non-negative, got {top_k!r}")

    if hasattr(q_embedding, "tolist"):
        q_embedding = q_embedding.tolist()

    distance_expr = getattr(CityRegion.embedding, distance_methods[metric])(q_embedding)

    stmt = (
        select(
            CityRegion.city_code1,
            CityRegion.city_code2,
            CityRegion.city_str,
            distance_expr.label("distance"),
        )
        .order_by(distance_expr)
        .limit(top_k)
    )

    with Session(engine) as session:
        # .all() materializes the rows, so they stay usable after the session closes.
        return session.execute(stmt).all()

# ---- Example run ----
# Start from three hand-picked components, then zero-pad up to the 1536
# dimensions of the stored embeddings.
q_emb = [0.0, 0.2, 0.6]
q_emb.extend([0.0] * (1536 - len(q_emb)))

rows = get_similar_city_region(q_emb, top_k=3)

# One line per match: both codes, the region name, and the distance to 4 decimals.
for row in rows:
    distance_text = f"{row.distance:.4f}"
    print(row.city_code1, row.city_code2, row.city_str, distance_text)

26 380 부산광역시 사하구 1.1451
26 500 부산광역시 수영구 1.1453
26 440 부산광역시 강서구 1.1467


In [4]:
from openai import OpenAI
import os

client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

query = "서울종로"

# Embed the query text with OpenAI, then search for the nearest regions.
embedding_response = client.embeddings.create(
    model="text-embedding-3-small",
    input=query,
)
q_emb = embedding_response.data[0].embedding

rows = get_similar_city_region(q_emb, top_k=3)

# One line per match: both codes, the region name, and the distance to 4 decimals.
for row in rows:
    distance_text = f"{row.distance:.4f}"
    print(row.city_code1, row.city_code2, row.city_str, distance_text)


11 110 서울특별시 종로구 0.8697
11 000 서울특별시  0.9034
11 140 서울특별시 중구 0.9391
