In [None]:
!pip install uvicorn nest_asyncio tortoise-orm pymysql pythainlp gensim asyncmy beautifulsoup4

In [128]:
import nest_asyncio
# Patch asyncio so that an already-running event loop can be re-entered.
# NOTE(review): presumably needed because the notebook kernel already runs a
# loop while later cells drive async ORM calls — confirm it is still required.
nest_asyncio.apply()

In [129]:
from pythainlp import word_vector
from pythainlp.tokenize import word_tokenize
import numpy as np

# Load the pretrained "thai2fit_wv" word vectors through PyThaiNLP. `model` is a
# notebook-level global: get_vector() reads it for lookups and for vector_size.
# Note that a later cell rebinds `model` to a model loaded from "thai2vec.bin".
model = word_vector.WordVector("thai2fit_wv").get_model()

def get_vector(text: str):
    """Embed ``text`` as the average of its in-vocabulary word vectors.

    The text is tokenized with ``word_tokenize`` (whitespace tokens dropped) and
    every token present in the global ``model`` contributes its vector.

    Returns:
        A plain Python list: the element-wise mean of the matched vectors, or
        ``model.vector_size`` zeros when no token is in the vocabulary.
    """
    known = []
    for token in word_tokenize(text, keep_whitespace=False):
        if token in model:
            known.append(model[token])

    # No usable token: fall back to an all-zero vector of the model's width.
    if not known:
        return [0] * model.vector_size
    return np.mean(known, axis=0).tolist()


In [130]:
from tortoise.models import Model
from tortoise import fields
from enum import Enum


# ✅ Enum declarations
class TypeEnum(str, Enum):
    """Values accepted by the ``BookTran.type`` column ("novel" or "chat")."""
    novel = "novel"
    chat = "chat"

class YesNo(str, Enum):
    """Two-state "yes"/"no" value used by the flag-like ``BookTran`` columns."""
    yes = "yes"
    no = "no"

class StatusEnum(str, Enum):
    """Values for ``BookTran.status``; this notebook only queries "publish" rows."""
    publish = "publish"
    private = "private"
    delete = "delete"

class EndEnum(str, Enum):
    """Values for ``BookTran.end``.

    NOTE(review): presumably marks whether the book is finished — confirm.
    """
    end = "end"
    not_end = "not_end"


# ✅ Model declaration
class BookTran(Model):
    """Tortoise ORM model mapped to the ``book_tran`` table.

    The notebook reads ``bookID``, ``name``, ``tag`` and ``des`` from rows whose
    ``status`` is "publish" and embeds them for similarity search. ``tag`` is
    treated as comma-separated text and ``des`` as HTML by the cells that build
    the DataFrame. The meaning of the remaining columns is not visible in this
    notebook.
    """
    id = fields.IntField(pk=True)
    bookID = fields.CharField(max_length=255)
    type = fields.CharEnumField(enum_type=TypeEnum, default=TypeEnum.novel)
    img = fields.CharField(max_length=255)
    # Text columns; name, tag and des are the ones fed to the embedding step.
    name = fields.CharField(max_length=255)
    title = fields.TextField()
    des = fields.TextField()
    tag = fields.TextField()
    # Nullable integer columns — presumably category ids; TODO confirm.
    cat1 = fields.IntField(null=True)
    cat2 = fields.IntField(null=True)
    rate_img = fields.CharEnumField(enum_type=YesNo, default=YesNo.no)
    rate = fields.CharEnumField(enum_type=YesNo, default=YesNo.no)
    userID = fields.CharField(max_length=255)
    status = fields.CharEnumField(enum_type=StatusEnum, default=StatusEnum.publish)
    view = fields.IntField(default=0)
    end = fields.CharEnumField(enum_type=EndEnum, default=EndEnum.not_end)
    bgimg = fields.CharField(max_length=255, null=True)
    # "yes"/"no" flag columns with their defaults.
    recommend = fields.CharEnumField(enum_type=YesNo, default=YesNo.no)
    noti_add = fields.CharEnumField(enum_type=YesNo, default=YesNo.yes)
    show_review = fields.CharEnumField(enum_type=YesNo, default=YesNo.yes)
    show_write = fields.CharEnumField(enum_type=YesNo, default=YesNo.yes)
    use_freecoin = fields.CharEnumField(enum_type=YesNo, default=YesNo.yes)
    fast_status = fields.CharEnumField(enum_type=YesNo, default=YesNo.no)
    # Timestamps maintained through Tortoise's auto_now_add / auto_now options.
    createdAt = fields.DatetimeField(auto_now_add=True)
    updatedAt = fields.DatetimeField(auto_now=True)

    class Meta:
        # Explicit table name for this model.
        table = "book_tran"


In [131]:
from tortoise import Tortoise

async def init_db(db_url: "str | None" = None):
    """Initialise Tortoise ORM against the book database.

    Args:
        db_url: Tortoise connection URL. When omitted, the value of the
            ``READEVE_DB_URL`` environment variable is used.

    Raises:
        RuntimeError: If no URL is passed and ``READEVE_DB_URL`` is unset or empty.
    """
    import os  # local import keeps this cell self-contained

    # SECURITY: the connection URL embeds the database password, so it must not
    # be hard-coded in the notebook. The credential that was previously committed
    # here should be considered leaked and rotated.
    url = db_url or os.environ.get("READEVE_DB_URL")
    if not url:
        raise RuntimeError(
            "Database URL missing: pass db_url or set the READEVE_DB_URL "
            "environment variable (e.g. mysql://user:password@host:port/dbname)."
        )

    await Tortoise.init(
        db_url=url,
        # Models are declared in notebook cells, i.e. in the __main__ module.
        modules={"models": ["__main__"]},
    )
    # Schema generation stays disabled, exactly as in the original cell:
    # await Tortoise.generate_schemas()

# Open the database connection once before any query cell runs.
# Top-level `await` works here because the cell runs in an IPython kernel.
await init_db()


In [132]:
# Fetch every BookTran row whose status is "publish".
# The same query is repeated in the cell that builds the DataFrame.
books = await BookTran.filter(status="publish")

In [133]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from pythainlp.tokenize import word_tokenize
import numpy as np

# Load the thai2vec model (word2vec binary format) from the working directory.
# KeyedVectors is imported here because no other cell imports it; without this
# import the cell raises NameError on a fresh kernel.
from gensim.models import KeyedVectors

# Rebinds the notebook-level `model` that get_vector() reads.
model = KeyedVectors.load_word2vec_format("thai2vec.bin", binary=True)

def get_vector(text: str):
    """Return the mean word vector of ``text`` as a NumPy array.

    Tokens produced by ``word_tokenize`` (whitespace dropped) that are missing
    from the global ``model`` are ignored; if none remain, a zero vector of
    length ``model.vector_size`` is returned instead.
    """
    embeddings = []
    for tok in word_tokenize(text, keep_whitespace=False):
        if tok in model:
            embeddings.append(model[tok])

    if len(embeddings) == 0:
        return np.zeros(model.vector_size)
    return np.mean(embeddings, axis=0)


In [138]:
from bs4 import BeautifulSoup

def get_clean_text_from_html(html: str) -> str:
    """Strip the markup from ``html`` and return its text content.

    The markup is parsed with the "html.parser" backend; text fragments are
    whitespace-stripped and joined with a single space.
    """
    parsed = BeautifulSoup(html, "html.parser")
    plain_text = parsed.get_text(separator=" ", strip=True)
    return plain_text

In [155]:
books = await BookTran.filter(status="publish")

# 2. สร้าง DataFrame พร้อมเวกเตอร์
data = []
for b in books:
    tag_text = b.tag.replace(",", " ") if b.tag else ""
    name_text = b.name or ""
    des_html = b.des or ""
    des_text = get_clean_text_from_html(des_html)

    combined_text = f"{name_text} {tag_text} {des_text}".strip()
    vec = get_vector(combined_text)

    data.append({
        "bookID": b.bookID,
        "name": name_text,
        "tag": tag_text,
        "des": des_html,
        "vector": vec
    })

df = pd.DataFrame(data)

In [160]:
def recommend_by_keyword(keyword: str, df: pd.DataFrame, topk=5):
    """Rank books by cosine similarity between ``keyword`` and each book vector.

    Args:
        keyword: Query text, embedded with ``get_vector``.
        df: Book frame with ``bookID``, ``name``, ``tag`` and ``des`` columns.
            A precomputed ``vector`` column is reused when present; otherwise
            vectors are built from name + tag + HTML-stripped description.
        topk: Maximum number of rows to return.

    Returns:
        A new DataFrame with columns ``bookID``, ``name``, ``tag``, ``des`` and
        ``score``, sorted by descending score. ``df`` itself is not modified.
    """
    result_columns = ["bookID", "name", "tag", "des", "score"]

    def as_text(value):
        # Missing values (None / NaN) must not reach str.replace or the HTML parser.
        if isinstance(value, str):
            return value
        return "" if value is None or pd.isna(value) else str(value)

    def build_text(row):
        name = as_text(row.get("name", ""))
        tag = as_text(row.get("tag", "")).replace(",", " ")
        clean_des = get_clean_text_from_html(as_text(row.get("des", "")))
        return f"{name} {tag} {clean_des}"

    # Work on a copy so repeated queries never add or overwrite columns on the
    # caller's frame.
    scored = df.copy()
    if scored.empty:
        return scored.reindex(columns=result_columns)

    # Re-parsing HTML and re-embedding every book on each query is wasted work
    # when the frame already carries its vectors.
    if "vector" not in scored.columns:
        scored["vector"] = scored.apply(build_text, axis=1).apply(get_vector)

    q_vec = get_vector(keyword)
    # One pairwise call for all rows instead of one call per row.
    scored["score"] = cosine_similarity([q_vec], list(scored["vector"]))[0]

    ranked = scored.sort_values("score", ascending=False)
    return ranked.head(topk)[result_columns]

In [None]:
# Example query: the ten best matches for a single Thai keyword (the word for "time").
recommend_by_keyword("เวลา", df, topk=10)

In [171]:
def recommend_random_similar(df, base_keywords: str, topn: int = 30, pick: int = 5, random_state=None):
    """Randomly pick a few books out of the ones most similar to ``base_keywords``.

    Args:
        df: Book frame with ``bookID``, ``name``, ``tag`` and ``vector`` columns.
        base_keywords: Text embedded with ``get_vector`` and used as the query.
        topn: Size of the most-similar pool that is sampled from.
        pick: Number of rows to sample (capped at the pool size).
        random_state: Optional seed forwarded to ``DataFrame.sample`` so that a
            result can be reproduced; ``None`` keeps the sampling random.

    Returns:
        A new DataFrame with columns ``bookID``, ``name``, ``tag`` and ``score``
        sorted by descending score. ``df`` itself is not modified.
    """
    result_columns = ["bookID", "name", "tag", "score"]

    # Score on a copy so the caller's frame does not gain a "score" column.
    scored = df.copy()
    if scored.empty:
        return scored.reindex(columns=result_columns)

    base_vec = get_vector(base_keywords)

    # Compute the similarities in one pairwise call, then keep the top-N pool.
    scored["score"] = cosine_similarity([base_vec], list(scored["vector"]))[0]
    similar_df = scored.sort_values("score", ascending=False).head(topn)

    # ✅ Sample from the pool, then order the sample by score (high → low).
    result = similar_df.sample(n=min(pick, len(similar_df)), random_state=random_state)
    return result.sort_values("score", ascending=False)[result_columns]


In [185]:
user_tags = "โรแมนติก ย้อนเวลา bl"  # derived from the user's reading history
# Sample recommendations from the books most similar to those tags.
recommend_random_similar(df, base_keywords=user_tags)


Unnamed: 0,bookID,name,tag,score
50,BOOK202506261309582JH,ศึกอ่อยสะท้านใจ SS2 รักครั้งนี้ไม่ต้องแอบ,ศึกอ่อยสะท้านใจ SS2 อ่อย นักศึกษา มหาวิทยาลัย...,0.386317
47,BOOK20250624083316Y7W,ปิ๊งรักหนุ่มข้างบ้าน,โรแมนติก แอบรัก ปิ๊งรัก ฟีลกู๊ด หนุ่มข้างบ้าน ...,0.381582
23,BOOK20250618180949Eyo,Hot stuff,18+ อีโรติก 25+ คุณหนู บอดี้การ์ด มาเฟีย,0.370684
58,BOOK20250628150736Rvx,คุณหนูกับพ่อบ้านทั้งเจ็ด,กาสะลอง คุณหนู พ่อบ้าน ทั้งเจ็ด 7p 3p ntr คนรั...,0.370557
45,BOOK20250623071643bcH,[Fanfiction Harry Potter รุ่นลูก] เด็กหญิงผู้ร...,เด็กหญิงผู้รอดชีวิต FanfictionHarryPotter Harr...,0.367659


In [1]:
import faiss
import numpy as np

# FAISS smoke test: index a single random float32 vector and query the index
# with that same vector, then print the returned distances and neighbour ids.
vec = np.random.rand(1, 300).astype('float32')

index = faiss.IndexFlatL2(vec.shape[1])  # flat L2 index, width taken from the vector
index.add(vec)

distances, neighbours = index.search(vec, k=1)
print(distances, neighbours)


[[0.]] [[0]]
