In [None]:
!pip install streamlit pyvi kenlm pyngrok

Collecting streamlit
  Using cached streamlit-1.47.1-py3-none-any.whl.metadata (9.0 kB)
Collecting pyvi
  Using cached pyvi-0.1.1-py2.py3-none-any.whl.metadata (2.5 kB)
Collecting kenlm
  Using cached kenlm-0.3.0.tar.gz (427 kB)
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting pyngrok
  Using cached pyngrok-7.3.0-py3-none-any.whl.metadata (8.1 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Using cached watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Using cached pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Collecting sklearn-crfsuite (from pyvi)
  Using cached sklearn_crfsuite-0.5.0-py2.py3-none-any.whl.metadata (4.9 kB)
Collecting python-crfsuite>=0.9.7 (from sklearn-crfsuite->pyvi)
  Using cached python_crfsuite-0.9.11-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.w

In [3]:
%%writefile app.py
import streamlit as st
import kenlm
from pyvi import ViTokenizer
import os

@st.cache_resource
def _load_kenlm_model(model_path):
  """Load the KenLM binary at ``model_path``; the loaded model is cached by Streamlit."""
  return kenlm.Model(model_path)

def load_model():
  """Return the KenLM language model, or ``None`` when the model file is missing.

  When the file does not exist an error is shown in the Streamlit page and
  ``None`` is returned so the caller can stop the run.
  """
  model_path = "/content/3-gram-lm.binary"
  # The existence check stays outside the cached helper on purpose: a failure
  # result must not be cached, otherwise the app would keep returning None
  # even after the model file has been put in place.
  if not os.path.exists(model_path):
    st.error("File model KenLM không tồn tại")
    return None
  return _load_kenlm_model(model_path)

@st.cache_data
def _read_word_list(data_path):
  """Read ``data_path`` (UTF-8) and return its non-blank lines, stripped of whitespace."""
  with open(data_path, "r", encoding="utf-8") as f:
    stripped_lines = (line.strip() for line in f)
    return [word for word in stripped_lines if word]

def load_data():
  """Return the candidate word list, or ``[]`` when the data file is missing.

  When the file does not exist an error is shown in the Streamlit page and an
  empty list is returned so the caller can stop the run.
  """
  data_path = "/content/Viet74K.txt"
  # Checked outside the cached helper so that a missing file is re-checked on
  # every call instead of caching the empty result.
  if not os.path.exists(data_path):
    st.error("File data không tồn tại")
    return []

  return _read_word_list(data_path)

def suggest_next_words(prefix, model, data, top_n=3):
  """Rank candidate next words for ``prefix`` using a KenLM model.

  Args:
    prefix: Text typed so far. It is word-segmented with ``ViTokenizer``
      before scoring.
    model: Language model exposing ``score(sentence, bos=..., eos=...)``
      (e.g. a ``kenlm.Model``).
    data: Iterable of candidate words to try after the prefix.
    top_n: Maximum number of suggestions to return.

  Returns:
    A list of at most ``top_n`` ``(word, score)`` tuples ordered from highest
    to lowest score. Returns ``[]`` when ``prefix`` is empty or whitespace.
  """
  import heapq

  if not prefix.strip():
    return []
  tokenized_prefix = ViTokenizer.tokenize(prefix).strip()
  # eos=False: the user is still typing, so the sentence has not ended. With
  # the default eos=True the end-of-sentence transition would be added to every
  # score and bias the ranking toward words that typically close a sentence.
  scored = (
    (word, model.score(tokenized_prefix + " " + word, eos=False))
    for word in data
  )
  # nlargest keeps only the best top_n instead of sorting every candidate;
  # ties keep their original order, same as a stable descending sort.
  return heapq.nlargest(top_n, scored, key=lambda pair: pair[1])

# --- Page layout: title, free-text prefix input and number of suggestions ---
st.title("Vietnamese Suggestion for Autocomplete")
st.write("Nhập một cụm từ và nhận gợi ý từ tiếp theo sử dụng mô hình KenLM")

# Named user_input (not `input`) so the Python builtin is not shadowed.
user_input = st.text_input("Nhập cụm từ (ví dụ: 'ăn cơm với'): ", placeholder="Nhập cụm từ...")
top_n = st.slider("Số lượng từ gợi ý: ", min_value=1, max_value=5, value=3)

if st.button("Gợi ý"):
  model = load_model()
  data = load_data()
  # A missing model (None) or an empty word list means nothing can be
  # suggested; halt this script run.
  if model is None or not data:
    st.stop()

  if user_input.strip():
    with st.spinner("Đang xử lý..."):
      suggestions = suggest_next_words(user_input, model, data, top_n)
      if suggestions:
        st.success("Kết quả gợi ý:")
        # Number the suggestions starting from 1 for display.
        for i, (word, score) in enumerate(suggestions, 1):
          st.write(f"{i}. {word} (score: {score:.2f})")
      else:
        st.warning("Không tìm thấy từ tiếp theo phù hợp:")
  else:
    st.error("Vui lòng nhập một từ!")

st.markdown("---")

Writing app.py


In [4]:
from pyngrok import ngrok

!ngrok authtoken 30nuioI87pSwDlc4HKht0NpkTVq_5zTmzA2tAfrae4JKWbGwU

public_url = ngrok.connect(8501)
print(f"Truy cập giao diện Streamlit tại: {public_url}")

# Chạy Streamlit
!streamlit run app.py

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml
Truy cập giao diện Streamlit tại: NgrokTunnel: "https://cfb67d642dd4.ngrok-free.app" -> "http://localhost:8501"

Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://35.230.32.135:8501[0m
[0m
[34m  Stopping...[0m
[34m  Stopping...[0m
