In [1]:
import json
import numpy as np
import pandas as pd
import re
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction import DictVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn_crfsuite import CRF
from sklearn_crfsuite.metrics import flat_classification_report
from sklearn.metrics import classification_report
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Embedding, Dense, TimeDistributed, Bidirectional, Dropout
from tensorflow.keras.utils import to_categorical
from pyvi import ViTokenizer


In [2]:

# ==============================================================================
# 1. LOAD D·ªÆ LI·ªÜU & CHIA T·∫¨P (80 - 10 - 10)
# ==============================================================================
input_file = 'train_bio.json'

print(f"--- 1. LOADING DATA ---")
try:
    with open(input_file, 'r', encoding='utf-8') as f:
        all_sents = json.load(f)
    print(f"T·ªïng s·ªë c√¢u: {len(all_sents)}")
except FileNotFoundError:
    print(f"L·ªói: Kh√¥ng t√¨m th·∫•y {input_file}")
    exit()

# Chia Train (80%) v√† Temp (20%)
train_sents, temp_sents = train_test_split(all_sents, test_size=0.2, random_state=42)
# Chia Temp th√†nh Val (10% t·ªïng) v√† Test (10% t·ªïng) -> Chia ƒë√¥i temp_sents
val_sents, test_sents = train_test_split(temp_sents, test_size=0.5, random_state=42)

print(f"Train size: {len(train_sents)} (80%)")
print(f"Val size:   {len(val_sents)} (10%)")
print(f"Test size:  {len(test_sents)} (10%)")


--- 1. LOADING DATA ---
T·ªïng s·ªë c√¢u: 1136
Train size: 908 (80%)
Val size:   114 (10%)
Test size:  114 (10%)


In [16]:
import pickle
import os

In [None]:

# ==============================================================================
# 2. FEATURE ENGINEERING (CHO CLASSICAL ML)
# ==============================================================================
def word2features(sent, i):
    word = sent[i][0]
    features = {
        'bias': 1.0,
        'word.lower()': word.lower(),
        'word.isupper()': word.isupper(),
        'word.istitle()': word.istitle(),
        'word.isdigit()': word.isdigit(),
        'word.has_underscore': '_' in word,
    }
    if i > 0:
        word1 = sent[i-1][0]
        features.update({'-1:word.lower()': word1.lower(), '-1:word.istitle()': word1.istitle()})
    else:
        features['BOS'] = True

    if i < len(sent)-1:
        word1 = sent[i+1][0]
        features.update({'+1:word.lower()': word1.lower(), '+1:word.istitle()': word1.istitle()})
    else:
        features['EOS'] = True
    return features

def sent2features(sent):
    return [word2features(sent, i) for i in range(len(sent))]

def sent2labels(sent):
    return [label for token, label in sent]

print("\n--- 2. PREPARING FEATURES ---")
# T·∫°o d·ªØ li·ªáu cho c√°c m√¥ h√¨nh truy·ªÅn th·ªëng
X_train = [sent2features(s) for s in train_sents]
y_train = [sent2labels(s) for s in train_sents]

X_test = [sent2features(s) for s in test_sents]
y_test = [sent2labels(s) for s in test_sents]
# L∆∞u √Ω: LR v√† RF kh√¥ng d√πng chu·ªói, n√™n ta kh√¥ng c·∫ßn quan t√¢m th·ª© t·ª± c√¢u trong validation cho vi·ªác training c∆° b·∫£n




--- 2. PREPARING FEATURES ---
Saved: crf_model.pkl


In [19]:

# ==============================================================================
# 3. HU·∫§N LUY·ªÜN & ƒê√ÅNH GI√Å: CRF
# ==============================================================================
print("\n--- 3. TRAINING CRF ---")
crf = CRF(algorithm='lbfgs', c1=0.1, c2=0.1, max_iterations=100)
crf.fit(X_train, y_train)

print(">>> REPORT CRF (tr√™n t·∫≠p Test):")
y_pred_crf = crf.predict(X_test)
print(flat_classification_report(y_test, y_pred_crf, digits=4))
save_dir = 'saved_models'
os.makedirs(save_dir, exist_ok=True)
with open(os.path.join(save_dir, 'crf_model.pkl'), 'wb') as f:
    pickle.dump(crf, f)
print("Saved: crf_model.pkl")



--- 3. TRAINING CRF ---
>>> REPORT CRF (tr√™n t·∫≠p Test):
               precision    recall  f1-score   support

       B-CHAR     0.8684    0.8462    0.8571        39
        B-LOC     0.6667    0.6479    0.6571        71
        B-ORG     0.7846    0.7083    0.7445        72
        B-PER     0.8169    0.8529    0.8345        68
B-TIME / DATE     0.3704    0.5882    0.4545        17
  B-TIME/DATE     0.6000    0.5085    0.5505        59
       B-WORK     0.5833    0.2333    0.3333        30
       I-CHAR     0.9286    0.5000    0.6500        26
        I-LOC     0.4667    0.3889    0.4242        18
        I-ORG     0.8480    0.7737    0.8092       137
        I-PER     0.8000    0.9524    0.8696        21
I-TIME / DATE     0.3333    0.4444    0.3810         9
  I-TIME/DATE     0.7215    0.6786    0.6994        84
       I-WORK     0.6389    0.3108    0.4182        74
            O     0.9178    0.9658    0.9412      1665

     accuracy                         0.8674      2390
   

In [20]:


# ==============================================================================
# 4. HU·∫§N LUY·ªÜN & ƒê√ÅNH GI√Å: LOGISTIC REGRESSION & RANDOM FOREST
# ==============================================================================
print("\n--- 4. TRAINING LR & RF ---")
# Flatten d·ªØ li·ªáu (Chuy·ªÉn t·ª´ list of lists sang list ph·∫≥ng)
X_train_flat = [item for sublist in X_train for item in sublist]
y_train_flat = [item for sublist in y_train for item in sublist]

X_test_flat = [item for sublist in X_test for item in sublist]
y_test_flat = [item for sublist in y_test for item in sublist]

# Vector h√≥a
v = DictVectorizer(sparse=False)
X_train_vec = v.fit_transform(X_train_flat)
X_test_vec = v.transform(X_test_flat) # Ch·ªâ transform, kh√¥ng fit l·∫°i

# --- Logistic Regression ---
lr = LogisticRegression(max_iter=500, multi_class='ovr', n_jobs=-1)
lr.fit(X_train_vec, y_train_flat)
print(">>> REPORT Logistic Regression:")
print(classification_report(y_test_flat, lr.predict(X_test_vec), digits=4))

# --- Random Forest ---
rf = RandomForestClassifier(n_estimators=50, n_jobs=-1, random_state=42)
rf.fit(X_train_vec, y_train_flat)
print(">>> REPORT Random Forest:")
print(classification_report(y_test_flat, rf.predict(X_test_vec), digits=4))

with open(os.path.join(save_dir, 'vectorizer.pkl'), 'wb') as f:
    pickle.dump(v, f)
print("Saved: vectorizer.pkl")

with open(os.path.join(save_dir, 'lr_model.pkl'), 'wb') as f:
    pickle.dump(lr, f)
print("Saved: lr_model.pkl")

with open(os.path.join(save_dir, 'rf_model.pkl'), 'wb') as f:
    pickle.dump(rf, f)
print("Saved: rf_model.pkl")


--- 4. TRAINING LR & RF ---




>>> REPORT Logistic Regression:
               precision    recall  f1-score   support

       B-CHAR     0.8056    0.7436    0.7733        39
        B-LOC     0.7015    0.6620    0.6812        71
        B-ORG     0.7500    0.6250    0.6818        72
        B-PER     0.8108    0.8824    0.8451        68
B-TIME / DATE     0.3846    0.2941    0.3333        17
  B-TIME/DATE     0.4783    0.3729    0.4190        59
       B-WORK     0.7273    0.2667    0.3902        30
       I-CHAR     0.6429    0.3462    0.4500        26
        I-LOC     0.6250    0.2778    0.3846        18
        I-ORG     0.8352    0.5547    0.6667       137
        I-PER     0.7407    0.9524    0.8333        21
I-TIME / DATE     0.6667    0.2222    0.3333         9
  I-TIME/DATE     0.6667    0.6190    0.6420        84
       I-WORK     0.8000    0.1622    0.2697        74
            O     0.8798    0.9760    0.9254      1665

     accuracy                         0.8439      2390
    macro avg     0.7010    0.5

  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


In [21]:


# ==============================================================================
# 5. HU·∫§N LUY·ªÜN & ƒê√ÅNH GI√Å: BI-LSTM
# ==============================================================================
print("\n--- 5. TRAINING BI-LSTM ---")

# T·∫°o t·ª´ ƒëi·ªÉn t·ª´ TO√ÄN B·ªò d·ªØ li·ªáu ƒë·ªÉ tr√°nh l·ªói out-of-vocab
words = list(set([t[0] for sent in all_sents for t in sent]))
tags = list(set([t[1] for sent in all_sents for t in sent]))

if "UNK" not in words: words.append("UNK")
if "PAD" not in words: words.append("PAD")
word2idx = {w: i for i, w in enumerate(words)}
tag2idx = {t: i for i, t in enumerate(tags)}
idx2tag = {i: t for t, i in tag2idx.items()}

MAX_LEN = 50
n_words = len(words)
n_tags = len(tags)

# H√†m helper ƒë·ªÉ encode d·ªØ li·ªáu cho DL
def encode_data(sents, word2idx, tag2idx, max_len):
    X = [[word2idx.get(w[0], word2idx["UNK"]) for w in s] for s in sents]
    X = pad_sequences(X, maxlen=max_len, padding="post", value=word2idx["PAD"])
    
    y = [[tag2idx[w[1]] for w in s] for s in sents]
    y = pad_sequences(y, maxlen=max_len, padding="post", value=tag2idx["O"]) # Pad label b·∫±ng 'O'
    y = [to_categorical(i, num_classes=n_tags) for i in y]
    return X, np.array(y)

# Encode c√°c t·∫≠p
X_train_dl, y_train_dl = encode_data(train_sents, word2idx, tag2idx, MAX_LEN)
X_val_dl, y_val_dl = encode_data(val_sents, word2idx, tag2idx, MAX_LEN) # D√πng Validation ·ªü ƒë√¢y
X_test_dl, y_test_dl = encode_data(test_sents, word2idx, tag2idx, MAX_LEN)

# Build Model
model = Sequential([
    Embedding(input_dim=n_words, output_dim=50, input_length=MAX_LEN),
    Bidirectional(LSTM(units=64, return_sequences=True)),
    Dropout(0.3),
    TimeDistributed(Dense(n_tags, activation="softmax"))
])
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

# Train c√≥ s·ª≠ d·ª•ng validation_data
history = model.fit(X_train_dl, y_train_dl, 
                    validation_data=(X_val_dl, y_val_dl), 
                    batch_size=32, epochs=50, verbose=1)

# ƒê√°nh gi√° Bi-LSTM
print(">>> REPORT Bi-LSTM:")
y_pred_probs = model.predict(X_test_dl)
y_pred_indices = np.argmax(y_pred_probs, axis=-1)
y_test_indices = np.argmax(y_test_dl, axis=-1)

# Chuy·ªÉn ng∆∞·ª£c t·ª´ s·ªë v·ªÅ nh√£n (b·ªè padding ƒë·ªÉ t√≠nh ch√≠nh x√°c)
pred_tags_flat = []
true_tags_flat = []

for i in range(len(test_sents)):
    # L·∫•y ƒë·ªô d√†i th·ª±c t·∫ø c·ªßa c√¢u (tr√°nh l·∫•y ph·∫ßn padding)
    true_len = len(test_sents[i])
    # Ch·ªâ l·∫•y ph·∫ßn d·ª± ƒëo√°n t∆∞∆°ng ·ª©ng v·ªõi ƒë·ªô d√†i c√¢u
    p_tags = [idx2tag[idx] for idx in y_pred_indices[i][:true_len]]
    t_tags = [idx2tag[idx] for idx in y_test_indices[i][:true_len]]
    
    pred_tags_flat.extend(p_tags)
    true_tags_flat.extend(t_tags)

print(classification_report(true_tags_flat, pred_tags_flat, digits=4))

bi_lstm_config = {
    'word2idx': word2idx,
    'tag2idx': tag2idx,
    'idx2tag': idx2tag,
    'max_len': MAX_LEN
}

with open(os.path.join(save_dir, 'bi_lstm_config.pkl'), 'wb') as f:
    pickle.dump(bi_lstm_config, f)
print("Saved: bi_lstm_config.pkl")

model.save(os.path.join(save_dir, 'bi_lstm_model.keras'))
print("Saved: bi_lstm_model.keras")



--- 5. TRAINING BI-LSTM ---




Epoch 1/50
[1m29/29[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m8s[0m 76ms/step - accuracy: 0.8380 - loss: 1.2233 - val_accuracy: 0.8768 - val_loss: 0.5889
Epoch 2/50
[1m29/29[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m1s[0m 43ms/step - accuracy: 0.8706 - loss: 0.5930 - val_accuracy: 0.8768 - val_loss: 0.5416
Epoch 3/50
[1m29/29[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m1s[0m 39ms/step - accuracy: 0.8706 - loss: 0.5614 - val_accuracy: 0.8768 - val_loss: 0.5216
Epoch 4/50
[1m29/29[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m1s[0m 38ms/step - accuracy: 0.8706 - loss: 0.5298 - val_accuracy: 0.8768 - val_loss: 0.4932
Epoch 5/50
[1m29/29[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m2s[0m 62ms/step - accuracy: 0.8720 - loss: 0.4939 - val_accuracy: 0.8791 - val_loss: 0.

  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


Saved: bi_lstm_model.keras


In [23]:
import pickle
import re
import numpy as np
import os
from pyvi import ViTokenizer
from keras.models import load_model
from keras.preprocessing.sequence import pad_sequences

# ==============================================================================
# 1. H√ÄM TI·ªÄN X·ª¨ L√ù (GI·ªêNG H·ªÜT L√öC TRAIN)
# ==============================================================================
def preprocess_text_for_prediction(text):
    """
    M√¥ ph·ªèng l·∫°i h√†m 'convert_to_bio_with_cleaning' nh∆∞ng b·ªè ph·∫ßn g√°n nh√£n.
    Input: C√¢u vƒÉn b·∫£n th√¥.
    Output: List c√°c token ƒë√£ ƒë∆∞·ª£c l√†m s·∫°ch y h·ªát d·ªØ li·ªáu train.
    """
    # 1. Tokenize b·∫±ng PyVi
    tokenized_text = ViTokenizer.tokenize(text)
    raw_tokens = tokenized_text.split()
    
    cleaned_tokens = []
    
    # 2. L√†m s·∫°ch b·∫±ng Regex (COPY Y H·ªÜT T·ª™ CODE C·ª¶A B·∫†N)
    for token in raw_tokens:
        clean_t = re.sub(r'[^\w\s\d_√Ä√Å√Ç√É√à√â√ä√å√ç√í√ì√î√ï√ô√öƒÇƒêƒ®≈®∆†√†√°√¢√£√®√©√™√¨√≠√≤√≥√¥√µ√π√∫ƒÉƒëƒ©≈©∆°∆ØƒÇ√Ç√ä√î∆†∆Ø∆∞ƒÉ√¢√™√¥∆°∆∞]', '', token)
        
        # Ch·ªâ gi·ªØ l·∫°i token n√†o c√≤n n·ªôi dung sau khi clean
        if clean_t:
            cleaned_tokens.append(clean_t)
            
    return cleaned_tokens

# ==============================================================================
# 2. H√ÄM T·∫†O FEATURES (GI·ªêNG L√öC TRAIN)
# ==============================================================================
def get_features_for_prediction(sent):
    sent_features = []
    for i in range(len(sent)):
        word = sent[i]
        features = {
            'bias': 1.0,
            'word.lower()': word.lower(),
            'word.isupper()': word.isupper(),
            'word.istitle()': word.istitle(),
            'word.isdigit()': word.isdigit(),
            'word.has_underscore': '_' in word, # Feature quan tr·ªçng c·ªßa PyVi
        }
        if i > 0:
            word1 = sent[i-1]
            features.update({'-1:word.lower()': word1.lower(), '-1:word.istitle()': word1.istitle()})
        else:
            features['BOS'] = True

        if i < len(sent)-1:
            word1 = sent[i+1]
            features.update({'+1:word.lower()': word1.lower(), '+1:word.istitle()': word1.istitle()})
        else:
            features['EOS'] = True
        sent_features.append(features)
    return sent_features

# ==============================================================================
# 3. LOAD MODEL
# ==============================================================================
save_dir = 'saved_models' # ƒê·∫£m b·∫£o ƒë∆∞·ªùng d·∫´n ƒë√∫ng

print("--- Loading Models ---")
# Load CRF
with open(os.path.join(save_dir, 'crf_model.pkl'), 'rb') as f:
    loaded_crf = pickle.load(f)

# Load Classical ML Components
with open(os.path.join(save_dir, 'vectorizer.pkl'), 'rb') as f:
    loaded_v = pickle.load(f)
with open(os.path.join(save_dir, 'lr_model.pkl'), 'rb') as f:
    loaded_lr = pickle.load(f)
with open(os.path.join(save_dir, 'rf_model.pkl'), 'rb') as f:
    loaded_rf = pickle.load(f)

# Load Bi-LSTM Components
with open(os.path.join(save_dir, 'bi_lstm_config.pkl'), 'rb') as f:
    lstm_config = pickle.load(f)
loaded_lstm = load_model(os.path.join(save_dir, 'bi_lstm_model.keras'))

word2idx = lstm_config['word2idx']
idx2tag = lstm_config['idx2tag']
MAX_LEN = lstm_config['max_len']
print(">>> Ready to predict!")

# ==============================================================================
# 4. TH·ª∞C HI·ªÜN D·ª∞ ƒêO√ÅN
# ==============================================================================

# D·ªØ li·ªáu m·ªõi (ch∆∞a t·ª´ng th·∫•y l√∫c train)
test_sentences = [
    'T√°c ph·∫©m "Cho t√¥i xin m·ªôt v√© ƒëi tu·ªïi th∆°" c·ªßa Nguy·ªÖn Nh·∫≠t √Ånh ra m·∫Øt nƒÉm 2008.',
    'Ch√≠ Ph√®o v√† Th·ªã N·ªü l√† hai nh√¢n v·∫≠t kinh ƒëi·ªÉn c·ªßa nh√† vƒÉn Nam Cao.',
    'B√¨nh Ng√¥ ƒë·∫°i c√°o do Nguy·ªÖn Tr√£i so·∫°n th·∫£o ƒë·ªÉ tuy√™n c√°o chi·∫øn th·∫Øng qu√¢n Minh.'
]

print("\n================ PREDICTION REPORT ================")

for text in test_sentences:
    print(f"\nüìù Input Raw: {text}")
    
    # B∆Ø·ªöC QUAN TR·ªåNG: D√πng h√†m preprocess chu·∫©n
    tokens = preprocess_text_for_prediction(text)
    print(f"üîß Cleaned Tokens: {tokens}") 
    
    if not tokens:
        print("-> C√¢u n√†y b·ªã l·ªçc h·∫øt s·∫°ch k√Ω t·ª± ƒë·∫∑c bi·ªát, b·ªè qua.")
        continue

    # --- A. CRF ---
    features = get_features_for_prediction(tokens)
    pred_crf = loaded_crf.predict_single(features)
    
    # --- B. Logistic Regression & Random Forest ---
    features_vec = loaded_v.transform(features)
    pred_lr = loaded_lr.predict(features_vec)
    pred_rf = loaded_rf.predict(features_vec)
    
    # --- C. Bi-LSTM ---
    seq_idx = [word2idx.get(w, word2idx['UNK']) for w in tokens]
    seq_padded = pad_sequences([seq_idx], maxlen=MAX_LEN, padding='post', value=word2idx['PAD'])
    pred_prob_lstm = loaded_lstm.predict(seq_padded, verbose=0)
    pred_idx_lstm = np.argmax(pred_prob_lstm, axis=-1)[0]
    
    # L·∫•y nh√£n t∆∞∆°ng ·ª©ng ƒë·ªô d√†i th·ª±c t·∫ø
    pred_lstm = [idx2tag[i] for i in pred_idx_lstm[:len(tokens)]]

    # --- IN K·∫æT QU·∫¢ ---
    print(f"{'TOKEN':<20} | {'CRF':<10} | {'LogReg':<10} | {'Bi-LSTM':<10}")
    print("-" * 60)
    
    for i in range(len(tokens)):
        t_crf = pred_crf[i]
        t_lr = pred_lr[i]
        # t_rf = pred_rf[i] # C√≥ th·ªÉ th√™m v√†o n·∫øu mu·ªën
        t_lstm = pred_lstm[i] if i < len(pred_lstm) else "O"
        
        # Ch·ªâ in nh·ªØng token c√≥ nh√£n kh√°c 'O' ƒë·ªÉ d·ªÖ nh√¨n (ho·∫∑c in h·∫øt t√πy b·∫°n)
        # if t_crf != 'O' or t_lstm != 'O': 
        print(f"{tokens[i]:<20} | {t_crf:<10} | {t_lr:<10} | {t_lstm:<10}")

--- Loading Models ---
>>> Ready to predict!


üìù Input Raw: T√°c ph·∫©m "Cho t√¥i xin m·ªôt v√© ƒëi tu·ªïi th∆°" c·ªßa Nguy·ªÖn Nh·∫≠t √Ånh ra m·∫Øt nƒÉm 2008.
üîß Cleaned Tokens: ['T√°c_ph·∫©m', 'Cho', 't√¥i', 'xin', 'm·ªôt', 'v√©', 'ƒëi', 'tu·ªïi_th∆°', 'c·ªßa', 'Nguy·ªÖn_Nh·∫≠t_√Ånh', 'ra_m·∫Øt', 'nƒÉm', '2008']
TOKEN                | CRF        | LogReg     | Bi-LSTM   
------------------------------------------------------------
T√°c_ph·∫©m             | O          | O          | O         
Cho                  | O          | O          | O         
t√¥i                  | O          | O          | O         
xin                  | O          | O          | O         
m·ªôt                  | O          | O          | O         
v√©                   | O          | O          | O         
ƒëi                   | O          | O          | O         
tu·ªïi_th∆°             | O          | O          | O         
c·ªßa                  | O          | O          | O         
Nguy·