In [9]:
import numpy as np
from sklearn.model_selection import train_test_split

# loading method
X = np.load("../data/abacus_features.npy")
y = np.load("../data/abacus_labels.npy") 

print("X shape:", X.shape)
print("y shape:", y.shape)

# Split into train/test
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

print(" Data loaded and split:")
print("Train:", X_train.shape, "| Test:", X_test.shape)

X shape: (17878, 250)
y shape: (17878,)
 Data loaded and split:
Train: (14302, 250) | Test: (3576, 250)


In [10]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout

In [11]:
model = Sequential()
model.add(Embedding(input_dim=1000, output_dim=64, input_length=250))
model.add(LSTM(64, return_sequences=False))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

In [12]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

In [13]:
history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=5, 
    batch_size=32,
    verbose=1
)


Epoch 1/5
[1m447/447[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 62ms/step - accuracy: 0.9491 - loss: 0.2370 - val_accuracy: 0.9533 - val_loss: 0.1890
Epoch 2/5
[1m447/447[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 67ms/step - accuracy: 0.9494 - loss: 0.2056 - val_accuracy: 0.9533 - val_loss: 0.1892
Epoch 3/5
[1m447/447[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 69ms/step - accuracy: 0.9530 - loss: 0.1947 - val_accuracy: 0.9533 - val_loss: 0.1892
Epoch 4/5
[1m447/447[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 68ms/step - accuracy: 0.9491 - loss: 0.2062 - val_accuracy: 0.9533 - val_loss: 0.1898
Epoch 5/5
[1m447/447[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 71ms/step - accuracy: 0.9546 - loss: 0.1903 - val_accuracy: 0.9533 - val_loss: 0.1888


In [14]:
loss, acc = model.evaluate(X_test, y_test)
print(f" Test Accuracy: {round(acc * 100, 2)}%")


model.save("../model/abacus_lstm.h5")
print(" Model saved as abacus_lstm.h5")


[1m112/112[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 22ms/step - accuracy: 0.9557 - loss: 0.1818




 Test Accuracy: 95.33%
 Model saved as abacus_lstm.h5


In [18]:
from sklearn.feature_extraction.text import TfidfVectorizer

vectorizer = TfidfVectorizer(max_features=1000)
X = vectorizer.fit_transform(texts).toarray()


NameError: name 'texts' is not defined

In [17]:
# Save model (fake resume classifier)
model.save("model/abacus_lstm.h5")
# Save tokenizer
import os
import pickle

# Ensure the directory exists
os.makedirs("../model", exist_ok=True)

# Save the TF-IDF vectorizer
with open("../model/tokenizer.pkl", "wb") as f:
    pickle.dump(vectorizer, f)

print("Tokenizer (TF-IDF vectorizer) saved at model/tokenizer.pkl")





NameError: name 'vectorizer' is not defined

In [19]:
# 03_train_abacus_model.ipynb

import numpy as np
import os
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout

# Load data
X = np.load("../data/abacus_features.npy")
y = np.load("../data/abacus_labels.npy")

print("X shape:", X.shape)
print("y shape:", y.shape)

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

print("✅ Data split done.")
print("Train:", X_train.shape, "Test:", X_test.shape)

# Build the model
model = Sequential()
model.add(Embedding(input_dim=1000, output_dim=64, input_length=250))
model.add(LSTM(64, return_sequences=False))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

# Train
history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=5,
    batch_size=32,
    verbose=1
)

# Evaluate
loss, acc = model.evaluate(X_test, y_test)
print(f"🎯 Test Accuracy: {round(acc * 100, 2)}%")

# Save the trained model
os.makedirs("../model", exist_ok=True)
model.save("../model/abacus_lstm_model.h5")
print("✅ Model saved as abacus_lstm_model.h5")


X shape: (17878, 250)
y shape: (17878,)
✅ Data split done.
Train: (14302, 250) Test: (3576, 250)


Epoch 1/5
[1m447/447[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 65ms/step - accuracy: 0.9494 - loss: 0.2337 - val_accuracy: 0.9533 - val_loss: 0.1902
Epoch 2/5
[1m447/447[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 69ms/step - accuracy: 0.9509 - loss: 0.2033 - val_accuracy: 0.9533 - val_loss: 0.1895
Epoch 3/5
[1m447/447[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 69ms/step - accuracy: 0.9508 - loss: 0.1999 - val_accuracy: 0.9533 - val_loss: 0.1887
Epoch 4/5
[1m447/447[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 72ms/step - accuracy: 0.9512 - loss: 0.1993 - val_accuracy: 0.9533 - val_loss: 0.1887
Epoch 5/5
[1m447/447[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 74ms/step - accuracy: 0.9486 - loss: 0.2083 - val_accuracy: 0.9533 - val_loss: 0.1892
[1m112/112[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 22ms/step - accuracy: 0.9557 - loss: 0.1825




🎯 Test Accuracy: 95.33%
✅ Model saved as abacus_lstm_model.h5
