In [2]:
pip install fasttext


Collecting fasttext
  Downloading fasttext-0.9.3.tar.gz (73 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/73.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m73.4/73.4 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting pybind11>=2.2 (from fasttext)
  Using cached pybind11-2.13.6-py3-none-any.whl.metadata (9.5 kB)
Using cached pybind11-2.13.6-py3-none-any.whl (243 kB)
Building wheels for collected packages: fasttext
  Building wheel for fasttext (pyproject.toml) ... [?25l[?25hdone
  Created wheel for fasttext: filename=fasttext-0.9.3-cp311-cp311-linux_x86_64.whl size=4313470 sha256=4b06f0c4dc2e7a448a9f9c6208bafe2edfd4e0652e82dc97fe869549cfda32b3
  Stored in directory: /root/.cache/pip/wheels/65/4f/35/5057db0249224e9ab55a51

In [3]:
pip install git+https://github.com/facebookresearch/fastText.git


Collecting git+https://github.com/facebookresearch/fastText.git
  Cloning https://github.com/facebookresearch/fastText.git to /tmp/pip-req-build-nac_j8_p
  Running command git clone --filter=blob:none --quiet https://github.com/facebookresearch/fastText.git /tmp/pip-req-build-nac_j8_p
  Resolved https://github.com/facebookresearch/fastText.git to commit 1142dc4c4ecbc19cc16eee5cdd28472e689267e6
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: fasttext
  Building wheel for fasttext (pyproject.toml) ... [?25l[?25hdone
  Created wheel for fasttext: filename=fasttext-0.9.2-cp311-cp311-linux_x86_64.whl size=4313396 sha256=5449f96162cc1cf4613931051aff4a7425b859165b222f968e2dfb1cef6704ab
  Stored in directory: /tmp/pip-ephem-wheel-cache-a841t5fw/wheels/04/64/26/11ce8db1ddfa20541eeec84e6969a9d7582367261378c65307
Successfully built fa

In [24]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Embedding, LSTM, Input, concatenate
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import classification_report, accuracy_score
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.models import save_model
from tensorflow.keras import Model
from tensorflow.keras.callbacks import EarlyStopping
import pickle
from tensorflow.keras.regularizers import l2
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Load the dataset
data = pd.read_csv('filipino_resumes_with_diverse_cover_letters.csv')

# Preprocessing
data['Education'] = data['Education'].apply(lambda x: 1 if x == 'Masters' else 0)
data['Experience'] = pd.to_numeric(data['Experience (Years)'], errors='coerce')
data['Age'] = pd.to_numeric(data['Age'], errors='coerce')

# Map roles to numeric classes (Chemical Engineer -> 0, Accountant -> 1, None -> 2)
role_map = {'Chemical Engineer': 0, 'Accountant': 1, 'None': 2}
data['Role'] = data['Role'].map(role_map)

# One-hot encode the target variable (Role)
y = to_categorical(data['Role'], num_classes=3)

# Features: Resume data
X_resume = data[['Age', 'Experience', 'Education']]

# Train-test split
X_train_res, X_test_res, y_train, y_test = train_test_split(X_resume, y, test_size=0.3, random_state=42)

# Standardize the resume features
scaler = StandardScaler()
X_train_res = scaler.fit_transform(X_train_res)
X_test_res = scaler.transform(X_test_res)

# Save the scaler to a file
with open('scaler.pkl', 'wb') as f:
    pickle.dump(scaler, f)

# Tokenizer for cover letter
tokenizer = Tokenizer()
tokenizer.fit_on_texts(data['Cover Letter'])  # Ensure 'Cover Letter' column exists in your dataset

# Save tokenizer for future use
with open('tokenizer.pkl', 'wb') as f:
    pickle.dump(tokenizer, f)

# Convert cover letters to sequences
X_cover = tokenizer.texts_to_sequences(data['Cover Letter'])

# Pad sequences for consistency
X_cover = pad_sequences(X_cover, padding='post')

# Train-test split for cover letter data
X_train_cover, X_test_cover = train_test_split(X_cover, test_size=0.3, random_state=42)

# Build the model with regularization and adjusted complexity
resume_input = Input(shape=(X_train_res.shape[1],), name='resume_input')
x = Dense(32, activation='relu', kernel_regularizer=l2(0.01))(resume_input)
x = Dropout(0.6)(x)  # Increased dropout to 0.6
x = Dense(16, activation='relu', kernel_regularizer=l2(0.01))(x)

# Cover letter input (text data)
cover_input = Input(shape=(X_train_cover.shape[1],), name='cover_input')
y = Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=100)(cover_input)
y = LSTM(32, kernel_regularizer=l2(0.01))(y)  # Reduced LSTM units
y = Dropout(0.6)(y)  # Increased dropout to 0.6
y = Dense(16, activation='relu', kernel_regularizer=l2(0.01))(y)

# Concatenate both inputs
combined = concatenate([x, y])

# Output layer
z = Dense(3, activation='softmax')(combined)

# Build and compile the model
model = Model(inputs=[resume_input, cover_input], outputs=z)
model.compile(optimizer=Adam(learning_rate=0.0005), loss='categorical_crossentropy', metrics=['accuracy'])

# Class weights to handle imbalanced data (giving more importance to minority classes)
class_weights = {0: 2.0, 1: 1.0, 2: 2.0}

# Early stopping callback
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# Train the model
model.fit([X_train_res, X_train_cover], y_train, epochs=100, batch_size=32, verbose=1,
          class_weight=class_weights, validation_data=([X_test_res, X_test_cover], y_test),
          callbacks=[early_stopping])

# Save the model as 'model.h5'
model.save('model.h5')

# Evaluate the model
y_pred = model.predict([X_test_res, X_test_cover])
y_pred_classes = np.argmax(y_pred, axis=1)

# Convert y_test back to single integer values (not one-hot)
y_test_classes = np.argmax(y_test, axis=1)

# Display classification report and accuracy
print("Classification Report:")
print(classification_report(y_test_classes, y_pred_classes))

print("Accuracy Score:", accuracy_score(y_test_classes, y_pred_classes))

# Save label encoders for roles
role_encoder = LabelEncoder()
role_encoder.fit(data['Role'])
with open('label_encoders.pkl', 'wb') as f:
    pickle.dump(role_encoder, f)

print("Model, scalers, and encoders saved successfully!")


Epoch 1/100
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 31ms/step - accuracy: 0.4261 - loss: 2.6930 - val_accuracy: 0.4883 - val_loss: 1.6784
Epoch 2/100
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 26ms/step - accuracy: 0.6238 - loss: 1.5891 - val_accuracy: 1.0000 - val_loss: 0.7030
Epoch 3/100
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 26ms/step - accuracy: 0.9955 - loss: 0.6804 - val_accuracy: 1.0000 - val_loss: 0.4109
Epoch 4/100
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 42ms/step - accuracy: 0.9847 - loss: 0.5083 - val_accuracy: 1.0000 - val_loss: 0.3340
Epoch 5/100
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 26ms/step - accuracy: 0.9991 - loss: 0.3511 - val_accuracy: 1.0000 - val_loss: 0.2762
Epoch 6/100
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 26ms/step - accuracy: 0.9996 - loss: 0.2874 - val_accuracy: 1.0000 - val_loss: 0.2327
Epoch 7/100
[1m88/88[0m [



[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       586
           1       1.00      1.00      1.00       614

    accuracy                           1.00      1200
   macro avg       1.00      1.00      1.00      1200
weighted avg       1.00      1.00      1.00      1200

Accuracy Score: 1.0
Model, scalers, and encoders saved successfully!


In [3]:
# Save the entire model
model.save("model.h5")


NameError: name 'model' is not defined

In [33]:
import pickle

# Assuming label_encoders is a dictionary of LabelEncoders used for categorical columns
with open("label_encoders.pkl", "wb") as f:
    pickle.dump(label_encoders, f)

In [1]:
pip install python-dotenv


Collecting python-dotenv
  Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)
Downloading python_dotenv-1.0.1-py3-none-any.whl (19 kB)
Installing collected packages: python-dotenv
Successfully installed python-dotenv-1.0.1


In [2]:
import secrets
print(secrets.token_urlsafe(24))


9J8-Yj5_J989bv25dKiGGYNO1SuMxyQU
