In [2]:
pip install fasttext


Collecting fasttext
  Downloading fasttext-0.9.3.tar.gz (73 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/73.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m73.4/73.4 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting pybind11>=2.2 (from fasttext)
  Using cached pybind11-2.13.6-py3-none-any.whl.metadata (9.5 kB)
Using cached pybind11-2.13.6-py3-none-any.whl (243 kB)
Building wheels for collected packages: fasttext
  Building wheel for fasttext (pyproject.toml) ... [?25l[?25hdone
  Created wheel for fasttext: filename=fasttext-0.9.3-cp311-cp311-linux_x86_64.whl size=4313470 sha256=4b06f0c4dc2e7a448a9f9c6208bafe2edfd4e0652e82dc97fe869549cfda32b3
  Stored in directory: /root/.cache/pip/wheels/65/4f/35/5057db0249224e9ab55a51

In [3]:
pip install git+https://github.com/facebookresearch/fastText.git


Collecting git+https://github.com/facebookresearch/fastText.git
  Cloning https://github.com/facebookresearch/fastText.git to /tmp/pip-req-build-nac_j8_p
  Running command git clone --filter=blob:none --quiet https://github.com/facebookresearch/fastText.git /tmp/pip-req-build-nac_j8_p
  Resolved https://github.com/facebookresearch/fastText.git to commit 1142dc4c4ecbc19cc16eee5cdd28472e689267e6
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: fasttext
  Building wheel for fasttext (pyproject.toml) ... [?25l[?25hdone
  Created wheel for fasttext: filename=fasttext-0.9.2-cp311-cp311-linux_x86_64.whl size=4313396 sha256=5449f96162cc1cf4613931051aff4a7425b859165b222f968e2dfb1cef6704ab
  Stored in directory: /tmp/pip-ephem-wheel-cache-a841t5fw/wheels/04/64/26/11ce8db1ddfa20541eeec84e6969a9d7582367261378c65307
Successfully built fa

In [36]:
import numpy as np
import pandas as pd
import pickle
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, Flatten, Dropout
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Load dataset
df = pd.read_csv("filipino_resumes_with_diverse_cover_letters.csv")

# Identify column types
num_cols = ["Age", "Experience (Years)"]
text_cols = ["Skills", "Certifications", "Previous Job Role", "Cover Letter"]
cat_cols = ["Education", "Region", "Languages Spoken", "Location", "Availability", "Education Institution", "Role"]

# Encode categorical columns
label_encoders = {}
for col in cat_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# Save Label Encoders
with open("label_encoders.pkl", "wb") as f:
    pickle.dump(label_encoders, f)

# Standardize numerical columns
scaler = StandardScaler()
df[num_cols] = scaler.fit_transform(df[num_cols])

# Save Scaler
with open("scaler.pkl", "wb") as f:
    pickle.dump(scaler, f)

# Tokenize text columns
tokenizer = Tokenizer(num_words=5000, oov_token="<OOV>")
tokenizer.fit_on_texts(df[text_cols].astype(str).values.flatten())

# Convert text data to sequences
X_text = []
for col in text_cols:
    X_text.append(pad_sequences(tokenizer.texts_to_sequences(df[col].astype(str)), maxlen=100))

X_text = np.hstack(X_text)
X_num = df[num_cols].values
X_cat = df[cat_cols].values
y = df["Role"].values

# Combine all features
X = np.hstack((X_text, X_num, X_cat))

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build Neural Network
model = Sequential([
    Dense(128, activation="relu", input_shape=(X_train.shape[1],)),
    Dropout(0.3),
    Dense(64, activation="relu"),
    Dropout(0.2),
    Dense(32, activation="relu"),
    Dense(1, activation="sigmoid")  # Assuming binary classification
])

model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

# Train the model
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=50, batch_size=32)

# Save Model
model.save("model.h5")

# Save Tokenizer
with open("tokenizer.pkl", "wb") as f:
    pickle.dump(tokenizer, f)


Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.6759 - loss: 1.9167 - val_accuracy: 0.9588 - val_loss: 0.1095
Epoch 2/50
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8858 - loss: 0.2852 - val_accuracy: 1.0000 - val_loss: 0.0109
Epoch 3/50
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9669 - loss: 0.0882 - val_accuracy: 1.0000 - val_loss: 0.0029
Epoch 4/50
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.9870 - loss: 0.0367 - val_accuracy: 1.0000 - val_loss: 2.6828e-04
Epoch 5/50
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9946 - loss: 0.0206 - val_accuracy: 1.0000 - val_loss: 2.3653e-04
Epoch 6/50
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9942 - loss: 0.0127 - val_accuracy: 1.0000 - val_loss: 6.9022e-05
Epoch 7/50
[1m100/100[0m 



In [3]:
# Save the entire model
model.save("model.h5")


NameError: name 'model' is not defined

In [33]:
import pickle

# Assuming label_encoders is a dictionary of LabelEncoders used for categorical columns
with open("label_encoders.pkl", "wb") as f:
    pickle.dump(label_encoders, f)

In [1]:
pip install python-dotenv


Collecting python-dotenv
  Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)
Downloading python_dotenv-1.0.1-py3-none-any.whl (19 kB)
Installing collected packages: python-dotenv
Successfully installed python-dotenv-1.0.1


In [2]:
import secrets
print(secrets.token_urlsafe(24))


9J8-Yj5_J989bv25dKiGGYNO1SuMxyQU
