# Load a saved Siamese network and use it to score how well a resume matches a job posting

In [20]:
import pandas as pd
import tensorflow_hub as hub
import torch
import torch.nn as nn
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

In [3]:
class SiameseNetwork(nn.Module):
    """Siamese network that scores a pair of equally sized feature vectors.

    Both inputs are passed through the same ``shared_network`` (so the two
    sides share weights), the element-wise absolute difference of the two
    64-dimensional encodings is fed to a single linear unit, and a sigmoid
    squashes the result into the open interval (0, 1).

    The encoder contains dropout and batch-norm layers, so the module
    behaves differently in training and evaluation mode.

    Args:
        input_size: Number of features in each input vector.
    """

    def __init__(self, input_size):
        super().__init__()
        # NOTE: the attribute names and the order of the layers determine the
        # state-dict keys ("shared_network.0.weight", ...); keep them stable
        # so previously saved weights still load.
        encoder_layers = [
            nn.Linear(input_size, 256),
            nn.ReLU(),
            nn.BatchNorm1d(256),
            nn.Dropout(0.5),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.BatchNorm1d(128),
            nn.Dropout(0.5),
            nn.Linear(128, 64),
            nn.ReLU(),
        ]
        self.shared_network = nn.Sequential(*encoder_layers)
        self.output_layer = nn.Linear(64, 1)

    def forward_one_side(self, x):
        """Encode one input with the shared encoder.

        Args:
            x: Tensor whose last dimension equals ``input_size``.

        Returns:
            The encoder output, with 64 features per row.
        """
        encoded = self.shared_network(x)
        return encoded

    def forward(self, input1, input2):
        """Score a pair of inputs.

        Args:
            input1: First input tensor.
            input2: Second input tensor, same shape as ``input1``.

        Returns:
            Tensor with one sigmoid-activated value per input pair.
        """
        first_encoding = self.forward_one_side(input1)
        second_encoding = self.forward_one_side(input2)
        # Taking the absolute difference makes the score independent of the
        # order in which the two inputs are given.
        gap = torch.abs(first_encoding - second_encoding)
        logit = self.output_layer(gap)
        return torch.sigmoid(logit)

In [6]:
# Build the network for 512-feature inputs, then restore the trained weights.
# map_location forces every tensor in the checkpoint onto the CPU.
model = SiameseNetwork(512)
saved_weights = torch.load(
    'siamese_network.pth',
    map_location=torch.device('cpu'),
)
model.load_state_dict(saved_weights)

<All keys matched successfully>

In [24]:
# Example pair of documents to compare.
resume = "Arman Drismir, Software Engineer, Python, Javascript, Pandas"
job_posting = "Looking for a HR Manager, ms word, google sheets"

# Preprocess the text
# Fetch the NLTK corpora that the stop-word filter and the lemmatizer read.
for corpus_name in ('stopwords', 'wordnet'):
    nltk.download(corpus_name)

def preprocess_text(text_series):
    """Normalise every string in a pandas Series.

    Missing values become empty strings. Each entry is lower-cased, stripped
    of every character that is neither a word character nor whitespace, split
    on whitespace, filtered against the NLTK English stop-word list, and the
    remaining tokens are lemmatized and re-joined with single spaces.

    Args:
        text_series: pandas Series of strings (may contain NaN).

    Returns:
        A Series with the cleaned text.
    """
    lemmatize = WordNetLemmatizer().lemmatize
    ignored_words = set(stopwords.words('english'))

    def _clean(raw_text):
        # Punctuation is removed before the stop-word check, so the check
        # sees tokens that no longer contain punctuation.
        without_punctuation = re.sub(r'[^\w\s]', '', raw_text.lower())
        kept_tokens = [
            lemmatize(token)
            for token in without_punctuation.split()
            if token not in ignored_words
        ]
        return ' '.join(kept_tokens)

    return text_series.fillna("").apply(_clean)

# Wrap each document in a one-element Series and clean it.
resume, job_posting = (
    preprocess_text(pd.Series(document)) for document in (resume, job_posting)
)

# Vectorize the text
universal_sentence_encoder = hub.load(
    "https://tfhub.dev/google/universal-sentence-encoder/4"
)

def vectorize_text(text_series):
    """Embed every entry of a Series with the module-level sentence encoder.

    Args:
        text_series: pandas Series of strings.

    Returns:
        The encoder output converted with ``.numpy()``.
    """
    sentences = text_series.tolist()
    encoded = universal_sentence_encoder(sentences)
    return encoded.numpy()

# Embed each document and turn the embedding into a flat float32 tensor.
resume, job_posting = (
    torch.tensor(vectorize_text(cleaned), dtype=torch.float32).flatten()
    for cleaned in (resume, job_posting)
)

# Switch to evaluation mode and skip gradient tracking for inference.
model.eval()
with torch.no_grad():
    # unsqueeze(0) adds a leading batch dimension of size one.
    prediction = model(resume.unsqueeze(0), job_posting.unsqueeze(0))
    similarity_score = prediction.item()
print(similarity_score)

# print(resume)
# print(job_posting)

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/armandrismir/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/armandrismir/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


0.24480126798152924
