In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import LabelEncoder

In [5]:
def preprocess_data(user_input, answer):
    """Encode one user's scored professions into ids and target scores.

    Args:
        user_input: The user's query. It is treated as a single user that is
            paired with every profession in ``answer``.
        answer: Sized sequence of items where ``item[0]`` is the profession
            name and ``item[1]`` is its score.

    Returns:
        Tuple ``(user_ids, profession_ids, scores, le_user, le_profession)``:
        the label-encoded user ids and profession ids, the scores as a
        float32 tensor, and the two fitted ``LabelEncoder`` objects.
    """
    # Split every (name, score, ...) item into two parallel lists.
    profession_names = []
    raw_scores = []
    for item in answer:
        profession_names.append(item[0])
        raw_scores.append(item[1])

    user_encoder = LabelEncoder()
    profession_encoder = LabelEncoder()

    # The same user is repeated once per profession so the id arrays line up.
    encoded_users = user_encoder.fit_transform([user_input] * len(answer))
    encoded_professions = profession_encoder.fit_transform(profession_names)
    score_tensor = torch.tensor(raw_scores, dtype=torch.float32)

    return (
        encoded_users,
        encoded_professions,
        score_tensor,
        user_encoder,
        profession_encoder,
    )

In [6]:
# GMF (Generalized Matrix Factorization) based solution

class GMF(nn.Module):
    """Generalized Matrix Factorization scorer for (user, item) id pairs.

    Each user and each item gets a learned embedding of size
    ``embedding_dim``. The two embeddings are multiplied element-wise, and a
    linear layer followed by a sigmoid turns the product into one score in
    (0, 1) per pair.
    """

    def __init__(self, num_users, num_items, embedding_dim):
        """Create the embedding tables and the scoring layer.

        Args:
            num_users: Number of rows in the user embedding table.
            num_items: Number of rows in the item embedding table.
            embedding_dim: Size of each user/item embedding vector.
        """
        super().__init__()

        self.user_embedding = nn.Embedding(num_embeddings=num_users,
                                           embedding_dim=embedding_dim)
        self.item_embedding = nn.Embedding(num_embeddings=num_items,
                                           embedding_dim=embedding_dim)
        self.output_layer = nn.Linear(in_features=embedding_dim, out_features=1)

    def forward(self, user, item):
        """Score each (user, item) pair.

        Args:
            user: Integer tensor of user ids.
            item: Integer tensor of item ids, aligned with ``user``.

        Returns:
            Sigmoid scores with a trailing dimension of size 1.
        """
        # Element-wise product of the two embeddings is the GMF interaction.
        elementwise = torch.mul(self.user_embedding(user), self.item_embedding(item))
        logits = self.output_layer(elementwise)
        return logits.sigmoid()


In [9]:
def train_model(model, user_ids, profession_ids, scores, num_epochs=10, lr=0.001):
    """Fit ``model`` on (user, profession) pairs with full-batch Adam and BCE loss.

    Args:
        model: ``nn.Module`` called as ``model(user_ids, profession_ids)``.
            Its outputs are passed to ``nn.BCELoss``, so they are expected to
            be probabilities, one value per pair.
        user_ids: Integer user ids, one per pair (list, NumPy array or tensor).
        profession_ids: Integer profession ids aligned with ``user_ids``.
        scores: Target score per pair; converted to a flat float32 tensor.
        num_epochs: Number of full-batch optimisation steps.
        lr: Learning rate passed to Adam.

    Returns:
        None. The model is updated in place and the loss is printed after
        every epoch.
    """
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.BCELoss()

    # as_tensor accepts lists, NumPy arrays and tensors alike, and avoids the
    # copy-construct warning torch.tensor() emits when handed a tensor.
    user_ids = torch.as_tensor(user_ids, dtype=torch.long)
    profession_ids = torch.as_tensor(profession_ids, dtype=torch.long)
    targets = torch.as_tensor(scores, dtype=torch.float32).reshape(-1)

    # The mode does not change between steps, so set it once up front.
    model.train()

    for epoch in range(num_epochs):

        # forward pass
        # reshape(-1) keeps the batch axis even for a single sample. A bare
        # squeeze() would collapse a (1, 1) output to a 0-d tensor, which
        # BCELoss rejects because it no longer matches the (1,) target.
        outputs = model(user_ids, profession_ids).reshape(-1)
        loss = criterion(outputs, targets)

        # backward propagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")

In [10]:
def predict_top_professions(model, user_input, answer, le_user, le_profession, top_k=20):
    """Score every profession in ``answer`` for one user and return the best ones.

    Args:
        model: ``nn.Module`` called as ``model(user_ids, profession_ids)`` that
            returns one score per pair.
        user_input: The user's query; must be known to ``le_user``.
        answer: Sequence of items whose first element is the profession name.
        le_user: Fitted encoder with a ``transform`` method for users.
        le_profession: Fitted encoder with a ``transform`` method for
            profession names.
        top_k: Maximum number of professions to return.

    Returns:
        List of ``(profession, score)`` tuples sorted by descending score,
        truncated to ``top_k`` entries.
    """
    professions = [item[0] for item in answer]

    # encode professions and repeat the encoded user once per profession
    profession_ids = torch.as_tensor(le_profession.transform(professions), dtype=torch.long)
    user_id = int(le_user.transform([user_input])[0])
    user_ids = torch.full_like(profession_ids, user_id)

    # Score in eval mode so layers such as dropout behave deterministically,
    # then restore whatever mode the caller had the model in.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            # reshape(-1) always yields a 1-D result. A bare squeeze() turns a
            # single prediction into a 0-d array that zip() cannot iterate.
            predictions = model(user_ids, profession_ids).reshape(-1).numpy()
    finally:
        model.train(was_training)

    # sort by predicted score, highest first
    ranked_professions = sorted(zip(professions, predictions), key=lambda pair: pair[1], reverse=True)
    return ranked_professions[:top_k]

In [11]:
# Sample data -- replace with the real feedback pipeline output.
# Each entry of `answer` is a (profession name, score) pair.

user_input = "math and humanities"
answer = [
    ("Scientist", 0.8),
    ("Historian", 0.6),
    ("Engineer", 0.9),
    ("Teacher", 0.7),
]

# Encode the single user and the professions, and keep the fitted encoders
# so the prediction cell can map names back to ids.
(user_ids,
 profession_ids,
 scores,
 le_user,
 le_profession) = preprocess_data(user_input, answer)

In [None]:
# Train and evaluate GMF

# Seed PyTorch before building the model so the random embedding / linear
# initialisation -- and therefore the printed losses and the final ranking --
# are the same on every Restart & Run All.
SEED = 42
torch.manual_seed(SEED)

# Embedding tables need one row per distinct encoded id.
gmf_model = GMF(num_users=len(set(user_ids)),
                num_items=len(set(profession_ids)),
                embedding_dim=16
                )

# NOTE(review): the recorded output of the prediction cell ranks Engineer
# (target 0.9) below Historian (target 0.6), which suggests the defaults
# (10 epochs, lr=0.001) barely move the model away from its random
# initialisation -- consider passing a larger num_epochs / lr here.
train_model(gmf_model, user_ids, profession_ids, scores)

In [15]:
print("GMF predictions:")

# Rank the professions for the sample user; at most 20 entries are returned
# (the function's default top_k).
top_20_professions = predict_top_professions(
    model=gmf_model,
    user_input=user_input,
    answer=answer,
    le_user=le_user,
    le_profession=le_profession,
)

# One "name: score" line per profession, best first.
for profession, score in top_20_professions:
    print(f"{profession}: {score:.4f}")

GMF predictions:
Scientist: 0.6226
Historian: 0.5572
Engineer: 0.3693
Teacher: 0.3217
