# **SatSet Recommendation System**

Import necessary libraries and modules

In [53]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [54]:
!pip install keras_tuner
!pip install surprise



In [55]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Input, Embedding, Flatten, Concatenate, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt
import keras_tuner as kt
import pickle

Load the datasets into DataFrame

In [56]:
users_path = '/content/drive/MyDrive/SatSet/users.csv'
tech_path = '/content/drive/MyDrive/SatSet/technicians.csv'

users_df = pd.read_csv(users_path)
technicians_df = pd.read_csv(tech_path)
ori_technician = technicians_df.copy()

## **1. Data Understanding**
Get the shape of the datasets

In [57]:
print(technicians_df.shape)
print(users_df.shape)

(200, 10)
(250, 6)


Display the first few rows of the dataset

In [58]:
technicians_df.head()

Unnamed: 0,technicianid,name,phonenumber,email,skills,experience,certifications,address,location,ratingsreceived
0,1,Erik Okta Lestari,6287265995831,eriklestari@gmail.com,"Computer Installation, AC Repair",13,,Gg. Rawamangun No. 019,Bitung,4.2
1,2,Raisa Lasmono Najmudin,6287452722533,raisanajmudin@gmail.com,"Washing Machine Maintenance, Electrical Repair",4,,Gg. Rumah Sakit No. 74,Tegal,4.6
2,3,Tasnim Utama,62829038854284,tasnimutama@gmail.com,"AC Maintenance, Plumbing Installation",4,Sertifikasi Profesi Teknik Pendingin dan Tata ...,Jl. Yos Sudarso No. 720,Jayapura,4.9
3,4,Warji Ghani Wahyudin,6289233671587,warjiwahyudin@gmail.com,"Computer Repair, Plumbing Repair",2,,Jl. Rajawali Timur No. 95,Pariaman,5.0
4,5,Tania Jailani,6282057406206,taniajailani@gmail.com,"Plumbing Installation, Computer Repair",3,SKA Ahli Teknik Plambing dan Pompa Mekanika,Gg. Monginsidi No. 200,Bogor,4.0


In [59]:
users_df.head()

Unnamed: 0,userid,name,location,preferences,technicianid,ratingsgiven
0,1,Himawan Sakura Nurdiyanti,Sungai Penuh,Washing Machine Installation,"[167, 137, 143, 156]","[4.1, 4.0, 4.3, 4.1]"
1,2,Fathonah Puspita,Bogor,Refrigerator Installation,"[99, 186, 84]","[4.0, 3.6, 4.9]"
2,3,Martaka Purwa Namaga,Kupang,Electrical Repair,"[88, 190, 117, 84, 23]","[4.1, 4.7, 4.3, 4.4, 4.5]"
3,4,Kardi Yuliana Zulkarnain,Meulaboh,Washing Machine Installation,[137],[3.6]
4,5,Martaka Tarihoran,Manado,Computer Installation,[81],[3.8]


Check the data types of each column

In [60]:
print(technicians_df.info())
print(users_df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 10 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   technicianid     200 non-null    int64  
 1   name             200 non-null    object 
 2   phonenumber      200 non-null    int64  
 3   email            200 non-null    object 
 4   skills           200 non-null    object 
 5   experience       200 non-null    int64  
 6   certifications   115 non-null    object 
 7   address          200 non-null    object 
 8   location         200 non-null    object 
 9   ratingsreceived  200 non-null    float64
dtypes: float64(1), int64(3), object(6)
memory usage: 15.8+ KB
None
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 250 entries, 0 to 249
Data columns (total 6 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   userid        250 non-null    int64 
 1   name          250 non-null    object
 2 

Check missing values

In [61]:
technicians_df.isnull().sum()

technicianid        0
name                0
phonenumber         0
email               0
skills              0
experience          0
certifications     85
address             0
location            0
ratingsreceived     0
dtype: int64

Get summary statistics of the numerical columns

In [62]:
technicians_df.describe()

Unnamed: 0,technicianid,phonenumber,experience,ratingsreceived
count,200.0,200.0,200.0,200.0
mean,100.5,31742310000000.0,4.705,4.2975
std,57.879185,28214530000000.0,3.590604,0.42027
min,1.0,6281075000000.0,1.0,3.5
25%,50.75,6284485000000.0,2.0,4.0
50%,100.5,6289188000000.0,4.0,4.3
75%,150.25,62853950000000.0,5.0,4.7
max,200.0,62899930000000.0,15.0,5.0


In [63]:
users_df.describe()

Unnamed: 0,userid
count,250.0
mean,125.5
std,72.312977
min,1.0
25%,63.25
50%,125.5
75%,187.75
max,250.0


## **2. Data Preprocessing**
Drop Irrelevant Column

In [64]:
technicians_df = technicians_df.drop(['email', 'phonenumber', 'location', 'address'], axis=1)

Handling Missing Values

In [65]:
technicians_df['skills'] = technicians_df['skills'].fillna('')
technicians_df['certifications'] = technicians_df['certifications'].fillna('None')

In [66]:
technicians_df['experience'] = technicians_df['experience'].fillna(0)
technicians_df['ratingsreceived'] = technicians_df['ratingsreceived'].fillna(0)

Convert to Lowercase

In [67]:
technicians_df['skills'] = technicians_df['skills'].str.lower()
technicians_df['certifications'] = technicians_df['certifications'].str.lower()

## **3. Content-Based Filtering**

### **3.1 Model Development**
Text feature extraction using TF-IDF Vectorizer

In [68]:
tfidf = TfidfVectorizer()
skills_tfidf = tfidf.fit_transform(technicians_df['skills']).toarray()

Normalize using StandardScaler

In [69]:
scaler_experience = StandardScaler()
scaler_ratings = StandardScaler()

# Fit and transform the 'experience' column
technicians_df['experience'] = scaler_experience.fit_transform(technicians_df[['experience']])

# Fit and transform the 'ratings' column
technicians_df['ratingsreceived'] = scaler_ratings.fit_transform(technicians_df[['ratingsreceived']])

Encoding Categorical Data using One-Hot Encoding

In [70]:
encoder = OneHotEncoder(handle_unknown='ignore')

# Fit the encoder to the 'certifications' column and transform the data
certifications_encoded_sparse = encoder.fit_transform(technicians_df[['certifications']])

# Convert the sparse matrix to a dense array if needed
certifications_encoded = certifications_encoded_sparse.toarray()

# Convert the encoded data into a DataFrame with appropriate column names
certifications_encoded_df = pd.DataFrame(certifications_encoded, columns=encoder.get_feature_names_out())


Combining Features

In [71]:
X_exp = technicians_df['experience'].values.reshape(-1, 1)
X_rating = technicians_df['ratingsreceived'].values.reshape(-1, 1)
X_cert = certifications_encoded

# Combine features into a single array
X = np.hstack([skills_tfidf, X_exp, X_cert, X_rating])

### **3.2 Model Training**

In [72]:
def build_content_based_model(input_dim):
    model = Sequential()
    model.add(Dense(256, input_dim=input_dim, activation='relu'))
    model.add(Dropout(0.3))
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.3))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(input_dim, activation='linear'))
    model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')
    return model

In [73]:
model_cb = build_content_based_model(X.shape[1])
model_cb.fit(X, X, epochs=50, batch_size=32, validation_split=0.2)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.src.callbacks.History at 0x7c50da391900>

### **3.3 Model Evaluation**

In [74]:
y_pred = model_cb.predict(X)
mse = mean_squared_error(X, y_pred)
mae = mean_absolute_error(X, y_pred)
r2 = r2_score(X, y_pred)

print("Mean Squared Error:", mse)
print("Mean Absolute Error:", mae)
print("R^2 Score:", r2)

Mean Squared Error: 0.01449625289735206
Mean Absolute Error: 0.0723996895230996
R^2 Score: 0.21860523987799654


### **3.4 Model Optimization**
Model optimization using Keras Tuner

In [75]:
def optimized_content_based_model(hp):
    model = Sequential()
    model.add(Dense(hp.Int('units1', min_value=128, max_value=512, step=32), input_dim=X.shape[1], activation='relu'))
    model.add(Dropout(hp.Float('dropout1', min_value=0.2, max_value=0.5, step=0.1)))
    model.add(Dense(hp.Int('units2', min_value=64, max_value=256, step=32), activation='relu'))
    model.add(Dropout(hp.Float('dropout2', min_value=0.2, max_value=0.5, step=0.1)))
    model.add(Dense(hp.Int('units3', min_value=32, max_value=128, step=16), activation='relu'))
    model.add(Dense(X.shape[1], activation='linear'))
    model.compile(optimizer=Adam(learning_rate=hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='log')),
                  loss='mean_squared_error', metrics=['mean_absolute_error'])
    return model

In [76]:
tuner_cb = kt.Hyperband(
    optimized_content_based_model,
    objective='val_mean_absolute_error',
    max_epochs=50,
    factor=3,
    directory='my_dir',
    project_name='content_based'
)

Reloading Tuner from my_dir/content_based/tuner0.json


In [77]:
early_stopping = EarlyStopping(
    monitor='val_mean_absolute_error',
    patience=10,
    restore_best_weights=True
)

tuner_cb.search(X, X, epochs=50, validation_split=0.2, callbacks=[early_stopping])

# Get the best model
model_cb = tuner_cb.get_best_models(num_models=1)[0]

# Summary of the best model
model_cb.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 224)               9856      
                                                                 
 dropout (Dropout)           (None, 224)               0         
                                                                 
 dense_1 (Dense)             (None, 192)               43200     
                                                                 
 dropout_1 (Dropout)         (None, 192)               0         
                                                                 
 dense_2 (Dense)             (None, 80)                15440     
                                                                 
 dense_3 (Dense)             (None, 43)                3483      
                                                                 
Total params: 71979 (281.17 KB)
Trainable params: 71979 

In [78]:
kf = KFold(n_splits=5, shuffle=True, random_state=42)
cv_scores = []

for train_index, val_index in kf.split(X):
    X_train, X_val = X[train_index], X[val_index]
    history = model_cb.fit(X_train, X_train, epochs=50, validation_data=(X_val, X_val), callbacks=[early_stopping], verbose=0)
    val_loss, val_mae = model_cb.evaluate(X_val, X_val, verbose=0)
    cv_scores.append(val_mae)

print(f'Cross-validation MSE scores: {cv_scores}')
print(f'Mean CV MSE: {np.mean(cv_scores)}')

Cross-validation MSE scores: [0.038134895265102386, 0.034582287073135376, 0.03096747025847435, 0.03729989007115364, 0.03417227789759636]
Mean CV MSE: 0.03503136411309242


Save the model and preprocessing objects

In [79]:
model_cb_path = 'content_based_filtering.h5'
tfidf_path = 'tfidf_vectorizer.pkl'
scaler_experience_path = 'scaler_experience.pkl'
scaler_ratings_path = 'scaler_ratings.pkl'
encoder_path = 'encoder.pkl'

model_cb.save(model_cb_path)
with open(tfidf_path, 'wb') as f:
    pickle.dump(tfidf, f)
with open(scaler_experience_path, 'wb') as f:
    pickle.dump(scaler_experience, f)
with open(scaler_ratings_path, 'wb') as f:
    pickle.dump(scaler_ratings, f)
with open(encoder_path, 'wb') as f:
    pickle.dump(encoder, f)

print("Model and preprocessing objects saved.")

Model and preprocessing objects saved.


  saving_api.save_model(


## **4. Collaborative Filtering**

### **4.1 Model Development**
Expand the technicianid and ratingsgiven columns

In [80]:
users_df['technicianid'] = users_df['technicianid'].apply(eval)
users_df['ratingsgiven'] = users_df['ratingsgiven'].apply(eval)

Create a list of all ratings

In [81]:
ratings = []
for i, row in users_df.iterrows():
    for tech_id, rating in zip(row['technicianid'], row['ratingsgiven']):
        ratings.append([row['userid'], tech_id, rating])

Convert to DataFrame

In [82]:
ratings_df = pd.DataFrame(ratings, columns=['userid', 'technicianid', 'rating'])

Map user and technician IDs to indices

In [83]:
user_id_map = {id: idx for idx, id in enumerate(ratings_df['userid'].unique())}
technician_id_map = {id: idx for idx, id in enumerate(ratings_df['technicianid'].unique())}

ratings_df['user_idx'] = ratings_df['userid'].map(user_id_map)
ratings_df['technician_idx'] = ratings_df['technicianid'].map(technician_id_map)

num_users = len(user_id_map)
num_technicians = len(technician_id_map)

Split the data

In [84]:
train_df, test_df = train_test_split(ratings_df, test_size=0.2, random_state=42)

### **4.2 Model Training**

In [85]:
def build_collaborative_model(embedding_size=50, dense_units=[128, 64, 32], dropout_rates=[0.3, 0.3, 0.3], learning_rate=1e-3):
    user_input = Input(shape=(1,), name='user')
    technician_input = Input(shape=(1,), name='technician')

    user_embedding = Embedding(num_users, embedding_size, name='user_embedding')(user_input)
    technician_embedding = Embedding(num_technicians, embedding_size, name='technician_embedding')(technician_input)

    user_vec = Flatten(name='flatten_user')(user_embedding)
    technician_vec = Flatten(name='flatten_technician')(technician_embedding)

    concat = Concatenate()([user_vec, technician_vec])

    x = concat
    for units, dropout in zip(dense_units, dropout_rates):
        x = Dense(units, activation='relu')(x)
        x = Dropout(dropout)(x)

    output = Dense(1, activation='linear')(x)

    model = Model([user_input, technician_input], output)
    model.compile(optimizer=Adam(learning_rate=learning_rate),
                  loss='mean_squared_error',
                  metrics=['mean_absolute_error'])

    return model

In [86]:
model_cl = build_collaborative_model()

In [87]:
train_user = train_df['user_idx'].values
train_technician = train_df['technician_idx'].values
train_rating = train_df['rating'].values

test_user = test_df['user_idx'].values
test_technician = test_df['technician_idx'].values
test_rating = test_df['rating'].values

In [88]:
history = model_cl.fit(
    [train_user, train_technician], train_rating,
    batch_size=64,
    epochs=10,
    validation_data=([test_user, test_technician], test_rating),
    verbose=1
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


### **4.3 Model Evaluation**

In [89]:
results = model_cl.evaluate([test_user, test_technician], test_rating, verbose=1)
print(f"Test Loss: {results[0]}")
print(f"Test MAE: {results[1]}")

Test Loss: 0.5532161593437195
Test MAE: 0.6219996213912964


### **4.4 Model Optimization**
Model optimization using Keras Tuner

In [90]:
def optimized_collaborative_model(hp):
    embedding_size = hp.Int('embedding_size', min_value=30, max_value=100, step=10)

    user_input = Input(shape=(1,), name='user')
    technician_input = Input(shape=(1,), name='technician')

    user_embedding = Embedding(num_users, embedding_size, name='user_embedding')(user_input)
    technician_embedding = Embedding(num_technicians, embedding_size, name='technician_embedding')(technician_input)

    user_vec = Flatten(name='flatten_user')(user_embedding)
    technician_vec = Flatten(name='flatten_technician')(technician_embedding)

    concat = Concatenate()([user_vec, technician_vec])

    x = concat
    for i in range(hp.Int('num_layers', 1, 3)):
        x = Dense(hp.Int(f'dense_{i}_units', min_value=64, max_value=256, step=32), activation='relu')(x)
        x = Dropout(hp.Float(f'dropout_{i}', min_value=0.2, max_value=0.5, step=0.1))(x)

    output = Dense(1, activation='linear')(x)

    model = Model([user_input, technician_input], output)
    model.compile(optimizer=Adam(learning_rate=hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='log')),
                  loss='mean_squared_error', metrics=['mean_absolute_error'])

    return model

In [91]:
tuner_cl = kt.Hyperband(
    optimized_collaborative_model,
    objective='val_mean_absolute_error',
    max_epochs=10,
    hyperband_iterations=2,
    directory='my_dir',
    project_name='collaborative'
)

Reloading Tuner from my_dir/collaborative/tuner0.json


In [92]:
tuner_cl.search(
    [train_user, train_technician], train_rating,
    epochs=10,
    validation_data=([test_user, test_technician], test_rating),
    batch_size=64
)

# Get the optimal hyperparameters
best_hps = tuner_cl.get_best_hyperparameters(num_trials=1)[0]

In [93]:
model_cl = tuner_cl.hypermodel.build(best_hps)

In [94]:
history = model_cl.fit(
    [train_user, train_technician], train_rating,
    batch_size=64,
    epochs=10,
    validation_data=([test_user, test_technician], test_rating),
    verbose=1
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [95]:
results = model_cl.evaluate([test_user, test_technician], test_rating, verbose=1)
print(f"Test Loss: {results[0]}")
print(f"Test MAE: {results[1]}")

Test Loss: 0.398813396692276
Test MAE: 0.5180251002311707


Save the model and features

In [96]:
model_cl.save('collaborative_filtering.h5')

with open('user_id_map.pkl', 'wb') as f:
    pickle.dump(user_id_map, f)

with open('technician_id_map.pkl', 'wb') as f:
    pickle.dump(technician_id_map, f)

print("Model and preprocessing objects saved.")

Model and preprocessing objects saved.


  saving_api.save_model(


## **5. Recommendation**

### **5.1 Content-Based Filtering**
Load the saved model and preprocessing artifacts

In [97]:
content_based_model = tf.keras.models.load_model('content_based_filtering.h5')
with open('tfidf_vectorizer.pkl', 'rb') as f:
    tfidf = pickle.load(f)
with open('scaler_experience.pkl', 'rb') as f:
    scaler_experience = pickle.load(f)
with open('scaler_ratings.pkl', 'rb') as f:
    scaler_ratings = pickle.load(f)
with open('encoder.pkl', 'rb') as f:
    encoder = pickle.load(f)

In [98]:
def content_based_filtering(user_skill):
    # Preprocess the user input skill
    user_skill_tfidf = tfidf.transform([user_skill]).toarray()

    # Prepare the input data
    if user_skill_tfidf.shape[1] < X.shape[1]:
        X_input = np.hstack([user_skill_tfidf, np.zeros((1, X.shape[1] - user_skill_tfidf.shape[1]))])
    else:
        X_input = user_skill_tfidf[:, :X.shape[1]]

    # Predict scores for the user input skill
    predicted_score = content_based_model.predict(X_input).flatten()[0]

    # Combine with experience, certifications, and ratings
    best_match_score = -1
    best_technician_index = -1

    for idx in range(X.shape[0]):
        technician = technicians_df.iloc[idx]
        skill_match = user_skill.lower() in technician['skills'].lower()  # Ensure exact phrase matching
        if skill_match:
            combined_score = (predicted_score +
                              technician['experience'] +
                              technician['ratingsreceived'] +
                              certifications_encoded_sparse[idx].sum())
            if combined_score > best_match_score:
                best_match_score = combined_score
                best_technician_index = idx

    if best_technician_index != -1:
        return ori_technician.iloc[best_technician_index]
    else:
        return "No matching technician found."

In [99]:
# Example usage
user_skill = "computer repair"
best_technician = content_based_filtering(user_skill)
print(best_technician)

technicianid                                               186
name                                      Wisnu Nugraha Irawan
phonenumber                                      6284311165480
email                                    wisnuirawan@gmail.com
skills                        Plumbing Repair, Computer Repair
experience                                                  14
certifications     SKA Ahli Teknik Plambing dan Pompa Mekanika
address                                  Jl. Abdul Muis No. 15
location                                             Surakarta
ratingsreceived                                            5.0
Name: 185, dtype: object


### **5.2 Collaborative Filtering**
Load the saved model and preprocessing artifacts

In [100]:
collaborative_model = tf.keras.models.load_model('collaborative_filtering.h5')

# Load the user_id_map and technician_id_map
with open('user_id_map.pkl', 'rb') as f:
    user_id_map = pickle.load(f)

with open('technician_id_map.pkl', 'rb') as f:
    technician_id_map = pickle.load(f)


In [101]:
def collaborative_filtering(user_id):
    # Map the user ID to the corresponding index
    if user_id not in user_id_map:
        return "User ID not found."

    user_idx = user_id_map[user_id]

    # Predict ratings for all technicians for the given user
    user_input = np.array([user_idx] * num_technicians)
    technician_input = np.arange(num_technicians)
    predicted_ratings = collaborative_model.predict([user_input, technician_input])

    # Combine the predicted ratings with technician indices
    predictions = [(tech_id, pred) for tech_id, pred in zip(technician_id_map.keys(), predicted_ratings.flatten())]

    # Sort the predictions by rating in descending order
    sorted_predictions = sorted(predictions, key=lambda x: x[1], reverse=True)

    # Return the top recommendation
    top_recommendation = sorted_predictions[0]
    top_technician = ori_technician[technicians_df['technicianid'] == top_recommendation[0]].iloc[0]
    return top_technician

In [102]:
# Example usage
user_id = 1
recommendation = collaborative_filtering(user_id)
print(recommendation)

technicianid                                   136
name                               Jarwadi Pradana
phonenumber                         62861619190844
email                     jarwadipradana@gmail.com
skills             Computer Maintenance, AC Repair
experience                                       1
certifications                                 NaN
address                     Jl. Yos Sudarso No. 16
location                                 Pagaralam
ratingsreceived                                4.1
Name: 135, dtype: object


### **5.3 Hybrid Recommendation System**

In [103]:
def hybrid_recommendation(user_id, user_skill):
    # Get collaborative filtering recommendation
    collab_recommendation = collaborative_filtering(user_id)

    # Get content-based filtering recommendation
    content_recommendation = content_based_filtering(user_skill)

    # Extract and compute scores
    collab_score = collab_recommendation['ratingsreceived'] + collab_recommendation['experience'] + certifications_encoded_sparse[collab_recommendation.name].sum()
    content_score = content_recommendation['ratingsreceived'] + content_recommendation['experience'] + certifications_encoded_sparse[content_recommendation.name].sum()

    # Combine the scores (simple weighted average or other logic can be applied here)
    combined_score = (collab_score + content_score) / 2

    # Compare scores and return the best recommendation
    if collab_score >= combined_score:
        return collab_recommendation
    else:
        return content_recommendation

In [104]:
# Example usage
user_id = 150
user_skill = 'plumbing maintenance'
recommendation = hybrid_recommendation(user_id, user_skill)
print(recommendation)

technicianid                                                      85
name                                              Yoga Adika Narpati
phonenumber                                            6282753036164
email                                          yoganarpati@gmail.com
skills                  Plumbing Maintenance, Washing Machine Repair
experience                                                        14
certifications     Sertifikat Kompetensi BNSP Teknisi Plumbing & ...
address                                      Jl. Ahmad Dahlan No. 52
location                                                    Bengkulu
ratingsreceived                                                  4.5
Name: 84, dtype: object
