In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import joblib

In [3]:
# Load datasets
train_df = pd.read_csv("recommt1.csv")
predict_df = pd.read_csv("recommp1.csv")

In [23]:
# Features & Labels
features = ["assigned_tasks", "avg_difficulty", "locality_rating", "citizen_rating"]
X = train_df[features]
y = train_df["suitability_score"]

In [7]:
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [25]:
# Scale features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [27]:
# Train RandomForest Regressor
rf = RandomForestRegressor(n_estimators=300, random_state=42)
rf.fit(X_train_scaled, y_train)

In [29]:
# Predict on new workers
predict_df_scaled = scaler.transform(predict_df[features])
predict_df["predicted_score"] = rf.predict(predict_df_scaled)

In [17]:
# Sort Top 10 Recommendations
leaderboard = predict_df.sort_values("predicted_score", ascending=False).reset_index(drop=True)

print("\n🎯 Top 10 Recommended Workers:")
print(leaderboard.head(15)[[
    "worker_id", "assigned_tasks", "avg_difficulty",
    "locality_rating", "citizen_rating", "predicted_score"
]])



🎯 Top 10 Recommended Workers:
    worker_id  assigned_tasks  avg_difficulty  locality_rating  \
0        1003               0               0                5   
1        1029               0               0                4   
2        1100               0               0                4   
3        1074               0               0                3   
4        1104               0               0                3   
5        1046               0               0                3   
6        1125               0               0                1   
7        1098               1               1                5   
8        1065               0               0                1   
9        1049               1               1                3   
10       1032               1               1                4   
11       1050               1               1                3   
12       1053               1               2                4   
13       1031               1               2

In [None]:
# Save leaderboard
leaderboard.to_csv("recommended.csv", index=False)
print("Leaderboard saved as recommended.csv")

✅ Leaderboard saved as recommended.csv


In [21]:
import joblib
joblib.dump(rf, "worker_recommendation_model.pkl")
joblib.dump(scaler, "scaler_recommendation.pkl")

print("Model and scaler saved as worker_recommendation_model.pkl & scaler_recommendation.pkl")

Model and scaler saved as worker_recommendation_model.pkl & scaler_recommendation.pkl
