In [5]:
import pandas as pd
import xgboost as xgb
import pickle

# Columns that must parse as numbers (three model inputs plus the target).
numeric_cols = ["num_skills", "num_certificates", "experience_years", "salary_avg"]

# Ordinal encoding of the categorical experience bucket.
experience_mapping = {
    "noExperience": 0,
    "between1And3": 1,
    "between3And6": 3,
    "moreThan6": 6,
}

# Cap values to match form inputs
feature_caps = {
    "experience_years": 10,
    "num_skills": 5,
    "num_certificates": 5,
}

# Select features & target
features = ["experience_years", "num_skills", "num_certificates", "experience_id"]
target = "salary_avg"


def prepare_training_frame(raw):
    """Return a cleaned copy of *raw* ready for model fitting.

    Steps:
      1. Coerce every column in ``numeric_cols`` to numeric; values that do
         not parse become NaN.
      2. Replace ``experience_id`` with its ``experience_mapping`` code;
         values missing from the mapping become NaN.
      3. Clip the columns listed in ``feature_caps`` to their upper bound.
      4. Drop every row that has a NaN in any feature or in the target.

    Args:
        raw: DataFrame containing the ``numeric_cols`` columns and
            ``experience_id``. It is not modified.

    Returns:
        A new DataFrame with the same columns as *raw*. It may be empty;
        the caller decides how to handle that.

    Raises:
        KeyError: if one of the required columns is absent from *raw*.
    """
    frame = raw.copy()

    for col in numeric_cols:
        frame[col] = pd.to_numeric(frame[col], errors="coerce")

    frame["experience_id"] = frame["experience_id"].map(experience_mapping)

    # Clipping leaves NaN untouched, so it is safe to do before the drop.
    for col, cap in feature_caps.items():
        frame[col] = frame[col].clip(upper=cap)

    # Check *all* model inputs, not only numeric_cols: an experience_id value
    # outside experience_mapping is NaN at this point and would otherwise be
    # passed to training unnoticed.
    return frame.dropna(subset=features + [target])


# True both when run as a script and when executed as a notebook cell;
# keeps file I/O and training from firing if this code is ever imported.
if __name__ == "__main__":
    print("🔄 Loading data...")
    df = prepare_training_frame(pd.read_csv("vacancy_features_modified.csv"))
    print(f"✅ Final training data shape: {df.shape}")

    if df.empty:
        raise ValueError("🚨 ERROR: DataFrame is empty after preprocessing. Check your CSV!")

    X = df[features]
    y = df[target]

    # Train the model
    print("📈 Training XGBRegressor...")
    model = xgb.XGBRegressor(n_estimators=100, max_depth=4, random_state=42)
    model.fit(X, y)

    # Save the model
    # NOTE(review): the XGBoost docs recommend Booster/estimator save_model()
    # for version-stable storage; pickle is kept here because whatever loads
    # "xgb_model.pkl" presumably expects a pickle — confirm before changing.
    with open("xgb_model.pkl", "wb") as f:
        pickle.dump(model, f)

    print("💾 Model saved as xgb_model.pkl")


🔄 Loading data...
✅ Final training data shape: (771, 11)
📈 Training XGBRegressor...
💾 Model saved as xgb_model.pkl
