In [51]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LinearRegression
import pickle

# Train a linear-regression salary model from employers_data.csv and persist
# the fitted model, the three label encoders, and the feature column order so
# an inference script can reproduce the exact same preprocessing.
df = pd.read_csv("employers_data.csv")

# Make sure the target is numeric; pd.to_numeric raises on unparseable values.
df["Salary"] = pd.to_numeric(df["Salary"])

# Drop columns that are not used as features. errors="ignore" skips any that
# are absent, so the script works whether or not the CSV carries them.
df.drop(
    columns=["Name", "Department", "Location", "Employee_ID"],
    errors="ignore",
    inplace=True,
)

# Remove rows with any missing value before encoding and fitting.
df.dropna(inplace=True)

# One encoder per categorical column; each is saved below so inference can
# map raw category strings to the same integer codes used during training.
le_edu = LabelEncoder()
le_gen = LabelEncoder()
le_job = LabelEncoder()

df["Education_Level"] = le_edu.fit_transform(df["Education_Level"])
df["Gender"] = le_gen.fit_transform(df["Gender"])
df["Job_Title"] = le_job.fit_transform(df["Job_Title"])

# Features & target: every remaining column except Salary is a feature.
X = df.drop("Salary", axis=1)
y = df["Salary"]

# Train model
model = LinearRegression()
model.fit(X, y)

# Save everything needed for inference. The feature list records the column
# order the model was fitted with.
feature_names = list(X.columns)
artifacts = {
    "salary_model.pkl": model,
    "le_edu.pkl": le_edu,
    "le_gen.pkl": le_gen,
    "le_job.pkl": le_job,
    "features.pkl": feature_names,
}
for artifact_path, artifact in artifacts.items():
    # Context manager guarantees each file is flushed and closed immediately,
    # instead of leaving the handle open until garbage collection.
    with open(artifact_path, "wb") as fh:
        pickle.dump(artifact, fh)

print("Model trained. Features:", feature_names)


Model trained. Features: ['Age', 'Gender', 'Job_Title', 'Experience_Years', 'Education_Level']
