In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import Pipeline
import pickle

# ---------------------------------------------------------------------------
# Load and clean the data
# ---------------------------------------------------------------------------
# Read the CSV and immediately discard every row that has a missing value.
df = pd.read_csv("Salary Data.csv").dropna()

# Coerce the experience column to numeric; entries that cannot be parsed turn
# into NaN (errors="coerce") and are removed by the second dropna().
df["Years of Experience"] = pd.to_numeric(
    df["Years of Experience"],
    errors="coerce",
)
df = df.dropna()

# ---------------------------------------------------------------------------
# Split into features / target, then into train / test partitions
# ---------------------------------------------------------------------------
X = df.drop(columns="Salary")
y = df["Salary"]

# 80/20 split with a fixed seed so the partition is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# ---------------------------------------------------------------------------
# Preprocessing + regression pipeline
# ---------------------------------------------------------------------------
# These columns are one-hot encoded; handle_unknown='ignore' is set so that
# categories not seen during fit do not raise at transform time.
categorical_features = ["Gender", "Education Level", "Job Title"]
categorical_transformer = OneHotEncoder(handle_unknown='ignore')

# Columns not listed above are forwarded unchanged (remainder="passthrough").
preprocessor = ColumnTransformer(
    [("cat", categorical_transformer, categorical_features)],
    remainder="passthrough",
)

model = Pipeline(
    [
        ("preprocessor", preprocessor),
        ("regressor", LinearRegression()),
    ]
)

# ---------------------------------------------------------------------------
# Fit, evaluate, persist
# ---------------------------------------------------------------------------
model.fit(X_train, y_train)

# Pipeline.score delegates to the final estimator's score (R^2 for
# LinearRegression) on the held-out partition.
score = model.score(X_test, y_test)
print(f"Model R^2 Score: {score:.2f}")

# Serialize the whole fitted pipeline (preprocessing + regressor) to disk.
with open("model.pkl", "wb") as file:
    pickle.dump(model, file)

print("✅ Model saved as model.pkl")


Model R^2 Score: 0.85
✅ Model saved as model.pkl
