In [50]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer

from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_absolute_error

import pickle


In [51]:
# Load the raw dataset from the working directory.
# The CSV carries a saved index column that pandas surfaces as "Unnamed: 0".
df = pd.read_csv(filepath_or_buffer="data.csv")

# Preview the first five rows to sanity-check column names and dtypes.
df.head(n=5)


Unnamed: 0,CGPA,IQ,Year_of_Experience,Dependents,Salary,Gender,Marital_Status,Expenses
0,6.2,90.0,0.0,0.0,18000.0,Male,Single,12000.0
1,6.5,92.0,1.0,0.0,20000.0,Female,Single,13000.0
2,6.8,95.0,1.0,1.0,22000.0,Male,Married,15000.0
3,7.0,98.0,2.0,1.0,25000.0,Female,Married,17000.0
4,7.2,100.0,2.0,2.0,27000.0,Male,Married,19000.0


In [52]:
# Report how many rows lack the regression target before discarding them.
# A bare expression that is not the last line of a cell is never displayed,
# so the count is printed explicitly.
missing_salary_count = df["Salary"].isnull().sum()
print("Rows with missing Salary:", missing_salary_count)

# Rows without a target cannot be used for supervised training; missing
# feature values are kept here and handled by the imputers in the pipeline.
df = df.dropna(subset=["Salary"])


In [53]:
# Target vector: the column the regressor learns to predict.
y = df["Salary"]

# Feature matrix: every remaining column of the frame.
X = df.drop(columns="Salary")


In [54]:
# Columns routed through the numeric branch of the preprocessor.
numeric_features = ["CGPA", "IQ", "Year_of_Experience", "Dependents", "Expenses"]

# Columns routed through the categorical branch of the preprocessor.
categorical_features = ["Gender", "Marital_Status"]


In [55]:
# Numeric branch: fill missing values with the column median, then
# standardize each column with StandardScaler.
numeric_pipeline = Pipeline(
    [
        ("imputer", SimpleImputer(strategy="median")),
        ("scaler", StandardScaler()),
    ]
)


In [56]:
# Categorical branch: fill missing values with the most frequent category,
# then one-hot encode.
#
# `drop="first"` is intentionally NOT combined with `handle_unknown="ignore"`:
# with both set, a category unseen during fit is encoded as all zeros, which is
# exactly the encoding of the dropped first category, so unseen values would be
# silently scored as the baseline (and older scikit-learn releases reject the
# combination outright). Keeping every category column gives unseen values
# their own all-zero encoding.
categorical_pipeline = Pipeline(steps=[
    ("imputer", SimpleImputer(strategy="most_frequent")),
    ("encoder", OneHotEncoder(handle_unknown="ignore"))
])


In [57]:
# Route each group of columns to its own preprocessing branch.
# `remainder="drop"` (the default, spelled out here) discards any column not
# listed in either feature group, e.g. the "Unnamed: 0" column from the CSV.
preprocessor = ColumnTransformer(
    [
        ("num", numeric_pipeline, numeric_features),
        ("cat", categorical_pipeline, categorical_features),
    ],
    remainder="drop",
)


In [58]:
# End-to-end estimator: preprocessing followed by an ordinary least-squares
# regressor, so raw feature frames can be passed straight to fit/predict.
model = Pipeline(
    [
        ("preprocessor", preprocessor),
        ("regressor", LinearRegression()),
    ]
)


In [59]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [60]:
# Fit the preprocessing steps and the regressor on the training split only.
model.fit(X=X_train, y=y_train)


In [61]:
# Score the held-out split with the fitted pipeline.
y_pred = model.predict(X_test)

# Report goodness of fit (R^2) and the mean absolute prediction error.
print("R2 Score:", r2_score(y_true=y_test, y_pred=y_pred))
print("MAE:", mean_absolute_error(y_true=y_test, y_pred=y_pred))


R2 Score: 0.9695897642464356
MAE: 2387.22076049006


In [62]:
# Persist the whole fitted pipeline (preprocessing + regressor) so it can be
# reloaded later without retraining.
# NOTE: only unpickle this file from a trusted location; loading a pickle can
# execute arbitrary code.
with open("salary_model.pkl", mode="wb") as model_file:
    pickle.dump(model, model_file)

print("Model saved successfully!")


Model saved successfully!
