<a href="https://colab.research.google.com/github/siripoluri/ML-Colab-projects/blob/main/Salary_Prediction_with_all_features.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.impute import SimpleImputer

# Load dataset from CSV
df = pd.read_csv("Salary Data.csv")

# Rows without a recorded salary carry no supervision signal. Filling the
# target with the column mean would invent labels (and those invented labels
# could land in the test split), so such rows are dropped rather than imputed.
# The index is reset so the remaining rows are numbered 0..n-1.
labelled_df = df.dropna(subset=["Salary"]).reset_index(drop=True)

# Define features and target
X = labelled_df.drop(columns=["Salary"])
# y is a 1-D NumPy array aligned row-for-row with X
y = labelled_df["Salary"].to_numpy()

# Column groups used by the preprocessing steps below
categorical_features = ["Gender", "Education Level", "Job Title"]
numerical_features = ["Age", "Years of Experience"]

# Split data into training and testing sets BEFORE fitting any preprocessing,
# so the imputation means and the one-hot vocabulary are learned from the
# training rows only and nothing about the test rows leaks into the model.
# X itself is left as the raw (un-encoded) feature table.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Imputer for numerical features (fills gaps with the training-row mean)
num_imputer = SimpleImputer(strategy="mean")
num_imputer.fit(X_train_raw[numerical_features])

# One-Hot Encoding for categorical variables. With handle_unknown="ignore",
# a category that appears only in the test rows encodes as all zeros instead
# of getting a column the model never saw during training.
encoder = OneHotEncoder(handle_unknown="ignore", sparse_output=False)
encoder.fit(X_train_raw[categorical_features])


def build_feature_matrix(raw_features):
    """Turn a raw feature frame into the numeric matrix the model consumes.

    Uses the already-fitted ``num_imputer`` and ``encoder``; nothing is
    re-fitted here, so the same call is safe for training and test rows.

    Parameters
    ----------
    raw_features : pandas.DataFrame
        Frame containing the columns named in ``numerical_features`` and
        ``categorical_features``.

    Returns
    -------
    pandas.DataFrame
        The non-categorical columns (numerical ones imputed) followed by the
        one-hot columns, carrying the same row index as ``raw_features``.
    """
    # drop() returns a new frame, so the column assignment below does not
    # touch the caller's data.
    remaining = raw_features.drop(columns=categorical_features)
    remaining[numerical_features] = num_imputer.transform(remaining[numerical_features])

    # Re-attach the source index so concat aligns rows by label; the split
    # shuffles rows, so a fresh 0..n-1 index would misalign them.
    one_hot = pd.DataFrame(
        encoder.transform(raw_features[categorical_features]),
        columns=encoder.get_feature_names_out(categorical_features),
        index=raw_features.index,
    )
    return pd.concat([remaining, one_hot], axis=1)


X_train = build_feature_matrix(X_train_raw)
X_test = build_feature_matrix(X_test_raw)

# Build the linear regression estimator and train it on the training split
# (fit() hands back the estimator itself, so construction and fitting chain)
regressor = LinearRegression().fit(X_train, y_train)

# Generate salary predictions for the held-out test rows
predictions = regressor.predict(X_test)

# Output the model's coefficients, intercept, and predictions
model_coefficients = regressor.coef_
model_intercept = regressor.intercept_

# Pair every coefficient with the feature column it belongs to; a bare array
# of numbers cannot be mapped back to features by eye.
coefficient_table = pd.Series(model_coefficients, index=X_test.columns, name="Coefficient")

# R^2 on the held-out rows. Without this the test labels are never used and
# the train/test split says nothing about how well the model generalises.
test_r2 = regressor.score(X_test, y_test)

# Combine predictions with the test set
X_test_with_predictions = X_test.copy()
X_test_with_predictions["Predicted Salary"] = predictions

print("Model Coefficients:", coefficient_table.to_string(), sep="\n")
print("Model Intercept:", model_intercept)
print("Test R^2:", test_r2)
print("\nTest Set with Predictions:\n", X_test_with_predictions)


Model Coefficients: [ 2.26807274e+03  1.41749879e+03 -3.85908722e+02  6.29128287e+02
 -2.43219565e+02 -4.92123196e+03 -2.39505185e+03  7.55950338e+03
 -2.43219565e+02 -9.89530236e-10 -1.78637512e+04 -4.46937179e+04
 -3.61164833e+03  1.12081830e+03 -1.93610177e+02 -1.04773790e-09
  8.71455454e+04  8.76609733e+04 -1.41781797e+04 -1.16415322e-09
  5.48628071e+03 -1.67089865e+04 -2.19714997e+04 -2.92812500e+04
  1.21508492e-09 -1.72708912e+04 -1.64981811e+04 -1.85853197e+04
  4.54867591e+03 -1.91781797e+04 -9.77143320e+03  5.32855236e+04
  3.60433861e+04  5.05795316e+04  3.88367061e+04  4.33728516e+04
  3.96872801e+04  3.62563582e+04  4.10751147e+04  3.05692834e+04
  3.59403158e+04  4.55795316e+04  2.60471533e+04 -3.69488231e+04
  4.30085774e+03 -2.07860767e+04  2.81568152e+04 -1.69101070e+04
 -2.75287155e+04 -7.94118343e+03 -1.96003567e+04 -3.46187079e-10
 -9.16236105e+03 -4.58385330e-10 -2.94716483e+04 -1.92604112e+04
 -2.14946776e+04 -2.51932167e+04 -2.15871403e+04 -2.53145882e+04
 -4.8