In [11]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

In [12]:
# 1. Load the dataset (replace DATA_PATH with the actual path to your downloaded CSV).
# The rest of the notebook reads the 'YearsExperience' and 'Salary' columns from it.
DATA_PATH = '/empexpsaldataset.csv'

# The first column of the file is an unnamed, previously saved row index; read
# naively it becomes a spurious 'Unnamed: 0' data column. Use it as the index instead.
data = pd.read_csv(DATA_PATH, index_col=0)
data.head()

Unnamed: 0,YearsExperience,Salary
0,1.2,39344.0
1,1.4,46206.0
2,1.6,37732.0
3,2.1,43526.0
4,2.3,39892.0


In [13]:
# 2. Prepare the data
# Feature matrix: the single 'YearsExperience' column (selected with a list so
# it stays two-dimensional); target: the 'Salary' column.
feature_columns = ['YearsExperience']
X = data[feature_columns]
y = data['Salary']

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardize the feature for models that are sensitive to feature scale.
# The scaler is fitted on the training split only and then applied to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 3. Define and train models
# Every estimator that accepts a seed gets a fixed one so that re-running the
# notebook reproduces the same metrics.
models = {
    "Linear Regression": LinearRegression(),
    "Random Forest Regressor": RandomForestRegressor(random_state=42),
    "Decision Tree Regression": DecisionTreeRegressor(random_state=42)
}

# Names of models that are trained on the standardized features. None of the
# models registered above is listed here, so for now all of them are fitted on
# the raw features; register an estimator under one of these names (or add its
# name to the set) to have it use the scaled data.
scaled_feature_models = {"Support Vector Regressor"}

# Maps model name -> {"MSE", "RMSE", "R-squared"} computed on the test split.
results = {}

for name, model in models.items():
    # Pick the matching train/test feature pair once, then fit and predict.
    if name in scaled_feature_models:
        train_features, test_features = X_train_scaled, X_test_scaled
    else:
        train_features, test_features = X_train, X_test

    model.fit(train_features, y_train)
    predictions = model.predict(test_features)

    # Calculate metrics on the held-out test split
    mse = mean_squared_error(y_test, predictions)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_test, predictions)

    results[name] = {"MSE": mse, "RMSE": rmse, "R-squared": r2}

# 4. Display results
# One title line, then for each model a blank line, a header and its three metrics.
print("Regression Model Comparison:")
for name, metrics in results.items():
    report_lines = [
        f"\n--- {name} ---",
        f"  Mean Squared Error (MSE): {metrics['MSE']:.2f}",
        f"  Root Mean Squared Error (RMSE): {metrics['RMSE']:.2f}",
        f"  R-squared (R^2) Score: {metrics['R-squared']:.4f}",
    ]
    # Joining with newlines yields the same text as one print() call per line.
    print(*report_lines, sep="\n")


Regression Model Comparison:

--- Linear Regression ---
  Mean Squared Error (MSE): 49830096.86
  Root Mean Squared Error (RMSE): 7059.04
  R-squared (R^2) Score: 0.9024

--- Random Forest Regressor ---
  Mean Squared Error (MSE): 63721129.71
  Root Mean Squared Error (RMSE): 7982.55
  R-squared (R^2) Score: 0.8753

--- Decision Tree Regression ---
  Mean Squared Error (MSE): 101047709.83
  Root Mean Squared Error (RMSE): 10052.25
  R-squared (R^2) Score: 0.8022
