In [2]:
# Mounting Google Drive to Colab
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np


df = pd.read_csv('/content/drive/MyDrive/530/hr_dashboard_data.csv')

# Select features for prediction
features = ['Age', 'Projects Completed', 'Productivity (%)', 'Feedback Score', 'Salary', 'Department', 'Position']  # Include 'Department' and 'Position'
X = df[features]
y = df['Satisfaction Rate (%)']

# Encode categorical variables
X = pd.get_dummies(X, columns=['Department', 'Position'])

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train the model
model = LinearRegression()
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

print(f"Root Mean Squared Error: {rmse}")
print(f"R-squared Score: {r2}")

# Feature importance
feature_importance = pd.DataFrame({'feature': X.columns, 'importance': abs(model.coef_)})
feature_importance = feature_importance.sort_values('importance', ascending=False)
print("\nTop 5 Most Important Features:")
print(feature_importance.head())

# Function to predict satisfaction rate for a new employee
def predict_satisfaction(employee_data):
    employee_df = pd.DataFrame([employee_data])
    employee_df = pd.get_dummies(employee_df, columns=['Department', 'Position'])

    for col in X.columns:
        if col not in employee_df.columns:
            employee_df[col] = 0
    employee_df = employee_df[X.columns]
    return model.predict(employee_df)[0]


Root Mean Squared Error: 29.8403372898038
R-squared Score: 0.01866011712957949

Top 5 Most Important Features:
                      feature  importance
11            Position_Intern   14.755590
12  Position_Junior Developer   11.415424
13           Position_Manager   10.118562
15         Position_Team Lead   10.072182
8        Department_Marketing    5.804001


In [4]:
# New employee example
new_employee = {
    'Age': 30,
    'Projects Completed': 10,
    'Productivity (%)': 75,
    'Feedback Score': 4.0,
    'Salary': 70000,
    'Department': 'IT',
    'Position': 'Analyst'
}

predicted_satisfaction = predict_satisfaction(new_employee)
print(f"\nPredicted Satisfaction Rate for the new employee: {predicted_satisfaction:.2f}%")


Predicted Satisfaction Rate for the new employee: 48.14%


In [5]:
# New employee example
new_employee = {
    'Age': 30,
    'Projects Completed': 10,
    'Productivity (%)': 75,
    'Feedback Score': 4.0,
    'Salary': 100000,
    'Department': 'IT',
    'Position': 'Analyst'
}

predicted_satisfaction = predict_satisfaction(new_employee)
print(f"\nPredicted Satisfaction Rate for the new employee: {predicted_satisfaction:.2f}%")


Predicted Satisfaction Rate for the new employee: 50.97%


In [6]:
# New employee example
new_employee = {
    'Age': 30,
    'Projects Completed': 10,
    'Productivity (%)': 75,
    'Feedback Score': 4.0,
    'Salary': 70000,
    'Department': 'IT',
    'Position': 'Intern'
}

predicted_satisfaction = predict_satisfaction(new_employee)
print(f"\nPredicted Satisfaction Rate for the new employee: {predicted_satisfaction:.2f}%")


Predicted Satisfaction Rate for the new employee: 67.00%
