In [4]:
# Importing Libraries
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.impute import SimpleImputer

# Load the dataset again
df = pd.read_csv("C:/Users/kondu/Downloads/Day(1)/day1_updated_mental_health.xls")

# Feature Engineering: Encoding and Scaling

# Encoding categorical columns (e.g., 'Gender', 'Country', 'JobRole', 'Department', etc.)
df = pd.get_dummies(df, columns=['Gender', 'Country', 'JobRole', 'Department', 'RemoteWork', 'HasMentalHealthSupport', 'HasTherapyAccess', 'SalaryRange'], drop_first=True)

# Scaling numerical columns
scaler = StandardScaler()
numerical_cols = ['Age', 'WorkHoursPerWeek', 'StressLevel', 'ProductivityScore', 'SleepHours', 'PhysicalActivityHrs', 'ManagerSupportScore', 'MentalHealthDaysOff', 'WorkLifeBalanceScore', 'CareerGrowthScore']
df[numerical_cols] = scaler.fit_transform(df[numerical_cols])

# Define X and y
X = df.drop(['BurnoutLevel', 'BurnoutRisk', 'EmployeeID'], axis=1)  # Drop target variable and identifier
y = df['BurnoutLevel']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train Models

# Linear Regression
linear_model = LinearRegression()
linear_model.fit(X_train, y_train)
y_pred_linear = linear_model.predict(X_test)

# Ridge Regression
ridge_model = Ridge(alpha=1)
ridge_model.fit(X_train, y_train)
y_pred_ridge = ridge_model.predict(X_test)

# Lasso Regression
lasso_model = Lasso(alpha=0.1)
lasso_model.fit(X_train, y_train)
y_pred_lasso = lasso_model.predict(X_test)

# Evaluate Models

print("Linear Regression - R²: ", r2_score(y_test, y_pred_linear))
print("Linear Regression - MSE: ", mean_squared_error(y_test, y_pred_linear))

print("Ridge Regression - R²: ", r2_score(y_test, y_pred_ridge))
print("Ridge Regression - MSE: ", mean_squared_error(y_test, y_pred_ridge))

print("Lasso Regression - R²: ", r2_score(y_test, y_pred_lasso))
print("Lasso Regression - MSE: ", mean_squared_error(y_test, y_pred_lasso))

Linear Regression - R²:  -0.023988645884685855
Linear Regression - MSE:  6.953703083769445
Ridge Regression - R²:  -0.02392709167459106
Ridge Regression - MSE:  6.9532850813801845
Lasso Regression - R²:  -0.0030426763341608876
Lasso Regression - MSE:  6.811463173550334


In [5]:
# Save the cleaned or processed dataset
df.to_csv("day2_updated_mental_health.csv", index=False)
print("✅ Day 2 updated dataset saved as 'day2_updated_mental_health.csv'")


✅ Day 2 updated dataset saved as 'day2_updated_mental_health.csv'


In [6]:
from IPython.display import FileLink

# Provide download link
FileLink("day2_updated_mental_health.csv")
