In [22]:
# Core
import os
import numpy as np
import pandas as pd

# Visualization (optional)
import matplotlib.pyplot as plt

# Sklearn utilities
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Regression models
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor

# XGBoost
from xgboost import XGBRegressor

# Warnings
import warnings
warnings.filterwarnings("ignore")


In [23]:
# Adjust path if needed
csv_path = "../data/productivity/productivity_daily.csv"

print("Using CSV:", os.path.abspath(csv_path))
print("Exists:", os.path.exists(csv_path))

df = pd.read_csv(csv_path)
print("Shape:", df.shape)

df.head()


Using CSV: d:\imp\certificate\project\Working-Project\EMPLOYEE\Time-Project-Analysis\data\productivity\productivity_daily.csv
Exists: True
Shape: (364, 5)


Unnamed: 0,user_id,date,score,completed,total
0,2,2026-12-12,100,6,6
1,2,2026-12-11,93,5,5
2,2,2026-12-10,50,4,8
3,2,2026-12-09,89,5,6
4,2,2026-12-08,70,6,9


In [24]:
df["date"] = pd.to_datetime(df["date"])
df = df.sort_values("date").reset_index(drop=True)


In [25]:
y = df["score"].astype(float)


In [26]:
# Trend index
df["idx"] = np.arange(len(df))

# Day of week (0 = Monday)
df["dow"] = df["date"].dt.weekday

# Avoid division by zero
df["completion_ratio"] = df["completed"] / df["total"].replace(0, 1)

X = df[
    [
        "idx",
        "dow",
        "completed",
        "total",
        "completion_ratio"
    ]
]

X.head()


Unnamed: 0,idx,dow,completed,total,completion_ratio
0,0,3,3,5,0.6
1,1,4,4,6,0.666667
2,2,5,6,9,0.666667
3,3,6,5,7,0.714286
4,4,0,6,9,0.666667


In [27]:
print("X shape:", X.shape)
print("y shape:", y.shape)


X shape: (364, 5)
y shape: (364,)


In [28]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=False
)

print("Train size:", X_train.shape)
print("Test size :", X_test.shape)


Train size: (291, 5)
Test size : (73, 5)


In [29]:
models = {
    "Linear Regression": LinearRegression(),
    "Decision Tree": DecisionTreeRegressor(random_state=42),
    "Random Forest": RandomForestRegressor(random_state=42),
    "Gradient Boosting": GradientBoostingRegressor(random_state=42),
    "SVR": SVR(),
    "KNN": KNeighborsRegressor(),
    "XGBoost": XGBRegressor(
        n_estimators=150,
        learning_rate=0.05,
        max_depth=6,
        random_state=42,
        objective="reg:squarederror"
    )
}

results = []

for name, model in models.items():
    model.fit(X_train, y_train)
    preds = model.predict(X_test)

    results.append({
        "Model": name,
        "MAE": round(mean_absolute_error(y_test, preds), 2),
        "RMSE": round(np.sqrt(mean_squared_error(y_test, preds)), 2),
        "R2": round(r2_score(y_test, preds), 3)
    })

comparison_df = pd.DataFrame(results).sort_values("MAE")
comparison_df


Unnamed: 0,Model,MAE,RMSE,R2
0,Linear Regression,3.73,4.64,0.893
6,XGBoost,4.46,5.39,0.855
2,Random Forest,4.47,5.37,0.856
3,Gradient Boosting,4.65,5.62,0.842
1,Decision Tree,5.86,7.16,0.745
4,SVR,11.09,14.25,-0.013
5,KNN,11.1,14.08,0.012
