In [61]:
import pandas as pd
import numpy as np
from datetime import datetime

In [62]:
# ---- Load CSVs ----
team_df = pd.read_csv('team.csv')
budgets_df = pd.read_csv('budgets.csv')
holidays_df = pd.read_csv('holidays.csv')
leaves_df = pd.read_csv('leaves.csv')
logged_df = pd.read_csv('logged.csv')

In [63]:
# ---- Clean column names ----
for df in [budgets_df, holidays_df, leaves_df, logged_df]:
    df.columns = df.columns.str.strip()

In [64]:
# ---- Date range ----
from_date = 20250701
to_date = 20250715

from_dt = datetime.strptime(str(from_date), "%Y%m%d")
to_dt = datetime.strptime(str(to_date), "%Y%m%d")
total_days = (to_dt - from_dt).days + 1

In [65]:
# ---- Holidays & leaves ----
num_holidays = holidays_df[
    (holidays_df['day'] >= from_date) & (holidays_df['day'] <= to_date)
].shape[0]

leaves_in_range = leaves_df[
    (leaves_df['day'] >= from_date) & (leaves_df['day'] <= to_date)
]

leaves_count = leaves_in_range.groupby('id').size().reset_index(name='leaves')

In [66]:
# ---- Build base report ----
report_df = team_df.merge(leaves_count, on='id', how='left').fillna(0)
report_df['leaves'] = report_df['leaves'].astype(int)
report_df['holidays'] = num_holidays
report_df['total_days'] = total_days
report_df['work_days'] = report_df['total_days'] - report_df['holidays'] - report_df['leaves']

In [67]:
# ---- Logged ----
logged_days = logged_df.groupby('id')['logged'].sum().reset_index(name='logged_days')
report_df = report_df.merge(logged_days, on='id', how='left').fillna(0)

In [68]:
# ---- Pivot tasks: original logged ----
task_pivot = logged_df.pivot_table(index='id', columns='task', values='logged', aggfunc='sum').reset_index().fillna(0)
report_df = report_df.merge(task_pivot, on='id', how='left')

In [69]:
# Ensure all task columns exist and no NaN
for task in ['task1', 'task2', 'task3', 'task4']:
    if task not in report_df.columns:
        report_df[task] = 0.0
report_df[['task1', 'task2', 'task3', 'task4']] = report_df[['task1', 'task2', 'task3', 'task4']].fillna(0)


In [70]:
# ---- Add total_original_logged for clarity ----
report_df['total_original_logged'] = report_df[['task1', 'task2', 'task3', 'task4']].sum(axis=1)

In [71]:
# ---- Compute balance_days ----
report_df['balance_days'] = report_df['work_days'] - report_df['logged_days']

In [72]:
# ---- Compute total team capacity ----
total_team_work_days = report_df['work_days'].sum()

In [73]:
# ---- Task budgets and current totals ----
task_budgets = {}
task_totals = {}

for _, row in budgets_df.iterrows():
    task = row['task']
    budget = row['budget']
    task_budgets[task] = total_team_work_days * budget
    task_totals[task] = report_df[task].sum()

print("\n=== Task budgets (days): ===")
print(task_budgets)
print("\n=== Task totals before balancing: ===")
print(task_totals)


=== Task budgets (days): ===
{'task1': np.float64(132.0), 'task2': np.float64(396.0), 'task3': np.float64(528.0), 'task4': np.float64(3960.0)}

=== Task totals before balancing: ===
{'task1': np.float64(0.0), 'task2': np.float64(3.0), 'task3': np.float64(3.56), 'task4': np.float64(4.37)}


In [74]:
# ---- Shuffle for fair distribution ----
report_df = report_df.sample(frac=1).reset_index(drop=True)

In [75]:
# ---- Add new columns for balanced allocation ----
for task in ['task1', 'task2', 'task3', 'task4']:
    report_df[f'log_{task}'] = 0.0

In [76]:
# ---- Distribute balance_days fairly ----
np.random.seed(42)  # optional

for idx, row in report_df.iterrows():
    balance = row['balance_days']
    if balance <= 0:
        continue

    available_tasks = [task for task in ['task1', 'task2', 'task3', 'task4']
                       if (task_budgets[task] - task_totals[task]) > 0]

    if not available_tasks:
        continue

    random_weights = np.random.rand(len(available_tasks))
    normalized_weights = random_weights / random_weights.sum()

    for i, task in enumerate(available_tasks):
        portion = round(balance * normalized_weights[i], 2)
        remaining_budget = round(task_budgets[task] - task_totals[task], 2)
        assign = min(portion, remaining_budget)

        report_df.at[idx, f'log_{task}'] += assign
        task_totals[task] += assign

In [77]:
# ---- Final calculations ----
report_df['total_logged'] = report_df[['log_task1', 'log_task2', 'log_task3', 'log_task4']].sum(axis=1)
report_df['leftover_balance_days'] = report_df['balance_days'] - report_df['total_logged']

In [78]:
# ---- Sort by ID ----
report_df = report_df.sort_values(by='id').reset_index(drop=True)

In [79]:
# ---- Final columns ----
final_cols = [
    'id', 'name', 'leaves', 'holidays', 'total_days', 'work_days',
    'logged_days', 'task1', 'task2', 'task3', 'task4', 'total_original_logged',
    'balance_days', 'log_task1', 'log_task2', 'log_task3', 'log_task4',
    'total_logged', 'leftover_balance_days'
]

report_df = report_df[final_cols]

In [80]:
# ---- Show final report ----
print("\n=== Final Team Allocation Report ===\n")
report_df


=== Final Team Allocation Report ===



Unnamed: 0,id,name,leaves,holidays,total_days,work_days,logged_days,task1,task2,task3,task4,total_original_logged,balance_days,log_task1,log_task2,log_task3,log_task4,total_logged,leftover_balance_days
0,p001,aaa,1,1,15,13,3.0,0.0,3.0,0.0,0.0,3.0,10.0,0.02,3.61,3.13,3.23,9.99,0.01
1,p002,bbb,1,1,15,13,7.93,0.0,0.0,3.56,4.37,7.93,5.07,0.12,1.73,1.76,1.47,5.08,-0.01
2,p003,ccc,0,1,15,14,0.0,0.0,0.0,0.0,0.0,0.0,14.0,8.27,2.11,1.81,1.82,14.01,-0.01
3,p004,ddd,0,1,15,14,0.0,0.0,0.0,0.0,0.0,0.0,14.0,1.97,5.01,3.86,3.16,14.0,0.0
4,p005,eee,0,1,15,14,0.0,0.0,0.0,0.0,0.0,0.0,14.0,0.51,6.8,5.32,1.37,14.0,0.0
5,p006,fff,0,1,15,14,0.0,0.0,0.0,0.0,0.0,0.0,14.0,6.08,1.39,2.9,3.64,14.01,-0.01
6,p007,ggg,0,1,15,14,0.0,0.0,0.0,0.0,0.0,0.0,14.0,2.95,2.06,6.29,2.71,14.01,-0.01
7,p008,hhh,0,1,15,14,0.0,0.0,0.0,0.0,0.0,0.0,14.0,2.79,0.9,6.27,4.04,14.0,0.0
8,p009,iii,0,1,15,14,0.0,0.0,0.0,0.0,0.0,0.0,14.0,3.92,3.74,2.5,3.85,14.01,-0.01
9,p010,jjj,0,1,15,14,0.0,0.0,0.0,0.0,0.0,0.0,14.0,2.23,4.3,1.12,6.36,14.01,-0.01


In [81]:
# ---- Check final task totals vs budget ----
print("\n=== Final Task Totals (original + balanced) vs Budget ===\n")
for task in ['task1', 'task2', 'task3', 'task4']:
    final_total = round(report_df[task].sum() + report_df[f'log_{task}'].sum(), 2)
    budget = round(task_budgets[task], 2)
    print(f"{task}: {final_total} / {budget} days")


=== Final Task Totals (original + balanced) vs Budget ===

task1: 54.29 / 132.0 days
task2: 66.51 / 396.0 days
task3: 59.89 / 528.0 days
task4: 83.36 / 3960.0 days
