In [1]:
import pandas as pd 
import numpy as np
from datetime import datetime, timedelta
from src.utils import isWorkday, estEndDate, isParentCompleted, isWeekend, calcLength, delay, assessWeather, isHeavyWeather
import pyodbc
import random
from tqdm import tqdm

In [None]:
server = 'LAPTOP-2NSE0JH1\SQLEXPRESS'
database = 'dummy'

conn_str = f'DRIVER={{ODBC Driver 17 for SQL Server}};SERVER={server};DATABASE={database};Trusted_Connection=yes;'

try:
    conn = pyodbc.connect(conn_str)

    cursor = conn.cursor()

    task_query = "SELECT * FROM Task"
    tasks = pd.read_sql(task_query, conn)

    project_query = "SELECT * FROM Project"
    projects = pd.read_sql(project_query, conn)

    conn.close()

except pyodbc.Error as e:
    print("Error connecting to SQL Server:", e)

In [2]:
tasks = pd.read_csv('data/10_2020/task.csv')
projects = pd.read_csv('data/10_2020/project.csv')

In [3]:
weather_historical = pd.read_csv('data/weather/weather_historical.csv')
weather_historical['datetime'] = pd.to_datetime(weather_historical['datetime'])
weather_historical = weather_historical.set_index('datetime')

In [4]:
tasks['ParentTaskID'] = tasks['ParentTaskID'].astype('Int64') 
tasks['StartDate'] = pd.to_datetime(tasks['StartDate'])
tasks['EndDate'] = pd.to_datetime(tasks['EndDate'])
tasks['ActualStartDate'] = pd.to_datetime(tasks['ActualStartDate'])
tasks['ActualEndDate'] = pd.to_datetime(tasks['ActualEndDate'])
tasks['WeatherAssessment'] = tasks.apply(lambda x: assessWeather(x['StartDate'],projects.loc[projects['ID']==x['ProjectID']]['Workday'].iloc[0], weather_historical),axis=1)
tasks['TaskLength'] = calcLength(tasks['ParentTaskID'])

In [5]:
# tasks.to_csv('data/100_2016/task.csv', index=False)
# projects.to_csv('data/100_2016/project.csv', index=False)

In [6]:
curr_date = tasks['StartDate'].min()
task_report = []
project_report = []

total_tasks = len(tasks)
pbar = tqdm(total=total_tasks, desc="Progress")

while ~tasks['Status'].eq('Completed').all():
    task_today = tasks[(tasks['StartDate'] <= curr_date) & (tasks['Status']!='Completed')]['ID'].tolist()
    heavy_weather = isHeavyWeather(curr_date, weather_historical)

    for idx in task_today:
        task = tasks.loc[tasks['ID']==idx].iloc[0]
        workday = projects.loc[projects['ID'] == task['ProjectID']].iloc[0]['Workday']
        if isParentCompleted(task, tasks):
            if task['Status'] == 'Not Started':
                task['ActualStartDate'] = str(curr_date)
                task['Status'] = 'On Progress'
            
            if isWorkday(curr_date, workday):
                if delay(task, task_today, curr_date, heavy_weather):
                    task['Progress'] += 0
                else:
                    task['Progress'] += 1
                
            if task['Progress'] >= task['Duration']:
                task['ActualEndDate'] = curr_date
                task['Status'] = 'Completed'
                pbar.update(1)
            
            if curr_date > task['EndDate'] and task['Status'] == 'On Progress':
                task['Status'] = 'Delayed'
                task['Priority'] = 'Critical'
            
            tasks.loc[tasks['ID']==idx] = task.values
            
            task_report.append({
                'Date': curr_date,
                'ID': task['ID'],
                'Name': task['Name'],
                'StartDate': task['StartDate'],
                'EndDate': task['EndDate'],
                'Cost': task['Cost'],
                'Priority': task['Priority'],
                'Progress': task['Progress'],
                'ProjectID' : task['ProjectID'],
                'Status': task['Status'],
                'Duration': task['Duration'],
                'Trade' : task['Trade'],
                'TaskLength' : task['TaskLength'],
                'IsBadWeather' : heavy_weather,
                'WeatherAssessment' : task['WeatherAssessment'],
                'ActualStartDate': task['ActualStartDate'],
                'ActualEndDate': task['ActualEndDate']
            })
            
    for pid in projects['ID'].tolist():
        project_task = tasks[tasks['ProjectID']==pid]
        if ~project_task['Status'].eq('Not Started').all() and ~project_task['Status'].eq('Completed').all():
            project_report.append({
                'Date' : curr_date,
                'ProjectID' : pid,
                'TotalTask' : len(project_task),
                'StartedTask' : len(project_task[project_task['Status']!='Not Started']),
                'OnGoingTask' : len(project_task[(project_task['Status']!='Not Started') & (project_task['Status']!='Completed')]),
                'DelayedTask' : len(project_task[(project_task['Status']=='Delayed')]),
                'CompletedTask' : len(project_task[(project_task['Status']=='Completed')]),
                'WorkDay' : project_task[(project_task['Status']!='Not Started')]['Progress'].sum(),
                'TotalSpent' : project_task[(project_task['Status']=='Completed')]['Cost'].sum(),
                'IsBadWeather' : heavy_weather
            })

    curr_date += timedelta(days=1)

task_reports = pd.DataFrame(task_report)
project_reports = pd.DataFrame(project_report)
task_reports['ActualEndDate'] = task_reports.groupby('ID')['ActualEndDate'].bfill()

project_dates = tasks.groupby('ProjectID').agg({'StartDate': 'min', 'EndDate': 'max', 'ActualStartDate':'min','ActualEndDate':'max'}).reset_index()
project_dates['WeatherAssessment'] = project_dates.apply(lambda x: assessWeather(x['StartDate'],projects.loc[projects['ID']==x['ProjectID']]['Workday'].iloc[0],weather_historical),axis=1)
project_reports = pd.merge(project_reports, project_dates, on='ProjectID', how='left')

print(f'Complete all task at {curr_date}')
pbar.close()

Progress: 100%|██████████| 1084/1084 [00:57<00:00, 18.87it/s]

Complete all task at 2023-08-22 00:00:00





In [7]:
task_reports[task_reports['ID']==749]

Unnamed: 0,Date,ID,Name,StartDate,EndDate,Cost,Priority,Progress,ProjectID,Status,Duration,Trade,TaskLength,IsBadWeather,WeatherAssessment,ActualStartDate,ActualEndDate
0,2020-02-26,749,building,2020-02-26,2020-03-10,1091.0,Critical,1,8,On Progress,10,6,0,1.0,33.333333,2020-02-26 00:00:00,2020-03-10
1,2020-02-27,749,building,2020-02-26,2020-03-10,1091.0,Critical,2,8,On Progress,10,6,0,0.0,33.333333,2020-02-26 00:00:00,2020-03-10
2,2020-02-28,749,building,2020-02-26,2020-03-10,1091.0,Critical,3,8,On Progress,10,6,0,0.0,33.333333,2020-02-26 00:00:00,2020-03-10
3,2020-02-29,749,building,2020-02-26,2020-03-10,1091.0,Critical,3,8,On Progress,10,6,0,0.0,33.333333,2020-02-26 00:00:00,2020-03-10
4,2020-03-01,749,building,2020-02-26,2020-03-10,1091.0,Critical,3,8,On Progress,10,6,0,0.0,33.333333,2020-02-26 00:00:00,2020-03-10
6,2020-03-02,749,building,2020-02-26,2020-03-10,1091.0,Critical,4,8,On Progress,10,6,0,0.0,33.333333,2020-02-26 00:00:00,2020-03-10
8,2020-03-03,749,building,2020-02-26,2020-03-10,1091.0,Critical,5,8,On Progress,10,6,0,0.0,33.333333,2020-02-26 00:00:00,2020-03-10
10,2020-03-04,749,building,2020-02-26,2020-03-10,1091.0,Critical,6,8,On Progress,10,6,0,1.0,33.333333,2020-02-26 00:00:00,2020-03-10
12,2020-03-05,749,building,2020-02-26,2020-03-10,1091.0,Critical,7,8,On Progress,10,6,0,1.0,33.333333,2020-02-26 00:00:00,2020-03-10
14,2020-03-06,749,building,2020-02-26,2020-03-10,1091.0,Critical,8,8,On Progress,10,6,0,1.0,33.333333,2020-02-26 00:00:00,2020-03-10


In [None]:
task_reports.to_csv('data/100_2016/task_report.csv',index=False)
project_reports.to_csv('data/100_2016/project_report.csv',index=False)