In [2]:
import pandas as pd
from xerparser.reader import Reader


def read_xer_file(file):
    """Flatten the activities of an XER file into a pandas DataFrame.

    The file is opened with ``Reader``; every project's WBS elements are
    walked in order and one row is produced for each activity listed under
    a WBS element.

    Args:
        file: Location of the XER file, passed unchanged to ``Reader``.

    Returns:
        A ``pandas.DataFrame`` with one row per visited activity and the
        columns ``ID``, ``Project Name``, ``Activity Count``, ``WBS Name``,
        ``Task Name``, ``Start Date``, ``End Date``, ``Duration``,
        ``Constraints``, ``Driving Path Flag``, ``Free Float``,
        ``Task Type``, ``Total Float``, ``Calendar``, ``Activity Codes``
        and ``Task ID`` (in that order). ``Activity Count`` is
        ``len(project.activities)`` of the owning project, repeated on each
        of its rows. When no activity is found the frame is empty but still
        carries all columns.
    """
    xer = Reader(file)

    # Output column order; also the keys of the per-activity record below.
    column_names = [
        'ID',
        'Project Name',
        'Activity Count',
        'WBS Name',
        'Task Name',
        'Start Date',
        'End Date',
        'Duration',
        'Constraints',
        'Driving Path Flag',
        'Free Float',
        'Task Type',
        'Total Float',
        'Calendar',
        'Activity Codes',
        'Task ID',
    ]
    columns = {name: [] for name in column_names}

    for project in xer.projects:
        project_name = project.proj_short_name
        activity_count = len(project.activities)

        for wbs in project.wbss:
            wbs_name = wbs.wbs_name

            for act in wbs.activities:
                record = {
                    'ID': act.id,
                    'Project Name': project_name,
                    'Activity Count': activity_count,
                    'WBS Name': wbs_name,
                    'Task Name': act.task_name,
                    'Start Date': act.start_date,
                    'End Date': act.end_date,
                    'Duration': act.duration,
                    'Constraints': act.constraints,
                    'Driving Path Flag': act.driving_path_flag,
                    'Free Float': act.free_float_hr_cnt,
                    'Task Type': act.task_type,
                    'Total Float': act.total_float_hr_cnt,
                    'Calendar': act.calendar,
                    'Activity Codes': act.activitycodes,
                    'Task ID': act.task_id,
                }
                # Every column grows by exactly one value per activity, so
                # the project-level and WBS-level columns can never end up
                # with a different length than the activity-level ones
                # (which would make the DataFrame constructor raise).
                for name in column_names:
                    columns[name].append(record[name])

    return pd.DataFrame(columns)

# Relative path of the sample XER file to load.
file = r"xer/Sample_Xer_02.xer"

# Build the one-row-per-activity DataFrame from the file.
df = read_xer_file(file)

# Print the first rows as a quick look at the resulting columns.
print(df.head())
        


       ID Project Name  Activity Count   WBS Name  \
0  185546     Baseline              27      CIVIL   
1  185547     Baseline              27  LONG LEAD   
2  185548     Baseline              27  LONG LEAD   
3  185549     Baseline              27  LONG LEAD   
4  185550     Baseline              27  LONG LEAD   

                       Task Name          Start Date            End Date  \
0     CIVIL WORKS GENERAL LAYOUT 2012-01-01 00:00:00 2012-01-15 18:00:00   
1  Pump -  Request for Quotation 2012-01-15 18:00:00 2012-02-14 17:00:00   
2          Pump -  Bidding Phase 2012-02-14 17:00:00 2012-02-21 16:00:00   
3  Pump -  Commercial Evaluation 2012-02-21 16:00:00 2012-03-06 15:00:00   
4         Pump -  Purchase Order 2012-03-06 15:00:00 2012-03-31 14:00:00   

    Duration Constraints Driving Path Flag  Free Float Task Type  Total Float  \
0  13.750000        None                 N         0.0   TT_Task       4956.0   
1  29.958333        None                 N         0.0   TT_Ta