<a href="https://colab.research.google.com/github/noelparakkal/NectarInternship/blob/master/Jobs_worker.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import json
from google.colab import files
from datetime import datetime

# Step 1: Upload Excel
uploaded = files.upload()
if not uploaded:
    # An empty result would make the first-key lookup below fail with an
    # opaque error; stop with an explicit message instead.
    raise ValueError("No file was uploaded. Please upload an Excel file.")
file_name = next(iter(uploaded))

# Step 2: Read Excel from row 4 (the first three rows are skipped, so the
# fourth row supplies the column headers)
df = pd.read_excel(file_name, skiprows=3)

# Step 3: Clean and verify column names
# Headers are trimmed and upper-cased so the lookups below do not depend on
# the spreadsheet's exact spacing or capitalisation.
df.columns = [str(c).strip().upper() for c in df.columns]
required_columns = ['ASSIGNEE', 'STATUS', 'ACTUAL START TIME', 'ACTUAL END TIME',
                    'SLA MET', 'MANAGED BY', 'COMMUNITY', 'SUB COMMUNITY',
                    'BUILDING', 'JOB TYPE', 'DISCIPLINE', 'PRIORITY']
missing_cols = [col for col in required_columns if col not in df.columns]
if missing_cols:
    raise ValueError(f"Missing columns: {', '.join(missing_cols)}")


def _counts_with_missing(column):
    """Count the values of ``column`` and record its empty cells.

    Args:
        column: A pandas Series taken from the uploaded sheet.

    Returns:
        A dict mapping ``str(value)`` to its number of occurrences, plus a
        ``'<Missing>'`` entry holding the number of empty (NaN) cells.
    """
    counts = {str(value): int(n) for value, n in column.dropna().value_counts().items()}
    counts['<Missing>'] = int(column.isna().sum())
    return counts


# Step 4: Assignee counts
assignee_counts = _counts_with_missing(df['ASSIGNEE'])
missing_assignees = assignee_counts['<Missing>']
assignee_summary = {
    # Includes the '<Missing>' entry, so this equals the number of rows.
    "total": sum(assignee_counts.values()),
    "assignee_counts": assignee_counts
}

# Step 5: Status counts
status_counts = _counts_with_missing(df['STATUS'])
missing_status = status_counts['<Missing>']
status_summary = {
    "total": sum(status_counts.values()),
    "status_counts": status_counts
}

# Step 6: Convert time columns with specific format
def parse_custom_datetime(dt_str):
    """Convert one cell of a time column into a datetime.

    Args:
        dt_str: The cell value. Text is expected in the form
            ``'05 Jan 2023 14:30:00'`` (``%d %b %Y %H:%M:%S``); surrounding
            whitespace is ignored.

    Returns:
        The parsed ``datetime``. A value that is already a datetime (for
        example a cell the Excel reader has already converted) is returned
        unchanged. Anything that cannot be parsed yields ``pd.NaT``.
    """
    # Already-converted cells must not be stringified: their default text
    # form does not match the custom format and would be lost as NaT.
    if isinstance(dt_str, datetime):
        return dt_str
    try:
        return datetime.strptime(str(dt_str).strip(), '%d %b %Y %H:%M:%S')
    except (ValueError, TypeError):
        # Unparseable or empty cell: best effort, report "no timestamp".
        return pd.NaT

# Run the cell-level parser over both timestamp columns, replacing each
# column with its parsed values.
for time_column in ('ACTUAL START TIME', 'ACTUAL END TIME'):
    df[time_column] = df[time_column].apply(parse_custom_datetime)

# Step 7: Calculate duration in minutes
def calculate_duration_minutes(row):
    """Return the elapsed minutes between a row's actual start and end.

    Args:
        row: A mapping-like row exposing 'ACTUAL START TIME' and
            'ACTUAL END TIME'.

    Returns:
        The duration in minutes as a float, or ``None`` when either
        timestamp is missing or the end is not strictly after the start.
    """
    started = row['ACTUAL START TIME']
    if pd.isna(started):
        return None

    finished = row['ACTUAL END TIME']
    if pd.isna(finished):
        return None

    # Zero-length and negative spans are treated as "no duration".
    if not (finished > started):
        return None

    elapsed = finished - started
    return elapsed.total_seconds() / 60

# Evaluate the duration row by row (axis=1 passes each row to the function),
# then store the result as a new column.
duration_minutes = df.apply(calculate_duration_minutes, axis=1)
df['DURATION_MINUTES'] = duration_minutes

# Step 8: Time summary per assignee
def _label(value):
    """Return ``value`` as text, or '<Unknown>' when the cell is empty."""
    return str(value) if pd.notna(value) else '<Unknown>'


def _status_counts(rows):
    """Count the STATUS values of ``rows`` (empty cells included) as {str: int}."""
    counts = rows['STATUS'].value_counts(dropna=False).to_dict()
    return {str(status): int(n) for status, n in counts.items()}


def _by_count_desc(entries):
    """Sort breakdown entries by 'count', largest first; ties keep first-seen order."""
    return sorted(entries, key=lambda entry: entry['count'], reverse=True)


def _sla_summary(group):
    """Summarise pass/fail counts and percentages from the SLA MET column."""
    sla_values = group['SLA MET'].dropna().astype(str).str.strip().str.upper()
    passed = int((sla_values == 'SLA-PASSED').sum())
    failed = int((sla_values == 'SLA-FAILED').sum())
    decided = passed + failed  # rows with any other SLA text are ignored

    def percentage(count):
        return round((count / decided) * 100, 2) if decided > 0 else 0

    return {
        "pass_count": passed,
        "fail_count": failed,
        "pass_percentage": float(percentage(passed)),
        "fail_percentage": float(percentage(failed))
    }


def _location_breakdown(group):
    """Count jobs per (community, sub community, building) combination."""
    tally = {}
    columns = zip(group['COMMUNITY'], group['SUB COMMUNITY'], group['BUILDING'])
    for raw_location in columns:
        # Tuple keys keep the three parts separate, so a value that itself
        # contains '|' cannot merge two different locations.
        location = tuple(_label(part) for part in raw_location)
        tally[location] = tally.get(location, 0) + 1
    return _by_count_desc(
        {"community": community, "sub_community": sub_community,
         "building": building, "count": n}
        for (community, sub_community, building), n in tally.items()
    )


def _job_type_breakdown(group):
    """Count jobs per job type."""
    tally = {}
    for raw_job_type in group['JOB TYPE']:
        job_type = _label(raw_job_type)
        tally[job_type] = tally.get(job_type, 0) + 1
    return _by_count_desc(
        {"job_type": job_type, "count": n} for job_type, n in tally.items()
    )


def _discipline_breakdown(group):
    """Count jobs per (discipline, job type), each with its priority counts."""
    tally = {}
    columns = zip(group['DISCIPLINE'], group['JOB TYPE'], group['PRIORITY'])
    for raw_discipline, raw_job_type, raw_priority in columns:
        pair = (_label(raw_discipline), _label(raw_job_type))
        priority = _label(raw_priority)
        entry = tally.setdefault(pair, {"count": 0, "priority_counts": {}})
        entry["count"] += 1
        entry["priority_counts"][priority] = entry["priority_counts"].get(priority, 0) + 1
    return _by_count_desc(
        {
            "discipline": discipline,
            "job_type": job_type,
            "count": entry["count"],
            "priority_counts": [
                {"priority": priority, "count": n}
                for priority, n in entry["priority_counts"].items()
            ]
        }
        for (discipline, job_type), entry in tally.items()
    )


# Maps str(assignee) -> summary dict; rows without an assignee are excluded.
time_summary = {}
grouped = df.dropna(subset=['ASSIGNEE']).groupby('ASSIGNEE')

for name, group in grouped:
    # Jobs with a usable (positive) duration.
    valid_durations = group['DURATION_MINUTES'].dropna()
    valid_durations = valid_durations[valid_durations > 0]

    average_duration = round(valid_durations.mean(), 2) if not valid_durations.empty else 0
    total_minutes = round(valid_durations.sum(), 2) if not valid_durations.empty else 0

    # The first non-empty manager listed for this assignee is reported.
    managers = group['MANAGED BY'].dropna().astype(str).str.strip().unique()
    manager_name = managers[0] if len(managers) > 0 else "<Unknown>"

    # Jobs whose duration is missing or not positive.
    no_duration = group[(group['DURATION_MINUTES'].isna()) | (group['DURATION_MINUTES'] <= 0)]

    time_summary[str(name)] = {
        "managed_by": manager_name,
        "average_duration_minutes": float(average_duration),
        "total_duration_minutes": float(total_minutes),
        "job_count_with_duration": int(len(valid_durations)),
        "job_count_without_duration": len(no_duration),
        "job_status_counts_without_duration": _status_counts(no_duration),
        "status_breakdown_for_each_assignee_counts": _status_counts(group),
        "sla_summary": _sla_summary(group),
        "location_breakdown": _location_breakdown(group),
        "job_type_breakdown": _job_type_breakdown(group),
        "discipline_job_type_breakdown": _discipline_breakdown(group)
    }

# Banner: describe what the interactive loop actually does -- 'all' saves the
# full summary to a file and quits; it does not print it.
print("System ready. Enter an assignee name to view details, 'all' to save all data and quit, or 'exit' to save the requested data and quit:")
print("Available assignees:")
for assignee, count in assignee_counts.items():
    # Only names present in time_summary can be looked up; the '<Missing>'
    # bucket of assignee_counts is reported separately below.
    if assignee in time_summary:
        print(f"- {assignee} ({count})")

unassigned_jobs = assignee_counts.get('<Missing>', 0)
if unassigned_jobs and '<Missing>' not in time_summary:
    print(f"({unassigned_jobs} job(s) have no assignee and cannot be selected)")

print("\nStep options:")
print("1: Job count with duration and without duration (with status counts for no-duration jobs)")
print("2: Status breakdown")
print("3: Duration summary (avg, total, and job count)")
print("4: SLA summary")
print("5: Location breakdown")
print("6: Job type breakdown")
print("7: Discipline breakdown")
print("8: Managed by")

# Collects the data the user views, keyed by assignee, for export on 'exit'.
requested_data = {}

def get_step_data(assignee, step):
    """Select the part of an assignee's summary that a menu step refers to.

    Args:
        assignee: Key into ``time_summary``.
        step: Menu choice as a string: '1' to '8', or 'all'.

    Returns:
        For 'all', the assignee's full summary dict itself. For '1'-'8', a
        new dict holding only the fields of that step. For any other step,
        an empty dict.

    Raises:
        KeyError: If ``assignee`` is not present in ``time_summary``.
    """
    record = time_summary[assignee]
    if step == 'all':
        return record

    # Fields exposed by each numbered step, in output order.
    step_fields = {
        '1': ("job_count_with_duration",
              "job_count_without_duration",
              "job_status_counts_without_duration"),
        '2': ("status_breakdown_for_each_assignee_counts",),
        '3': ("average_duration_minutes",
              "total_duration_minutes",
              "job_count_with_duration"),
        '4': ("sla_summary",),
        '5': ("location_breakdown",),
        '6': ("job_type_breakdown",),
        '7': ("discipline_job_type_breakdown",),
        '8': ("managed_by",),
    }
    return {field: record[field] for field in step_fields.get(step, ())}

# Interactive loop: look up assignees, show the chosen steps, and collect
# everything that was viewed so it can be exported on 'exit'.
valid_steps = ('1', '2', '3', '4', '5', '6', '7', '8', 'all')

while True:
    user_input = input("\nEnter assignee name, 'all', or 'exit': ").strip().lower()

    if user_input == 'exit':
        if requested_data:
            with open('requested_assignees.json', 'w') as f:
                json.dump(requested_data, f, indent=2)
            files.download('requested_assignees.json')
            print("Requested data saved to requested_assignees.json. Exiting...")
        else:
            print("No data requested. Exiting...")
        break

    if user_input == 'all':
        with open('all_assignees.json', 'w') as f:
            json.dump(time_summary, f, indent=2)
        files.download('all_assignees.json')
        print("All data saved to all_assignees.json. Exiting...")
        break

    # Case-insensitive lookup. The stored name is stripped as well, because
    # the typed input is stripped and a padded name could otherwise never
    # match. The first matching key wins.
    matched_assignee = next(
        (key for key in time_summary if key.strip().lower() == user_input),
        None,
    )
    if matched_assignee is None:
        print(json.dumps({"error": "Assignee not found"}, indent=2))
        continue

    while True:
        step_input = input(f"Which step do you want to view for '{matched_assignee}'? (1-8, 'all', 'back'): ").strip().lower()
        if step_input == 'back':
            break
        if step_input not in valid_steps:
            print("Invalid input. Please enter 1–8, 'all', or 'back'.")
            continue

        step_data = get_step_data(matched_assignee, step_input)
        if step_input == 'all':
            # Store a copy: 'all' returns the summary record itself, and later
            # partial updates must not write into time_summary.
            requested_data[matched_assignee] = dict(step_data)
        else:
            # The entry is created only once a step is actually viewed, so an
            # assignee that was merely looked up is not exported as {}.
            requested_data.setdefault(matched_assignee, {}).update(step_data)

        print(json.dumps({matched_assignee: step_data}, indent=2))
