In [21]:
!pip install pandas




[notice] A new release of pip is available: 24.3.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [22]:
import pandas as pd

In [23]:
# Open the input workbook once and pull each sheet into its own frame; the
# context manager closes the file handle after the last sheet is read.
with pd.ExcelFile('Inputs.xlsx') as book:
    risk_ranking_weights = pd.read_excel(book, sheet_name='Risk Ranking Weights')
    non_union_rates = pd.read_excel(book, sheet_name='Non-Union Rates')
    union_rates = pd.read_excel(book, sheet_name='Union Rates')
    checkpoint_costs = pd.read_excel(book, sheet_name='Checkpoint Costs')
    df = pd.read_excel(book, sheet_name='merged_data_union')

In [24]:
# Fix formatting: convert both date columns to datetimes, using
# format='mixed' so values written in different styles are still parsed.
for date_col in ('DateCreated', 'DateFixed'):
    df[date_col] = pd.to_datetime(df[date_col], format='mixed')

In [25]:
# Build a "Checkpoint ID" -> task-duration lookup. Duplicate checkpoint rows
# are dropped first so the lookup index is unique.
checkpoint_durations = checkpoint_costs[
    ["Checkpoint ID", "Time taken to complete task (in mins)"]
]
checkpoint_durations = checkpoint_durations.drop_duplicates(subset="Checkpoint ID")
checkpoint_time_map = checkpoint_durations.set_index("Checkpoint ID")[
    "Time taken to complete task (in mins)"
]

# Attach the duration to every record via its checkpoint.
df["Time taken to complete task (in mins)"] = df["Checkpoint ID"].map(
    checkpoint_time_map
)


In [26]:
def _clean_minute_rate(rates):
    """Return ``rates`` as numeric values, tolerating currency formatting.

    For a non-numeric column, ``$`` signs, commas and whitespace are removed
    before conversion, so a cell such as ``"$1,250.50"`` becomes ``1250.5``
    rather than being coerced to NaN. Anything that still cannot be parsed
    becomes NaN. Columns that are already numeric pass through unchanged.
    """
    if not pd.api.types.is_numeric_dtype(rates):
        rates = rates.astype(str).str.replace(r"[$,\s]", "", regex=True)
    return pd.to_numeric(rates, errors="coerce")


# Step 0: Convert Minute Rate columns to numeric (strips $ and commas first)
union_rates["Minute Rate"] = _clean_minute_rate(union_rates["Minute Rate"])
non_union_rates["Minute Rate"] = _clean_minute_rate(non_union_rates["Minute Rate"])

# Step 1: Create Code -> Minute Rate lookup dictionaries
union_rate_map = union_rates.set_index("Code")["Minute Rate"].to_dict()
non_union_rate_map = non_union_rates.set_index("Code")["Minute Rate"].to_dict()


# Step 2: Row-level lookup (kept for ad-hoc use; Step 3 is the vectorized form)
def get_minute_rate(row):
    """Return the minute rate for one record.

    The rate is looked up by ``row["ResponsibleParty"]`` in the union table
    when ``row["Union / Non-Union"]`` equals ``"Union"``, otherwise in the
    non-union table. Returns ``None`` when the party has no entry.
    """
    party = row["ResponsibleParty"]
    if row["Union / Non-Union"] == "Union":
        return union_rate_map.get(party)
    return non_union_rate_map.get(party)


# Step 3: Apply to df. Vectorized so it avoids a Python-level call per row and
# also works on an empty frame; parties without a rate end up as NaN.
responsible_party = df["ResponsibleParty"]
is_union = df["Union / Non-Union"].eq("Union")
df["Minute Rate (based on union/non-union)"] = responsible_party.map(
    union_rate_map
).where(is_union, responsible_party.map(non_union_rate_map))


In [27]:
# Risk Ranking -> Weights lookup, taken from the weights sheet.
weights_by_ranking = risk_ranking_weights.set_index("Risk Ranking")["Weights"]
risk_weight_map = weights_by_ranking.to_dict()

# Cost to re-do = task minutes x minute rate x risk weight, computed per row.
task_minutes = df["Time taken to complete task (in mins)"]
minute_rate = df["Minute Rate (based on union/non-union)"]
risk_multiplier = df["RiskRanking"].map(risk_weight_map)
df["Cost to re-do (based on hourly wage and multiplier)"] = (
    task_minutes * minute_rate * risk_multiplier
)


In [28]:
# Build a "Checkpoint ID" -> "Material Cost" lookup, dropping duplicate
# checkpoint rows so the lookup index is unique.
checkpoint_materials = checkpoint_costs[["Checkpoint ID", "Material Cost"]]
checkpoint_materials = checkpoint_materials.drop_duplicates(subset="Checkpoint ID")
material_cost_map = checkpoint_materials.set_index("Checkpoint ID")["Material Cost"]

# Attach the material cost to every record via its checkpoint.
df["Material Cost"] = df["Checkpoint ID"].map(material_cost_map)


In [29]:
# Total = weighted re-do cost plus material cost for the same row.
redo_cost = df["Cost to re-do (based on hourly wage and multiplier)"]
df["Total cost to re-do task"] = redo_cost.add(df["Material Cost"])


In [30]:
# Write the enriched records to the output workbook without the row index.
with pd.ExcelWriter('Outputs.xlsx') as writer:
    df.to_excel(writer, index=False)