In [1]:
import pandas as pd

# Load the Rev.02 workbook; no sheet_name is given, so pandas reads the first sheet.
df = pd.read_excel('IPC Map Rev.02.xlsx')

def consolidate_approval_data(row):
    """Collapse a row's client/consultant approval figures into one set.

    The client's figures are used whenever 'Approval Date (By Client)' is
    present; if that date is missing (NaN/None/NaT) the consultant's figures
    are used instead.

    Parameters
    ----------
    row : mapping-like (typically a pandas Series)
        Must provide 'Approval Date (By Client)' plus the three columns of
        whichever party ends up being selected.

    Returns
    -------
    pandas.Series
        Indexed by 'Approval Date', 'Monthly Amount Approved' and
        'Cummulative amount Approved', in that order.
    """
    consolidated_names = (
        'Approval Date',
        'Monthly Amount Approved',
        'Cummulative amount Approved',
    )

    if pd.isna(row['Approval Date (By Client)']):
        # No client approval recorded: fall back to the consultant's columns.
        source_names = (
            'Approval Date (By Consultant)',
            'Monthly Amount Approved (By Consultant)',
            'Cummulative amount Approved (By Consultant)',
        )
    else:
        source_names = (
            'Approval Date (By Client)',
            'Monthly Amount (By Client)',
            'Cummulative amount (By Client)',
        )

    # Only the selected party's columns are read from the row.
    return pd.Series({
        target: row[source]
        for target, source in zip(consolidated_names, source_names)
    })

# Run the row-wise consolidation once, then attach its three result columns
# to df under the consolidated names (assigned in this order).
approval_columns = [
    'Approval Date',
    'Monthly Amount Approved',
    'Cummulative amount Approved',
]
consolidated = df.apply(consolidate_approval_data, axis='columns')
df[approval_columns] = consolidated

# The six party-specific source columns are now redundant; remove them from df.
columns_to_drop = [
    'Approval Date (By Consultant)',
    'Monthly Amount Approved (By Consultant)',
    'Cummulative amount Approved (By Consultant)',
    'Approval Date (By Client)',
    'Monthly Amount (By Client)',
    'Cummulative amount (By Client)',
]
df.drop(columns=columns_to_drop, inplace=True)

# Write the result as the next workbook revision, without the index column.
df.to_excel('IPC Map Rev.03.xlsx', index=False)

In [None]:
import pandas as pd

# Fold down-payment rows (project names ending in '-DP') into their parent
# project rows.
#
# Input : 'IPC Map Rev.03.xlsx'
# Output: 'IPC Map Rev.04.xlsx' -- the regular (non '-DP') rows only, with a new
#         'dp_amount' column holding the summed 'collection_amount' of the '-DP'
#         rows that share the row's project name (suffix removed) and
#         'invoice_no.'. Rows without a matching down payment get an empty cell.
DP_SUFFIX = '-DP'

# Read the Excel file
df = pd.read_excel('IPC Map Rev.03.xlsx')

# Make column names lowercase and replace spaces with underscores
df.columns = df.columns.str.lower().str.replace(' ', '_')

# Convert 'submittal_date' to datetime
df['submittal_date'] = pd.to_datetime(df['submittal_date'])

# Split the rows into down-payment rows and regular rows.
# na=False: a missing project name counts as "not a down payment" instead of
# leaving NaN in the mask, which can neither be negated nor used for indexing.
is_dp = df['project'].str.endswith(DP_SUFFIX, na=False)
projects_with_dp = df[is_dp]
# .copy() so the new column is added to an independent frame, not a slice of df.
projects_without_dp = df[~is_dp].copy()

# Only down payments with a positive invoice number are allocated; a missing
# (NaN) invoice number fails the comparison and is skipped as well.
dp_rows = projects_with_dp[projects_with_dp['invoice_no.'] > 0]

# Parent project name: remove the trailing suffix only, so a '-DP' occurring
# elsewhere in the name is left intact.
parent_project = dp_rows['project'].str[:-len(DP_SUFFIX)]

# Total down payment per (parent project, invoice number).
# min_count=1 keeps the total NaN when every contributing amount is missing;
# otherwise missing amounts are ignored regardless of row order.
dp_totals = (
    dp_rows['collection_amount']
    .groupby([parent_project, dp_rows['invoice_no.']])
    .sum(min_count=1)
    .to_dict()
)

# Look up each regular row's total; rows without a matching down payment get NaN.
projects_without_dp['dp_amount'] = [
    dp_totals.get(key, float('nan'))
    for key in zip(projects_without_dp['project'], projects_without_dp['invoice_no.'])
]

# The output consists of the regular rows only.
# NOTE(review): '-DP' rows that have no matching regular row, or whose
# 'invoice_no.' is missing or not positive, contribute nothing and do not
# appear in the output -- confirm that dropping them silently is intended.
final_df = projects_without_dp

# Save the updated DataFrame to a new Excel file
final_df.to_excel('IPC Map Rev.04.xlsx', index=False)

In [None]:
import pandas as pd

# Fold "on account" rows (project names ending in 'on account', in any letter
# case) into their parent project rows.
#
# Input : 'IPC Map Rev.04.xlsx'
# Output: 'IPC Map Rev.05.xlsx' -- the regular rows only, with a new
#         'on_account_amount' column holding the summed 'collection_amount' of
#         the "on account" rows that share the row's project name (compared
#         case-insensitively, suffix removed) and 'invoice_no.'. Rows without a
#         match get an empty cell.
ON_ACCOUNT_SUFFIX = 'on account'

# Read the Excel file
df = pd.read_excel('IPC Map Rev.04.xlsx')

# Make column names lowercase and replace spaces with underscores
df.columns = df.columns.str.lower().str.replace(' ', '_')

# Convert 'submittal_date' to datetime
df['submittal_date'] = pd.to_datetime(df['submittal_date'])

# Split the rows into "on account" rows and regular rows.
# na=False: a missing project name counts as a regular row instead of leaving
# NaN in the mask, which can neither be negated nor used for indexing.
project_lower = df['project'].str.lower()
is_on_account = project_lower.str.endswith(ON_ACCOUNT_SUFFIX, na=False)
projects_with_on_account = df[is_on_account]
# .copy() so the new column is added to an independent frame, not a slice of df.
projects_without_on_account = df[~is_on_account].copy()

# Parent project name (lowercase): remove only the trailing "on account",
# together with an optional '-' separator and the whitespace around it.
# An "on account" occurring elsewhere in the name is left intact.
parent_project = (
    project_lower[is_on_account]
    .str.replace(r'\s*-?\s*on account$', '', regex=True)
    .str.strip()
)

# Total "on account" collection per (parent project, invoice number).
# Rows with a missing invoice number are excluded by groupby and never match.
# min_count=1 keeps the total NaN when every contributing amount is missing;
# otherwise missing amounts are ignored regardless of row order.
on_account_totals = (
    projects_with_on_account['collection_amount']
    .groupby([parent_project, projects_with_on_account['invoice_no.']])
    .sum(min_count=1)
    .to_dict()
)

# Look up each regular row's total by (lowercase project, invoice number);
# rows without a matching "on account" payment get NaN.
projects_without_on_account['on_account_amount'] = [
    on_account_totals.get(key, float('nan'))
    for key in zip(
        projects_without_on_account['project'].str.lower(),
        projects_without_on_account['invoice_no.'],
    )
]

# The output consists of the regular rows only.
# NOTE(review): "on account" rows that have no matching regular row contribute
# nothing and do not appear in the output -- confirm that dropping them
# silently is intended.
final_df = projects_without_on_account

# Save the updated DataFrame to a new Excel file
final_df.to_excel('IPC Map Rev.05.xlsx', index=False)