In [1]:
import pandas as pd

df = pd.read_excel('IPC Map Rev.02.xlsx')

def consolidate_approval_data(row):
    if pd.notna(row['Approval Date (By Client)']):
        return pd.Series({
            'Approval Date': row['Approval Date (By Client)'],
            'Monthly Amount Approved': row['Monthly Amount (By Client)'],
            'Cummulative amount Approved': row['Cummulative amount (By Client)']
        })
    else:
        return pd.Series({
            'Approval Date': row['Approval Date (By Consultant)'],
            'Monthly Amount Approved': row['Monthly Amount Approved (By Consultant)'],
            'Cummulative amount Approved': row['Cummulative amount Approved (By Consultant)']
        })

# Apply the function to each row and create new columns
df[['Approval Date', 'Monthly Amount Approved', 'Cummulative amount Approved']] = df.apply(consolidate_approval_data, axis=1)

# Drop the original 6 columns
columns_to_drop = ['Approval Date (By Consultant)', 'Monthly Amount Approved (By Consultant)',
                   'Cummulative amount Approved (By Consultant)', 'Approval Date (By Client)',
                   'Monthly Amount (By Client)', 'Cummulative amount (By Client)']
df.drop(columns=columns_to_drop, inplace=True)

# Save the updated DataFrame (optional)
df.to_excel('IPC Map Rev.03.xlsx', index=False)

In [None]:
import pandas as pd

# Read the Excel file
df = pd.read_excel('IPC Map Rev.03.xlsx')

# Make column names lowercase and replace spaces with underscores
df.columns = df.columns.str.lower().str.replace(' ', '_')

# Convert 'submittal_date' to datetime
df['submittal_date'] = pd.to_datetime(df['submittal_date'])

# Identify projects with down payments and those without
projects_with_dp = df[df['project'].str.endswith('-DP')]
projects_without_dp = df[~df['project'].str.endswith('-DP')]

# Add a new column 'dp_amount' to the `projects_without_dp` DataFrame and initialize it with 'nan'
projects_without_dp.loc[:, 'dp_amount'] = pd.NA

# Iterate over the rows of `projects_with_dp`
for index, row in projects_with_dp.iterrows():
    project_without_dp = row['project'].replace('-DP', '')
    invoice_no = row['invoice_no.']

    # Check if the `invoice_no.` is not 0 and greater than or equal to 0
    if invoice_no != 0 and invoice_no >= 0:
        # Find the rows in `projects_without_dp` which have the same `invoice_no.` but the project name without the '-DP'
        matched_rows = projects_without_dp.loc[
            (projects_without_dp['project'] == project_without_dp) &
            (projects_without_dp['invoice_no.'] == invoice_no)
        ]

        # Check if any rows from `projects_with_dp` match the same row in `projects_without_dp`
        if len(matched_rows) > 0:
            # Sum the `collection_amount` from `projects_with_dp` and assign it to the `DP Amount` column in
            # `projects_without_dp` for the matched rows
            dp_amount = row['collection_amount']

            # Use `.loc` to update 'dp_amount' and avoid the SettingWithCopyWarning
            projects_without_dp.loc[matched_rows.index, 'dp_amount'] = (
                projects_without_dp.loc[matched_rows.index, 'dp_amount'].fillna(0).astype(float) + dp_amount
            )

# Concatenate the DataFrames and remove rows with '-DP' in project name
final_df = pd.concat([projects_without_dp, projects_with_dp])
final_df = final_df[~final_df['project'].str.endswith('-DP')]

# Save the updated DataFrame to a new Excel file
final_df.to_excel('IPC Map Rev.04.xlsx', index=False)

In [None]:
import pandas as pd

# Read the Excel file
df = pd.read_excel('IPC Map Rev.04.xlsx')

# Make column names lowercase and replace spaces with underscores
df.columns = df.columns.str.lower().str.replace(' ', '_')

# Convert 'submittal_date' to datetime
df['submittal_date'] = pd.to_datetime(df['submittal_date'])

# Identify projects with "on account" payments and those without
projects_with_on_account = df[df['project'].str.lower().str.endswith('on account')]
projects_without_on_account = df[~df['project'].str.lower().str.endswith('on account')]

# Add a new column 'on_account_amount' initialized with NaN
projects_without_on_account.loc[:, 'on_account_amount'] = pd.NA

# Iterate over projects with "on account" payments
for index, row in projects_with_on_account.iterrows():
    # Extract the base project name by removing "on account" variations (case-insensitive)
    project_name = row['project'].lower().replace(' - on account', '').replace('on account', '').strip()
    invoice_no = row['invoice_no.']
    collection_amount = row['collection_amount']

    # Find the corresponding row in projects_without_on_account (case-insensitive)
    matched_rows = projects_without_on_account.loc[
        (projects_without_on_account['project'].str.lower() == project_name) &
        (projects_without_on_account['invoice_no.'] == invoice_no)
    ]

    # Update the 'on_account_amount' in the corresponding row
    if len(matched_rows) > 0:
        projects_without_on_account.loc[matched_rows.index, 'on_account_amount'] = (
            projects_without_on_account.loc[matched_rows.index, 'on_account_amount'].fillna(0).astype(float) + collection_amount
        )

# Concatenate the DataFrames and remove rows with "on account" variations
final_df = pd.concat([projects_without_on_account, projects_with_on_account])
final_df = final_df[~final_df['project'].str.lower().str.endswith('on account')]

# Save the updated DataFrame to a new Excel file
final_df.to_excel('IPC Map Rev.05.xlsx', index=False)

In [3]:
import pandas as pd

# Read the Excel file
df = pd.read_excel('IPC Map Rev.05.xlsx')

# Make column names lowercase and replace spaces with underscores
df.columns = df.columns.str.lower().str.replace(' ', '_')

# Convert 'submittal_date' to datetime
df['submittal_date'] = pd.to_datetime(df['submittal_date'])

# Identify rows with "Net Amount" in the "package" column and those without
net_amount_rows = df[df['package'] == 'Net Amount']
other_rows = df[df['package'] != 'Net Amount']

# Add a new column 'net_amount_monthly_approved' initialized with NaN
other_rows.loc[:, 'net_amount_monthly_approved'] = pd.NA

# Iterate over rows with "Net Amount" in the "package" column
for index, row in net_amount_rows.iterrows():
    project_name = row['project']
    invoice_no = row['invoice_no.']
    net_amount_monthly_approved = row['monthly_amount_approved']

    # Find the corresponding row in other_rows
    matched_rows = other_rows.loc[
        (other_rows['project'] == project_name) &
        (other_rows['invoice_no.'] == invoice_no)
    ]

    # Update the 'net_amount_monthly_approved' in the corresponding row using .loc
    if len(matched_rows) > 0:
        other_rows.loc[matched_rows.index, 'net_amount_monthly_approved'] = net_amount_monthly_approved

# Concatenate the DataFrames
final_df = pd.concat([other_rows, net_amount_rows])

# Remove rows with "Net Amount" in the "package" column
final_df = final_df[final_df['package'] != 'Net Amount']

# Save the updated DataFrame to a new Excel file
final_df.to_excel('IPC Map Rev.06.xlsx', index=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  other_rows.loc[:, 'net_amount_monthly_approved'] = pd.NA


In [4]:
import pandas as pd

# Read the Excel file
df = pd.read_excel('IPC Map Rev.06.xlsx')

# Make column names lowercase and replace spaces with underscores
df.columns = df.columns.str.lower().str.replace(' ', '_')

# Convert 'submittal_date' to datetime
df['submittal_date'] = pd.to_datetime(df['submittal_date'])

# Identify rows with "Material on site" in the "package" column and those without
material_on_site_rows = df[df['package'] == 'Material on site']
other_rows = df[df['package'] != 'Material on site']

# Add a new column 'material_on_site_cummulative_amount_approved' initialized with NaN
other_rows.loc[:, 'material_on_site_cummulative_amount_approved'] = pd.NA

# Iterate over rows with "Material on site" in the "package" column
for index, row in material_on_site_rows.iterrows():
    project_name = row['project']
    invoice_no = row['invoice_no.']
    material_on_site_cummulative_amount_approved = row['cummulative_amount_approved']

    # Find the corresponding row in other_rows
    matched_rows = other_rows.loc[
        (other_rows['project'] == project_name) &
        (other_rows['invoice_no.'] == invoice_no)
    ]

    # Update the 'material_on_site_cummulative_amount_approved' in the corresponding row
    if len(matched_rows) > 0:
        other_rows.loc[matched_rows.index, 'material_on_site_cummulative_amount_approved'] = material_on_site_cummulative_amount_approved

# Concatenate the DataFrames
final_df = pd.concat([other_rows, material_on_site_rows])

# Remove rows with "Material on site" in the "package" column
final_df = final_df[final_df['package'] != 'Material on site']

# Save the updated DataFrame to a new Excel file
final_df.to_excel('IPC Map Rev.07.xlsx', index=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  other_rows.loc[:, 'material_on_site_cummulative_amount_approved'] = pd.NA


In [5]:
import pandas as pd

# Read the Excel file
df = pd.read_excel('IPC Map Rev.07.xlsx')

# Make column names lowercase and replace spaces with underscores
df.columns = df.columns.str.lower().str.replace(' ', '_')

# Convert 'submittal_date' to datetime
df['submittal_date'] = pd.to_datetime(df['submittal_date'])

# Identify rows with "V.O" in the "package" column and those without
vo_rows = df[df['package'] == 'V.O']
other_rows = df[df['package'] != 'V.O']

# Add a new column 'v.o._cummulative_amount_approved' initialized with NaN
other_rows.loc[:, 'v.o._cummulative_amount_approved'] = pd.NA

# Iterate over rows with "V.O" in the "package" column
for index, row in vo_rows.iterrows():
    project_name = row['project']
    invoice_no = row['invoice_no.']
    vo_cummulative_amount_approved = row['cummulative_amount_approved']

    # Find the corresponding row in other_rows
    matched_rows = other_rows.loc[
        (other_rows['project'] == project_name) &
        (other_rows['invoice_no.'] == invoice_no)
    ]

    # Update the 'v.o._cummulative_amount_approved' in the corresponding row
    if len(matched_rows) > 0:
        other_rows.loc[matched_rows.index, 'v.o._cummulative_amount_approved'] = vo_cummulative_amount_approved

# Concatenate the DataFrames
final_df = pd.concat([other_rows, vo_rows])

# Remove rows with "V.O" in the "package" column
final_df = final_df[final_df['package'] != 'V.O']

# Save the updated DataFrame to a new Excel file
final_df.to_excel('IPC Map Rev.08.xlsx', index=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  other_rows.loc[:, 'v.o._cummulative_amount_approved'] = pd.NA


In [6]:
import pandas as pd

# Read the Excel file
df = pd.read_excel('IPC Map Rev.08.xlsx')

# Make column names lowercase and replace spaces with underscores
df.columns = df.columns.str.lower().str.replace(' ', '_')

# Convert 'submittal_date' to datetime
df['submittal_date'] = pd.to_datetime(df['submittal_date'])

# Identify rows with "NCR" in the "package" column and those without
ncr_rows = df[df['package'] == 'NCR']
other_rows = df[df['package'] != 'NCR']

# Add a new column 'ncr_cummulative_amount_approved' initialized with NaN
other_rows.loc[:, 'ncr_cummulative_amount_approved'] = pd.NA

# Iterate over rows with "NCR" in the "package" column
for index, row in ncr_rows.iterrows():
    project_name = row['project']
    invoice_no = row['invoice_no.']
    ncr_cummulative_amount_approved = row['cummulative_amount_approved']

    # Find the corresponding row in other_rows
    matched_rows = other_rows.loc[
        (other_rows['project'] == project_name) &
        (other_rows['invoice_no.'] == invoice_no)
    ]

    # Update the 'ncr_cummulative_amount_approved' in the corresponding row
    if len(matched_rows) > 0:
        other_rows.loc[matched_rows.index, 'ncr_cummulative_amount_approved'] = ncr_cummulative_amount_approved

# Concatenate the DataFrames
final_df = pd.concat([other_rows, ncr_rows])

# Remove rows with "NCR" in the "package" column
final_df = final_df[final_df['package'] != 'NCR']

# Save the updated DataFrame to a new Excel file
final_df.to_excel('IPC Map Rev.09.xlsx', index=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  other_rows.loc[:, 'ncr_cummulative_amount_approved'] = pd.NA


In [7]:
import pandas as pd

# Read the Excel file
df = pd.read_excel('IPC Map Rev.09.xlsx')

# Make column names lowercase and replace spaces with underscores
df.columns = df.columns.str.lower().str.replace(' ', '_')

# Convert 'submittal_date' to datetime
df['submittal_date'] = pd.to_datetime(df['submittal_date'])

# Identify rows with "Escalation" in the "package" column and those without
escalation_rows = df[df['package'] == 'Escalation']
other_rows = df[df['package'] != 'Escalation']

# Add a new column 'escalation_cummulative_amount_approved' initialized with NaN
other_rows.loc[:, 'escalation_cummulative_amount_approved'] = pd.NA

# Iterate over rows with "Escalation" in the "package" column
for index, row in escalation_rows.iterrows():
    project_name = row['project']
    invoice_no = row['invoice_no.']
    escalation_cummulative_amount_approved = row['cummulative_amount_approved']

    # Find the corresponding row in other_rows
    matched_rows = other_rows.loc[
        (other_rows['project'] == project_name) &
        (other_rows['invoice_no.'] == invoice_no)
    ]

    # Update the 'escalation_cummulative_amount_approved' in the corresponding row
    if len(matched_rows) > 0:
        other_rows.loc[matched_rows.index, 'escalation_cummulative_amount_approved'] = escalation_cummulative_amount_approved

# Concatenate the DataFrames
final_df = pd.concat([other_rows, escalation_rows])

# Remove rows with "Escalation" in the "package" column
final_df = final_df[final_df['package'] != 'Escalation']

# Save the updated DataFrame to a new Excel file
final_df.to_excel('IPC Map Rev.10.xlsx', index=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  other_rows.loc[:, 'escalation_cummulative_amount_approved'] = pd.NA


In [8]:
import pandas as pd

# Read the Excel file
df = pd.read_excel('IPC Map Rev.10.xlsx')

# Make column names lowercase and replace spaces with underscores
df.columns = df.columns.str.lower().str.replace(' ', '_')

# Convert 'submittal_date' to datetime
df['submittal_date'] = pd.to_datetime(df['submittal_date'])

# Identify rows with "1st Retention" or "2nd Retention" in the "package" column and those without
retention_rows = df[df['package'].isin(['1st Retention', '2nd Retention'])]
other_rows = df[~df['package'].isin(['1st Retention', '2nd Retention'])]

# Add new columns '1st_retention_cummulative_amount_approved' and '2nd_retention_cummulative_amount_approved' initialized with NaN
other_rows.loc[:, '1st_retention_cummulative_amount_approved'] = pd.NA
other_rows.loc[:, '2nd_retention_cummulative_amount_approved'] = pd.NA

# Iterate over retention rows
for index, row in retention_rows.iterrows():
    project_name = row['project']
    invoice_no = row['invoice_no.']
    retention_amount = row['cummulative_amount_approved']
    package = row['package']

    # Find the corresponding row in other_rows
    matched_rows = other_rows.loc[
        (other_rows['project'] == project_name) &
        (other_rows['invoice_no.'] == invoice_no)
    ]

    # Update the corresponding retention column
    if len(matched_rows) > 0:
        if package == '1st Retention':
            other_rows.loc[matched_rows.index, '1st_retention_cummulative_amount_approved'] = retention_amount
        elif package == '2nd Retention':
            other_rows.loc[matched_rows.index, '2nd_retention_cummulative_amount_approved'] = retention_amount

# Concatenate the DataFrames
final_df = pd.concat([other_rows, retention_rows])

# Remove rows with "1st Retention" or "2nd Retention" in the "package" column
final_df = final_df[~final_df['package'].isin(['1st Retention', '2nd Retention'])]

# Save the updated DataFrame to a new Excel file
final_df.to_excel('IPC Map Rev.11.xlsx', index=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  other_rows.loc[:, '1st_retention_cummulative_amount_approved'] = pd.NA
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  other_rows.loc[:, '2nd_retention_cummulative_amount_approved'] = pd.NA


In [1]:
import pandas as pd

# Read the Excel file
df = pd.read_excel('IPC Map Rev.11.xlsx')

# Make column names lowercase and replace spaces with underscores
df.columns = df.columns.str.lower().str.replace(' ', '_')

# Convert 'submittal_date' to datetime
df['submittal_date'] = pd.to_datetime(df['submittal_date'])

# Identify rows with "Deductions" in the "package" column and those without
deductions_rows = df[df['package'] == 'Deductions']
other_rows = df[df['package'] != 'Deductions']

# Add a new column 'total_deductions' initialized with NaN
other_rows.loc[:, 'total_deductions'] = pd.NA

# Group deductions by project and invoice number and sum the cumulative approved amounts
deductions_summary = deductions_rows.groupby(['project', 'invoice_no.'])['cummulative_amount_approved'].sum().reset_index()

# Iterate over the grouped deductions
for index, row in deductions_summary.iterrows():
    project_name = row['project']
    invoice_no = row['invoice_no.']
    total_deductions = row['cummulative_amount_approved']

    # Find the corresponding row in other_rows
    matched_rows = other_rows.loc[
        (other_rows['project'] == project_name) &
        (other_rows['invoice_no.'] == invoice_no)
    ]

    # Update the 'total_deductions' in the corresponding row
    if len(matched_rows) > 0:
        other_rows.loc[matched_rows.index, 'total_deductions'] = total_deductions

# Concatenate the DataFrames
final_df = pd.concat([other_rows, deductions_rows])

# Remove rows with "Deductions" in the "package" column
final_df = final_df[final_df['package'] != 'Deductions']

# Save the updated DataFrame to a new Excel file
final_df.to_excel('IPC Map Rev.12.xlsx', index=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  other_rows.loc[:, 'total_deductions'] = pd.NA
