In [1]:
import pandas as pd

# Read the two raw expense exports exactly as they are on disk (no row skipping yet).
clewiston_expense_1, clewiston_expense_2 = (
    pd.read_csv(csv_name)
    for csv_name in ("Clewiston Expense 2.csv", "Clewiston Expenses.csv")
)

# Keep a five-row preview of each frame so the layout of the files can be inspected.
clewiston_expense_1_head = clewiston_expense_1.head()
clewiston_expense_2_head = clewiston_expense_2.head()

# Show both previews together as the cell output.
(clewiston_expense_1_head, clewiston_expense_2_head)


(  Natural Electric LLC Unnamed: 1  Unnamed: 2        Unnamed: 3 Unnamed: 4
 0  Clewiston Expense 2        NaN         NaN               NaN        NaN
 1            All Dates        NaN         NaN               NaN        NaN
 2                  NaN    Account        Date  Transaction type     Amount
 3                 Fuel        NaN         NaN               NaN        NaN
 4                  NaN       Fuel  10/04/2019           Expense      $8.31,
             Natural Electric LLC                     Unnamed: 1  Unnamed: 2  \
 0             Clewiston Expenses                            NaN         NaN   
 1                      All Dates                            NaN         NaN   
 2                            NaN                        Account        Date   
 3  51400 Job Materials Purchased                            NaN         NaN   
 4                            NaN  51400 Job Materials Purchased  09/24/2019   
 
          Unnamed: 3 Unnamed: 4  
 0               NaN       

In [3]:
# Reload both exports, skipping the first four lines of each file so that the
# Account / Date / Transaction type / Amount row is used as the header.
clewiston_expense_1_clean = pd.read_csv("Clewiston Expense 2.csv", skiprows=4)
clewiston_expense_2_clean = pd.read_csv("Clewiston Expenses.csv", skiprows=4)

# Check that both files expose the same column labels in the same order.
# Index.equals returns a single bool and copes with indexes of different
# lengths; an element-wise `==` raises ValueError when the lengths differ,
# which would make the "do not align" branch below unreachable in that case.
columns_match = clewiston_expense_1_clean.columns.equals(
    clewiston_expense_2_clean.columns
)

if columns_match:
    # Stack the two exports into one frame with a fresh 0..n-1 index.
    combined_clewiston_expenses = pd.concat(
        [clewiston_expense_1_clean, clewiston_expense_2_clean],
        ignore_index=True,
    )
    # Preview the first rows of the combined data.
    print(combined_clewiston_expenses.head())
else:
    # Leave an explicit marker so the misalignment is visible downstream.
    combined_clewiston_expenses = None
    print("Columns do not align between the two datasets.")


  Unnamed: 0 Account        Date Transaction type   Amount
0       Fuel     NaN         NaN              NaN      NaN
1        NaN    Fuel  10/04/2019          Expense    $8.31
2        NaN    Fuel  10/04/2019          Expense  $100.00
3        NaN    Fuel  10/10/2019          Expense  $112.00
4        NaN    Fuel  10/23/2019          Expense   $33.91


In [5]:
# Build the cleaned frame in one chain:
#   1. discard the first column (positional slice from column 1 onward),
#   2. discard every row that has at least one NaN,
#   3. renumber the surviving rows from 0.
combined_clewiston_expenses_cleaned = (
    combined_clewiston_expenses
    .iloc[:, 1:]
    .dropna()
    .reset_index(drop=True)
)

# Preview the result to confirm the cleanup.
combined_clewiston_expenses_cleaned.head()


Unnamed: 0,Account,Date,Transaction type,Amount
0,Fuel,10/04/2019,Expense,$8.31
1,Fuel,10/04/2019,Expense,$100.00
2,Fuel,10/10/2019,Expense,$112.00
3,Fuel,10/23/2019,Expense,$33.91
4,Fuel,10/24/2019,Expense,$90.00


In [6]:
# Sanity check: confirm that no row of the cleaned frame still contains a missing value.
# Build a per-row flag that is True when any column in that row is NaN.
has_missing = combined_clewiston_expenses_cleaned.isna().any(axis=1)
rows_with_missing_values = combined_clewiston_expenses_cleaned[has_missing]

# Report the outcome: either the clean message or the offending rows.
if rows_with_missing_values.empty:
    print("No rows with missing values found.")
else:
    print("Rows with missing values:")
    print(rows_with_missing_values)


No rows with missing values found.


In [7]:
# Inspect the dtype pandas assigned to each column of the cleaned frame.
# In the recorded run every column is `object`, i.e. 'Amount' and 'Date'
# are still text at this point and need explicit conversion.
data_types = combined_clewiston_expenses_cleaned.dtypes

data_types


Account             object
Date                object
Transaction type    object
Amount              object
dtype: object

In [9]:
# Convert 'Amount' from text to float: strip every "$" and "," first, then cast.
combined_clewiston_expenses_cleaned['Amount'] = (
    combined_clewiston_expenses_cleaned['Amount']
    .replace(r'[\$,]', '', regex=True)
    .astype(float)
)

# Convert 'Date' to datetime. The format is stated explicitly (month/day/year,
# as seen in the previews, e.g. 10/04/2019) so parsing does not depend on
# pandas' format inference. Values that do not match become NaT (errors='coerce').
combined_clewiston_expenses_cleaned['Date'] = pd.to_datetime(
    combined_clewiston_expenses_cleaned['Date'],
    format='%m/%d/%Y',
    errors='coerce',
)

# Coercion is silent, so report how many dates ended up as NaT instead of
# letting them flow unnoticed into later cells.
unparsed_date_count = int(combined_clewiston_expenses_cleaned['Date'].isna().sum())
if unparsed_date_count:
    print(f"Warning: {unparsed_date_count} 'Date' value(s) could not be parsed and are now NaT.")

# Display the frame (pandas truncates long frames) to confirm the conversions.
combined_clewiston_expenses_cleaned



Unnamed: 0,Account,Date,Transaction type,Amount
0,Fuel,2019-10-04,Expense,8.31
1,Fuel,2019-10-04,Expense,100.00
2,Fuel,2019-10-10,Expense,112.00
3,Fuel,2019-10-23,Expense,33.91
4,Fuel,2019-10-24,Expense,90.00
...,...,...,...,...
574,50800 Equipment Rental for Jobs,2020-02-03,Bill,548.42
575,50800 Equipment Rental for Jobs,2020-02-17,Bill,1565.73
576,50800 Equipment Rental for Jobs,2020-04-09,Bill,318.26
577,50800 Equipment Rental for Jobs,2020-07-02,Expense,16187.00


In [10]:
# Collect the distinct 'Account' labels, in order of first appearance,
# to see which categories need normalising.
unique_accounts = combined_clewiston_expenses_cleaned.loc[:, 'Account'].unique()

unique_accounts


array(['Fuel', 'Travel', '64300 Meals and Entertainment',
       'Administration', 'Employee Expense', 'Wages', 'Taxes',
       '51400 Job Materials Purchased', 'PT Casual Labor',
       '50800 Equipment Rental for Jobs'], dtype=object)

In [11]:
# Strip a leading run of digits plus the whitespace after it from each account
# name (e.g. '51400 Job Materials Purchased' -> 'Job Materials Purchased').
account_names = combined_clewiston_expenses_cleaned['Account']
combined_clewiston_expenses_cleaned['Account'] = account_names.str.replace(
    r'^\d+\s+', '', regex=True
)

# List the distinct account names again to verify the prefixes are gone.
unique_accounts_cleaned = combined_clewiston_expenses_cleaned['Account'].unique()

unique_accounts_cleaned


array(['Fuel', 'Travel', 'Meals and Entertainment', 'Administration',
       'Employee Expense', 'Wages', 'Taxes', 'Job Materials Purchased',
       'PT Casual Labor', 'Equipment Rental for Jobs'], dtype=object)

In [12]:
# Fold the payroll-related accounts into a single 'Labor' category.
labor_aliases = dict.fromkeys(['Wages', 'Taxes', 'PT Casual Labor'], 'Labor')
combined_clewiston_expenses_cleaned['Account'] = (
    combined_clewiston_expenses_cleaned['Account'].replace(labor_aliases)
)

# List the distinct account names to verify the merge.
unique_accounts_updated = combined_clewiston_expenses_cleaned['Account'].unique()

unique_accounts_updated


array(['Fuel', 'Travel', 'Meals and Entertainment', 'Administration',
       'Employee Expense', 'Labor', 'Job Materials Purchased',
       'Equipment Rental for Jobs'], dtype=object)

In [13]:
# Fold the remaining miscellaneous accounts into a single 'Other' category.
other_aliases = dict.fromkeys(
    ['Travel', 'Meals and Entertainment', 'Administration', 'Employee Expense'],
    'Other',
)
combined_clewiston_expenses_cleaned['Account'] = (
    combined_clewiston_expenses_cleaned['Account'].replace(other_aliases)
)

# List the distinct account names to verify the final category set.
unique_accounts_final = combined_clewiston_expenses_cleaned['Account'].unique()

unique_accounts_final


array(['Fuel', 'Other', 'Labor', 'Job Materials Purchased',
       'Equipment Rental for Jobs'], dtype=object)

In [14]:
# Re-date 'Labor' rows to the pay-period end date rather than the payday
# (per the author, payday falls one week after the pay period ends).
# NOTE: the shift is applied in place to the current 'Date' values, so running
# this cell again moves the Labor dates back by another week.
is_labor = combined_clewiston_expenses_cleaned['Account'] == 'Labor'
combined_clewiston_expenses_cleaned.loc[is_labor, 'Date'] -= pd.Timedelta(weeks=1)

# Pull out the Labor rows to confirm the shifted dates.
labor_entries_updated = combined_clewiston_expenses_cleaned[is_labor]

labor_entries_updated



Unnamed: 0,Account,Date,Transaction type,Amount
92,Labor,2019-10-11,Payroll Check,2400.00
93,Labor,2019-10-11,Payroll Check,1013.57
94,Labor,2019-10-11,Payroll Check,1440.00
95,Labor,2019-10-25,Payroll Check,1548.50
96,Labor,2019-10-25,Payroll Check,1065.90
...,...,...,...,...
568,Labor,2019-09-27,Check,324.00
569,Labor,2020-05-29,Expense,136.00
570,Labor,2020-06-01,Expense,136.00
571,Labor,2020-06-22,Expense,195.00


In [15]:
# Destination file for the cleaned expense data.
csv_path = "ClewistonExpensesFinal.csv"

# Give the cleaned frame its final name. This binds a second name to the same
# object; it does not copy the data.
clewiston_expense_df = combined_clewiston_expenses_cleaned

# Write the frame to disk without the row index.
clewiston_expense_df.to_csv(path_or_buf=csv_path, index=False)

# Echo the path as confirmation that the export ran.
csv_path


'ClewistonExpensesFinal.csv'

In [16]:
# Load the per-job summary table; the Clewiston totals are appended to it later.
ne_jobs_source = "NEJobs.csv"
NE_jobs_df = pd.read_csv(ne_jobs_source)

In [17]:
def _account_total(account_name):
    """Return the sum of 'Amount' over the rows of clewiston_expense_df whose 'Account' equals account_name."""
    in_account = clewiston_expense_df['Account'] == account_name
    # .loc with a mask selects rows and column in one step (no chained indexing).
    return clewiston_expense_df.loc[in_account, 'Amount'].sum()


# Per-category cost totals for the Clewiston job.
materials_sum = _account_total('Job Materials Purchased')
labor_sum = _account_total('Labor')
equipment_rental_sum = _account_total('Equipment Rental for Jobs')
fuel_sum = _account_total('Fuel')
other_sum = _account_total('Other')

# Total cost for the Clewiston project: every amount in the frame, all categories.
total_cost = clewiston_expense_df['Amount'].sum()

# Total income is a hard-coded assumption for this project, not derived from
# the expense data. Replace with the correct value if available.
total_income = 374819.47

# Profit margin: (Total Income - Total Cost) / Total Income
profit_margin = (total_income - total_cost) / total_income

# Sum of all rows whose transaction type is 'Vendor Credit'
# (negative in the recorded run).
is_vendor_credit = clewiston_expense_df['Transaction type'] == 'Vendor Credit'
vendor_credits_sum = clewiston_expense_df.loc[is_vendor_credit, 'Amount'].sum()

# Summary row for the Clewiston job, keyed by the column names of NE_jobs_df.
clewiston_row = {
    "Job Name": "Clewiston",
    "Materials": materials_sum,
    "Labor": labor_sum,
    "Equipment Rental": equipment_rental_sum,
    "Subcontractor Expense": 0.00,  # No subcontractor expense mentioned
    "Fuel": fuel_sum,
    "Other": other_sum,
    "Total Cost": total_cost,
    "Total Income": total_income,
    "Profit Margin": profit_margin,
    "Vendor Credits": vendor_credits_sum
}

# Drop any Clewiston row already present (e.g. from an earlier run that was
# saved to the CSV) so that re-running this cell replaces the row instead of
# appending a duplicate.
if "Job Name" in NE_jobs_df.columns:
    NE_jobs_df = NE_jobs_df[NE_jobs_df["Job Name"] != clewiston_row["Job Name"]]

# Append the Clewiston row to NE_jobs_df.
NE_jobs_df = pd.concat([NE_jobs_df, pd.DataFrame([clewiston_row])], ignore_index=True)

# Save back to the same file the table was loaded from. The name must match
# "NEJobs.csv" exactly: on a case-sensitive filesystem "NEjobs.csv" would be a
# different file and the source table would never be updated.
csv_path = "NEJobs.csv"
NE_jobs_df.to_csv(csv_path, index=False)

# Display the updated NE_jobs_df to confirm.
NE_jobs_df



Unnamed: 0,Job Name,Materials,Labor,Equipment Rental,Subcontractor Expense,Fuel,Other,Total Cost,Total Income,Profit Margin,Vendor Credits
0,The Manor,246049.05,199060.77,4181.55,47900.54,6010.92,26252.18,529455.01,828476.88,0.36,-30620.53
1,Clewiston,97240.76,123064.47,23202.76,0.0,2434.42,1290.76,247233.17,374819.47,0.340394,-825.01


In [18]:
# Also export the cleaned expense data as an Excel workbook, without the row index.
excel_path = 'ClewistonExpensesFinal.xlsx'
clewiston_expense_df.to_excel(excel_path, index=False)