In [None]:
import psycopg2
import pandas as pd
import logging
from dotenv import load_dotenv
import os

In [None]:
# Set up logging: console output through basicConfig plus a persistent log file.
# The same format string is used for both destinations so that lines written to
# log.log carry the timestamp and level as well.
LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
logging.basicConfig(level=logging.INFO, format=LOG_FORMAT)
logger = logging.getLogger()

# Attach the file handler only once. This cell configures the root logger, which
# lives for the whole kernel session, so re-running the cell would otherwise add
# another FileHandler each time and every message would be written repeatedly.
# FileHandler stores the absolute path of its file in `baseFilename`.
log_file = os.path.abspath('log.log')
fh = next(
    (
        handler for handler in logger.handlers
        if isinstance(handler, logging.FileHandler) and handler.baseFilename == log_file
    ),
    None,
)
if fh is None:
    # create a log file
    fh = logging.FileHandler('log.log')
    logger.addHandler(fh)
fh.setLevel(logging.INFO)
# Without an explicit formatter the handler would emit the bare message only.
fh.setFormatter(logging.Formatter(LOG_FORMAT))




In [None]:

# Populate the process environment from the .env file
load_dotenv()

# Read the five database connection settings from the environment in one pass.
# os.getenv returns None for any variable that is not set.
dbname, user, password, host, port = (
    os.getenv(variable_name)
    for variable_name in ('DB_NAME', 'DB_USER', 'DB_PASSWORD', 'DB_HOST', 'DB_PORT')
)

In [None]:
# Open the database connection using the settings read from the environment
connection_settings = {
    'dbname': dbname,
    'user': user,
    'password': password,
    'host': host,
    'port': port,
}
conn = psycopg2.connect(**connection_settings)

In [None]:
# Read the engagement list workbook into a DataFrame
data = pd.read_excel('/inputData/CurrentData/Engagement List.xlsx')

# Record what was loaded in the log: the frame's shape and its column index
loaded_shape = data.shape
loaded_columns = data.columns
logger.info(f'Data loaded with shape: {loaded_shape}')
logger.info(f'Columns: {loaded_columns}')

In [None]:

# Keep only rows whose partner service line is 'Consulting' and whose status is 'Released'
is_consulting = data['Engagement Partner Service Line'] == 'Consulting'
is_released = data['Engagement Status'] == 'Released'
filtered_data = data[is_consulting & is_released]
# Log the size of the filtered frame
logger.info(f'Data filtered with shape: {filtered_data.shape}')

# Latest 'Last Time Charged Date' among the kept rows; used later as data_date
last_charged_dates = filtered_data['Last Time Charged Date']
data_date = last_charged_dates.max()
# Log the value that was picked
logger.info(f'Max date for "Last Time Charged Date": {data_date}')

In [None]:
# Insert the filtered rows into the `engagements` table, logging the outcome of
# each row, then commit once and always release the cursor and connection.

# load_date is filled by the column DEFAULT; every other column is bound from
# the DataFrame row (data_date is the same value for all rows).
INSERT_ENGAGEMENT_SQL = '''
    INSERT INTO engagements (
        engagement_id, engagement_creation_date, release_date, last_time_charged_date, last_expenses_charged_date,
        load_date, last_active_etc_p_date, weeks_since_last_etc, etc_compliance_status, data_date,
        engagement, client, engagement_partner_name, engagement_partner_gui, engagement_manager_name, engagement_manager_gui
    ) VALUES (%s, %s, %s, %s, %s, DEFAULT, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
    ON CONFLICT (engagement_id) DO NOTHING;
'''


def _to_db_value(value):
    """Return None for pandas missing markers (NaN/NaT/pd.NA), else the value unchanged.

    Empty Excel cells arrive as NaN/NaT; passing None instead makes the driver
    send SQL NULL rather than a 'NaN'/'NaT' literal.
    NOTE(review): assumes every cell is a scalar -- confirm against the workbook.
    """
    return None if pd.isna(value) else value


# Establish a cursor object
cur = conn.cursor()
total_rows = len(filtered_data)

try:
    # enumerate() gives a true 1..N progress counter; the DataFrame index label
    # is not sequential after filtering and must not be used for that purpose.
    for position, (_, row) in enumerate(filtered_data.iterrows(), start=1):
        engagement_id = row['Engagement ID']
        params = tuple(
            _to_db_value(value)
            for value in (
                row['Engagement ID'], row['Engagement Creation Date'], row['Release Date'],
                row['Last Time Charged Date'], row['Last Expenses Charged Date'],
                row['Last Active ETC-P Date'], row['Weeks Since Last ETC'], row['ETC Compliance Status'],
                data_date,
                row['Engagement'], row['Client'],
                row['Engagement Partner Name'], row['Engagement Partner GUI'],
                row['Engagement Manager Name'], row['Engagement Manager GUI'],
            )
        )

        # A failed statement aborts the surrounding PostgreSQL transaction, so
        # each insert runs inside a savepoint that can be rolled back on its own.
        cur.execute('SAVEPOINT engagement_row')
        try:
            cur.execute(INSERT_ENGAGEMENT_SQL, params)
            # rowcount is 0 when ON CONFLICT DO NOTHING skipped the row.
            was_inserted = cur.rowcount == 1
        except psycopg2.errors.UniqueViolation:
            # Only reachable for unique constraints other than engagement_id,
            # since conflicts on engagement_id are absorbed by ON CONFLICT.
            cur.execute('ROLLBACK TO SAVEPOINT engagement_row')
            logger.warning(f"Conflict detected for Engagement ID: {engagement_id}")
        else:
            cur.execute('RELEASE SAVEPOINT engagement_row')
            if was_inserted:
                logger.info(f"Inserted row {position}/{total_rows}: {engagement_id}")
            else:
                logger.info(f"Skipped row {position}/{total_rows} (engagement_id already exists): {engagement_id}")

    # Commit changes
    conn.commit()
except Exception:
    # Discard the partial batch and surface the original error to the caller.
    conn.rollback()
    logger.exception("Data insertion failed; transaction rolled back.")
    raise
finally:
    # Close the cursor and the connection whether or not the load succeeded.
    cur.close()
    conn.close()

logger.info("Data insertion completed.")