## Project: Calendar Update File for DOR 

**This code locates Outlook emails, downloads their attachments to `S:\Calendar Update File for DOR\Event Attachment`, and extracts event and group information from the attachments into the `Event` and `Group` sheets of Master Template.xlsx.**

*Reminders:*
1. Ensure the Master Template.xlsx file is closed before running the code.
2. If you input something incorrectly, click "Kernel" and then "Restart Kernel."
3. If you encounter a date parsing error in any Excel file, open the file and manually change the date to the MM/DD/YYYY format.
4. The information will only be extracted if the downloaded files follow structures similar to "11.01.23 Resort Strategy Meeting.xlsx," "Entertainment Calendar DOR Updated 7.16.24.xlsx," or "HOTSHOT Report 07.29.2024.xlsx."

*You will need to manually input:*
1. Your email account
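2. The keywords to filter by (comma-separated); at least one keyword must appear in the email subject and in the attachment file name
3. The start date of the emails you want to filter by (YYYY-MM-DD)
4. The end date of the emails you want to filter by (YYYY-MM-DD, inclusive)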


In [4]:
pip install pywin32

Note: you may need to restart the kernel to use updated packages.


In [2]:
import os
import win32com.client
from datetime import datetime, timedelta
import openpyxl
import pandas as pd

# Function to validate and parse dates
def validate_and_parse_date(date_str, file_name):
    """Parse a zero-padded ``MM/DD/YYYY`` date string.

    Args:
        date_str: Text expected to hold a ``MM/DD/YYYY`` date. Leading and
            trailing whitespace is ignored.
        file_name: Name of the file the value came from; only used in the
            printed error message.

    Returns:
        The parsed ``datetime``, or ``None`` (after printing an error) when
        the value is not a string, is not exactly 10 characters long, or is
        not a valid calendar date.
    """
    try:
        # Blank spreadsheet cells can arrive as NaN/None instead of text.
        # Report them like any other bad date rather than letting len()
        # raise a TypeError that this function would not catch.
        if not isinstance(date_str, str):
            raise ValueError(f"Invalid date value: {date_str!r}")

        candidate = date_str.strip()
        # Requiring 10 characters enforces the zero-padded MM/DD/YYYY form.
        if len(candidate) != 10:
            raise ValueError(f"Invalid date format: {date_str}")
        return datetime.strptime(candidate, '%m/%d/%Y')
    except ValueError as e:
        print(f"Error parsing dates in file {file_name}: {e}")
        return None

# Function to download attachments from Outlook based on dynamic subject filters
def download_attachments_from_outlook(email_account, keywords, download_folder, start_date, end_date):
    """Save keyword-matching attachments from an Outlook inbox to disk.

    Messages in the account's "Inbox" folder are restricted to those with
    ``start_date <= ReceivedTime < end_date``. A message is considered when
    its subject contains at least one keyword (case-insensitive); one of its
    attachments is kept when the file name ends in .xlsx/.xls/.pdf and also
    contains at least one keyword.

    Args:
        email_account: Name of the Outlook store (account) to read.
        keywords: Iterable of keyword strings. Surrounding whitespace is
            ignored and blank entries are dropped, because an empty keyword
            would otherwise match every subject and file name.
        download_folder: Directory the attachments are written to; created
            when missing.
        start_date: Inclusive lower bound (``datetime``) on the received time.
        end_date: Exclusive upper bound (``datetime``) on the received time.

    Returns:
        A list of ``(attachment_path, received_time)`` tuples, where
        ``received_time`` is formatted as ``MM/DD/YYYY HH:MM AM/PM``. Files
        that already exist in ``download_folder`` are not overwritten but are
        still included in the result.
    """
    os.makedirs(download_folder, exist_ok=True)

    # Normalise once so the per-message checks are plain substring tests.
    normalized_keywords = [
        keyword.strip().lower()
        for keyword in keywords
        if keyword and keyword.strip()
    ]

    def contains_keyword(text):
        lowered = text.lower()
        return any(keyword in lowered for keyword in normalized_keywords)

    outlook = win32com.client.Dispatch("Outlook.Application").GetNamespace("MAPI")
    inbox = outlook.Folders.Item(email_account).Folders.Item("Inbox")

    time_format = "%m/%d/%Y %I:%M %p"
    restriction = (
        "[ReceivedTime] >= '" + start_date.strftime(time_format)
        + "' AND [ReceivedTime] < '" + end_date.strftime(time_format) + "'"
    )
    messages = inbox.Items.Restrict(restriction)
    allowed_extensions = ('.xlsx', '.xls', '.pdf')

    attachment_info = []

    for message in messages:
        if not contains_keyword(message.Subject):
            continue

        received_time = message.ReceivedTime.strftime(time_format)
        for attachment in message.Attachments:
            original_name = attachment.FileName
            # The name comes from the email, so keep only its final path
            # component to guarantee the file lands inside download_folder.
            safe_name = os.path.basename(original_name)

            if not (safe_name.lower().endswith(allowed_extensions) and contains_keyword(safe_name)):
                print(f"Skipped {original_name}: Not an allowed file or does not match keywords.")
                continue

            attachment_path = os.path.join(download_folder, safe_name)
            if not os.path.exists(attachment_path):
                attachment.SaveAsFile(attachment_path)
                print(f"Downloaded {original_name} to {download_folder}")
            else:
                print(f"File {original_name} already exists. Using existing file.")
            attachment_info.append((attachment_path, received_time))

    return attachment_info

def _overview_records(df, first_index, file_name, received_time):
    """Build Date/DoW/Event/Group records from columns C-F of a header-less sheet."""
    # Columns C..F (positions 2..5) must exist; otherwise there is nothing to read.
    if df.shape[1] <= 5:
        if not df.empty:
            print(f"Sheet in {file_name} has fewer than 6 columns. No rows extracted.")
        return []

    records = []
    for index in range(first_index, len(df)):
        row = df.iloc[index]
        records.append({
            'File': file_name,
            'Date': row.iloc[2],  # Column C (Date)
            'Day of Week': row.iloc[3],  # Column D (DoW)
            'Event': row.iloc[4],  # Column E (Event)
            'Group': row.iloc[5],  # Column F (Group)
            'Received Time': received_time
        })
    return records


def _date_cell_to_text(value):
    """Return a date cell as text (MM/DD/YYYY for datetimes), or None when blank."""
    # NaN/NaT are truthy, so they must be filtered explicitly before use.
    if value is None or pd.isna(value):
        return None
    if isinstance(value, (pd.Timestamp, datetime)):
        return value.strftime('%m/%d/%Y')
    if not value:
        return None
    return value if isinstance(value, str) else str(value)


def extract_event_info_from_excel(attachment_path, received_time):
    """Extract event records from a downloaded workbook, chosen by file name.

    The file name (case-insensitive) selects the layout:

    * contains "HOTSHOT": sheet "Overview", no header, rows from index 8 on,
      columns C-F read as Date / Day of Week / Event / Group.
    * contains "RESORT STRATEGY MEETING": sheet "HS-Overview", no header,
      rows from index 9 on, same columns.
    * contains "ENTERTAINMENT CALENDAR DOR UPDATED": sheet "DOR" with headers
      "Start Date", "End Date" and "Event Name"; one record is produced for
      every day from the start date through the end date. Rows whose start or
      end date is blank or unparseable are skipped.

    Any other file name is skipped with a message.

    Args:
        attachment_path: Path of the file to read.
        received_time: Value stored unchanged under 'Received Time' in every
            record.

    Returns:
        A list of dicts with keys 'File', 'Date', 'Event', 'Received Time'
        and, for the two overview layouts, 'Day of Week' and 'Group'. Errors
        while reading a file are printed and whatever was collected so far
        is returned.
    """
    file_name = os.path.basename(attachment_path)
    upper_name = file_name.upper()
    event_info = []

    try:
        if "HOTSHOT" in upper_name:
            print(f"'HOTSHOT' found in file name {file_name}. Extracting event information from 'Overview' sheet.")
            df = pd.read_excel(attachment_path, sheet_name="Overview", header=None)

            if df.empty:
                print(f"No data found in 'Overview' sheet of {file_name}.")
            else:
                print(f"Data found in 'Overview' sheet of {file_name}. Processing rows starting from row 9...")

            # Spreadsheet row 9 is positional index 8.
            event_info.extend(_overview_records(df, 8, file_name, received_time))

        elif "RESORT STRATEGY MEETING" in upper_name:
            print(f"'Resort Strategy Meeting' found in file name {file_name}. Extracting event information from 'HS-Overview' sheet.")
            df = pd.read_excel(attachment_path, sheet_name="HS-Overview", header=None)

            if df.empty:
                print(f"No data found in 'HS-Overview' sheet of {file_name}.")
            else:
                print(f"Data found in 'HS-Overview' sheet of {file_name}. Processing rows starting from row 9...")

            # NOTE(review): the message above says row 9, but iteration starts
            # at index 9 (spreadsheet row 10). Kept as-is; confirm which one
            # is intended for this layout.
            event_info.extend(_overview_records(df, 9, file_name, received_time))

        elif "ENTERTAINMENT CALENDAR DOR UPDATED" in upper_name:
            print(f"'Entertainment Calendar DOR Updated' found in file name {file_name}. Extracting event information from 'DOR' sheet.")
            df = pd.read_excel(attachment_path, sheet_name="DOR")
            for _, row in df.iterrows():
                start_text = _date_cell_to_text(row.get('Start Date', ''))
                end_text = _date_cell_to_text(row.get('End Date', ''))
                event_name = row.get('Event Name', '')

                # Skip blank dates per row so one empty cell cannot abort the
                # rest of the file.
                if not start_text or not end_text:
                    continue

                start_date_obj = validate_and_parse_date(start_text, file_name)
                end_date_obj = validate_and_parse_date(end_text, file_name)
                if not start_date_obj or not end_date_obj:
                    continue

                # Expand the inclusive date range into one record per day.
                current_date = start_date_obj
                while current_date <= end_date_obj:
                    event_info.append({
                        'File': file_name,
                        'Date': current_date.strftime('%m/%d/%Y'),
                        'Event': event_name,
                        'Received Time': received_time
                    })
                    current_date += timedelta(days=1)

        else:
            print(f"Relevant keyword not found in file name {file_name}. Skipping extraction.")

    except Exception as e:
        # Per-file boundary: report the problem and let the caller continue
        # with the remaining attachments.
        print(f"Error processing {file_name}: {e}")

    return event_info


# Function to update master template with event information
def update_master_template(master_template_path, event_info):
    """Rewrite the "Event" and "Group" sheets of the master workbook.

    All rows below the header row are removed from both sheets, then one row
    per usable record is written to each sheet starting at row 2. Columns are
    File, Received Time, day-of-week abbreviation, date text, followed by up
    to three "/"-separated name parts (event names on the Event sheet, group
    names on the Group sheet).

    Records whose 'Date' is missing or cannot be read as MM/DD/YYYY are
    skipped. A PermissionError while loading or saving is reported with a
    printed message instead of being raised.

    Args:
        master_template_path: Path of the workbook to load and overwrite.
        event_info: Iterable of record dicts with 'File', 'Date' and
            'Received Time', and optionally 'Event' and 'Group'.
    """
    def text_or_blank(value):
        # Missing/NaN values become empty strings; anything else is stringified.
        return '' if pd.isna(value) else str(value)

    def write_record(sheet, row_number, record, weekday, date_text, slash_joined):
        # Columns 5-7 receive the first three "/"-separated parts, blank-padded.
        pieces = slash_joined.split('/')
        first_three = [
            pieces[position].strip() if position < len(pieces) else ''
            for position in range(3)
        ]
        values = [record['File'], record['Received Time'], weekday, date_text, *first_three]
        for column_number, value in enumerate(values, start=1):
            sheet.cell(row=row_number, column=column_number).value = value

    try:
        workbook = openpyxl.load_workbook(master_template_path)
        event_sheet = workbook["Event"]
        group_sheet = workbook["Group"]

        # Clear everything below the header row on both sheets.
        for sheet in (event_sheet, group_sheet):
            sheet.delete_rows(2, sheet.max_row - 1)

        # Both sheets always advance together, so one counter is enough.
        next_row = 2

        for record in event_info:
            raw_date = record['Date']
            if pd.isna(raw_date):
                continue

            if isinstance(raw_date, datetime):
                date_str = raw_date.strftime('%m/%d/%Y')
            else:
                date_str = str(raw_date)

            try:
                dow = datetime.strptime(date_str, '%m/%d/%Y').strftime('%a')
            except ValueError:
                # Not a MM/DD/YYYY date (e.g. a heading or note row): skip it.
                continue

            event_text = text_or_blank(record.get('Event', ''))
            group_text = text_or_blank(record.get('Group', ''))

            write_record(event_sheet, next_row, record, dow, date_str, event_text)
            write_record(group_sheet, next_row, record, dow, date_str, group_text)
            next_row += 1

        workbook.save(master_template_path)

    except PermissionError as e:
        print(f"Permission denied: {e}. Please close the Master Template file if it is open and try again.")

if __name__ == "__main__":
    # Collect the run parameters interactively.
    email_account = input("Enter your Outlook email account: ").strip()
    # Trim each keyword and drop blanks: "a, b" must yield "b" (not " b"),
    # and an empty keyword would match every subject and file name.
    keywords = [
        keyword.strip()
        for keyword in input("Enter keywords for filtering (comma-separated): ").split(",")
        if keyword.strip()
    ]
    download_folder = r"S:\Calendar Update File for DOR\Event Attachment"
    start_date_str = input("Enter the start date (YYYY-MM-DD): ").strip()
    end_date_str = input("Enter the end date (YYYY-MM-DD): ").strip()

    start_date = datetime.strptime(start_date_str, "%Y-%m-%d")
    # The received-time filter uses an exclusive upper bound, so add one day
    # to make the entered end date inclusive.
    end_date = datetime.strptime(end_date_str, "%Y-%m-%d") + timedelta(days=1)

    attachments = download_attachments_from_outlook(email_account, keywords, download_folder, start_date, end_date)

    print("\nSummary of downloaded files:")
    if not attachments:
        print("No new files were downloaded.")
    else:
        for attachment_path, received_time in attachments:
            print(f"{os.path.basename(attachment_path)} (Received: {received_time})")

        master_template_path = r"S:\Calendar Update File for DOR\Master Template.xlsx"
        # Gather the records from every attachment before touching the
        # workbook, so the template is rewritten once.
        all_event_info = []
        for attachment_path, received_time in attachments:
            all_event_info.extend(extract_event_info_from_excel(attachment_path, received_time))

        update_master_template(master_template_path, all_event_info)
        print("Master template has been refreshed and updated.")

Enter your Outlook email account:  jingwei.guo@venetianlasvegas.com
Enter keywords for filtering (comma-separated):  calendar,entertainment,hotshot,strategy
Enter the start date (YYYY-MM-DD):  2024-06-18
Enter the end date (YYYY-MM-DD):  2024-08-09


File 11.01.23 Resort Strategy Meeting.xlsx already exists. Using existing file.
File 2024 Special Events Calendar by Month-CURRENT as of 7.26.24.pdf already exists. Using existing file.
File HOTSHOT Report 07.29.2024.xlsx already exists. Using existing file.
Skipped Sample Workbook.xlsx: Not an allowed file or does not match keywords.
File 2024 Entertainment Calendar DOR Updated 7.16.24.xlsx already exists. Using existing file.
Skipped image007.png: Not an allowed file or does not match keywords.
Skipped image008.png: Not an allowed file or does not match keywords.
Skipped image009.png: Not an allowed file or does not match keywords.

Summary of downloaded files:
11.01.23 Resort Strategy Meeting.xlsx (Received: 07/30/2024 10:02 AM)
2024 Special Events Calendar by Month-CURRENT as of 7.26.24.pdf (Received: 07/30/2024 10:02 AM)
HOTSHOT Report 07.29.2024.xlsx (Received: 07/30/2024 10:02 AM)
2024 Entertainment Calendar DOR Updated 7.16.24.xlsx (Received: 07/30/2024 10:02 AM)
'Resort Strate