In [7]:
pip install pandas icalendar pytz

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [8]:
import pandas as pd
from icalendar import Calendar
from datetime import datetime, date
import pytz
from secrets import MY_CALENDAR_EMAIL

def _utc_if_naive(value):
    """Attach UTC to a naive ``datetime``; return any other value unchanged.

    Plain ``date`` objects and already timezone-aware datetimes pass through
    untouched.
    """
    if isinstance(value, datetime) and value.tzinfo is None:
        return pytz.UTC.localize(value)
    return value


def _optional_dt(event, key):
    """Return the normalized ``.dt`` of property *key*, or None if it is absent."""
    prop = event.get(key)
    if prop is None:
        return None
    return _utc_if_naive(prop.dt)


def parse_ics_file(file_path):
    """Parse an ICS file into a DataFrame with one row per VEVENT.

    Parameters
    ----------
    file_path : str or path-like
        Location of the ``.ics`` file; it is opened in binary mode.

    Returns
    -------
    pandas.DataFrame
        Columns, in order: ``summary``, ``start``, ``end``, ``location``,
        ``description``, ``last_modified``, ``organizer``, ``attendee``,
        ``attendees_count``. The columns are present even when the calendar
        contains no events.

    Notes
    -----
    * Naive datetimes are interpreted as UTC.
    * An event that lacks DTSTART, DTEND or LAST-MODIFIED gets ``None`` in the
      corresponding column instead of raising ``AttributeError``.
    * ``attendee`` keeps the raw ``str()`` of whatever the event returns, so
      any downstream string clean-up sees the same text as before.
    """
    column_order = [
        'summary', 'start', 'end', 'location', 'description',
        'last_modified', 'organizer', 'attendee', 'attendees_count',
    ]

    # Read the ICS file
    with open(file_path, 'rb') as f:
        cal = Calendar.from_ical(f.read())

    meetings = []
    for event in cal.walk('VEVENT'):
        # A single ATTENDEE comes back as one string-like value, several come
        # back as a list. Wrap the single case so len() counts attendees
        # rather than characters.
        attendees = event.get('attendee', [])
        if not isinstance(attendees, (list, tuple)):
            attendees = [attendees]

        meetings.append({
            'summary': str(event.get('summary', '')),
            'start': _optional_dt(event, 'dtstart'),
            'end': _optional_dt(event, 'dtend'),
            'location': str(event.get('location', '')),
            'description': str(event.get('description', '')),
            'last_modified': _optional_dt(event, 'last-modified'),
            'organizer': str(event.get('organizer', '')),
            'attendee': str(event.get('attendee', '')),
            'attendees_count': len(attendees),
        })

    # Passing the column list keeps the schema stable for an empty calendar.
    return pd.DataFrame(meetings, columns=column_order)

# Load the raw calendar export: one DataFrame row per event in the file
df = parse_ics_file(file_path='data/raw/my-calendar.ics')

In [9]:
def _join_addresses(raw, own_email):
    """Normalize a comma-separated address string.

    Splits on commas, trims whitespace, drops empty entries and entries equal
    to *own_email*, and rejoins the rest with ``,`` (no spaces).
    """
    kept = []
    for piece in raw.split(','):
        address = piece.strip()
        if address and address != own_email:
            kept.append(address)
    return ','.join(kept)


def clean_organizer_attendee(df, own_email=None):
    """Merge ``organizer`` and ``attendee`` into one clean ``attendess`` column.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain string columns ``organizer`` and ``attendee``. It is not
        modified; the function works on a copy.
    own_email : str, optional
        Address to leave out of the result. Defaults to the module-level
        ``MY_CALENDAR_EMAIL``.

    Returns
    -------
    pandas.DataFrame
        Copy of *df* without ``organizer`` / ``attendee`` and with a new
        ``attendess`` column (spelling kept so existing consumers of the
        output keep working) holding the remaining addresses joined by ``,``.

    Notes
    -----
    *own_email* is removed by exact match on each address rather than by
    substring, so an address that merely contains it is left intact and no
    empty entries remain when it sits in the middle of the list.
    """
    if own_email is None:
        own_email = MY_CALENDAR_EMAIL

    cleaned = df.copy()

    # regex=False makes every replacement literal on all pandas versions;
    # '[' and '(' are not valid regular expressions on their own.
    organizer = cleaned['organizer']
    for token in ('mailto:', 'unknownorganizer@calendar.google.com'):
        organizer = organizer.str.replace(token, '', regex=False)

    # Strip the wrapper text that str() of the parsed attendee value carries.
    attendee = cleaned['attendee']
    for token in ('mailto:', 'vCalAddress', "'", '[', ']', '(', ')'):
        attendee = attendee.str.replace(token, '', regex=False)

    combined = organizer + ',' + attendee
    cleaned['attendess'] = [_join_addresses(raw, own_email) for raw in combined]

    return cleaned.drop(columns=['organizer', 'attendee'])

# Replace the organizer/attendee columns with the combined 'attendess' column.
# NOTE: this rebinds `df`, so the cell is not idempotent. Running it a second
# time without re-running the parsing cell raises KeyError, because the
# 'organizer' and 'attendee' columns have already been dropped.
df = clean_organizer_attendee(df=df)

In [5]:
# Write the current `df` to CSV without the index column
df.to_csv(path_or_buf='data/processed/my-calendar.csv', index=False)