In [None]:
# Upgrading Pandas for better rolling window support
!pip install icalendar
!pip install -I pandas==0.18.1

In [None]:
%matplotlib inline

import datetime
import pandas as pd
import numpy as np
import matplotlib as mpl
import pylab as plt

# Scale the DPI used for saved figures to 1.5x whatever it is currently set to.
# NOTE(review): this reads the current value, so re-running the cell multiplies
# it again; it also assumes 'savefig.dpi' is numeric — TODO confirm for the
# installed matplotlib version.
mpl.rcParams['savefig.dpi'] = 1.5 * mpl.rcParams['savefig.dpi']
mpl.style.use('ggplot')

# Let pandas show up to 15 columns and use up to 500 characters per line.
pd.set_option('display.max_columns', 15)
pd.set_option('display.width', 500)

# CONFIG
# Address looked up (as 'mailto:<address>') in each event's attendee list to
# decide whether the invitation was accepted.
MY_EMAIL_ADDRESS = "phartig@twitter.com"
# Path of the iCalendar (.ics) file that is opened and parsed below.
INPUT_FILE = 'phartig@twitter.com.ics'
# Lower bound on meeting start time; meetings starting earlier are filtered out.
START_DATE = pd.Timestamp("2013-09-16")

In [None]:
from icalendar import Calendar, Event, vCalAddress

# Read the export as raw bytes and let icalendar decode it, so parsing does not
# depend on the platform's default text encoding or newline translation.
with open(INPUT_FILE, 'rb') as fp:
    cal = Calendar.from_ical(fp.read())

In [None]:
# Display how pandas parses this date literal (same literal as START_DATE).
pd.Timestamp("2013-09-16")

In [None]:
def is_confirmed(c):
    """Return True when the component's STATUS property equals 'CONFIRMED'."""
    return c.get('STATUS') == 'CONFIRMED'


def is_allday(c):
    """Return True when the component has no 'dtend' property."""
    return c.get('dtend') is None


def is_event_type(c, t):
    """Return True when the component's name equals ``t`` (e.g. "VEVENT")."""
    return c.name == t


def is_special(c):
    """Return True when the lower-cased summary is one of the excluded titles.

    A component without a summary is never special.
    """
    summary = c.get('summary')
    if summary is None:
        return False
    return summary.lower() in ['gym', 'grab dinner', 'work out', 'private meeting']

def is_accepted(component):
    """Return True if MY_EMAIL_ADDRESS accepted the invitation for ``component``.

    The component's 'attendee' property may be missing, a single address, or a
    list of addresses. The first attendee equal to
    ``'mailto:' + MY_EMAIL_ADDRESS`` is selected and its PARTSTAT parameter is
    compared against 'ACCEPTED'.

    Returns False when there are no attendees, when the configured address is
    not among them, or when the matching attendee carries no PARTSTAT parameter.
    """
    attendees = component.get('attendee')
    if attendees is None:
        return False
    if not isinstance(attendees, list):
        # A single attendee is returned as a bare value rather than a list.
        attendees = [attendees]

    # Build the comparison address once instead of once per attendee.
    my_address = vCalAddress('mailto:' + MY_EMAIL_ADDRESS)
    for attendee in attendees:
        if attendee == my_address:
            # .get() so an attendee entry without PARTSTAT counts as
            # "not accepted" instead of raising KeyError.
            return attendee.params.get('PARTSTAT') == 'ACCEPTED'
    return False


def normalize_date(d):
    """Convert ``d`` to a timezone-aware pandas Timestamp in UTC.

    Values without timezone information are labelled as UTC as-is
    (``tz_localize``); values that already carry a timezone are converted to
    UTC (``tz_convert``).
    """
    stamp = pd.Timestamp(d)
    to_utc = stamp.tz_localize if stamp.tz is None else stamp.tz_convert
    return to_utc('UTC')
    
# Collect [summary, start, end] for every calendar component that is a
# confirmed, accepted, non-all-day VEVENT and is not one of the excluded
# "special" titles. The conditions short-circuit, so the later predicates only
# run on components that passed the earlier ones.
meetings = [
    [component.get('summary'),
     normalize_date(component.get('dtstart').dt),
     normalize_date(component.get('dtend').dt)]
    for component in cal.walk()
    if is_event_type(component, "VEVENT")
    and is_confirmed(component)
    and not is_allday(component)
    and is_accepted(component)
    and not is_special(component)
]

m = pd.DataFrame(meetings, columns=['summary', 'start', 'end'])
m['duration'] = m['end'] - m['start']

# 'start' holds timezone-aware UTC timestamps (see normalize_date), so both
# bounds must be timezone-aware too: comparing against a naive local "now" or a
# naive START_DATE either raises or is off by the local UTC offset.
now_utc = pd.Timestamp.now(tz='UTC')
earliest_start = normalize_date(START_DATE)

# Keep meetings that started within [START_DATE, now]; .copy() makes the result
# an independent frame rather than a view-like slice of the unfiltered one.
m = m[(m['start'] >= earliest_start) & (m['start'] <= now_utc)].copy()

In [None]:
# Order the meetings chronologically by start time, then preview the first ten.
m = m.sort_values('start')
m.head(10)

In [None]:
# Working set: only meetings shorter than eight hours.
shorter_than_8h = m['duration'] < datetime.timedelta(hours=8)
m_ = m[shorter_than_8h]
total = m_['duration']

# The analysed span runs from the first row's start to the last row's end of
# the unfiltered frame `m`.
first_start = m.iloc[0]['start']
last_end = m.iloc[-1]['end']
bus_days = np.busday_count(first_start.date(), last_end.date())

time_in_meetings = m_['duration'].sum()
per_business_day = np.timedelta64(time_in_meetings / bus_days, 'm')

print("Time analyzed:", last_end - first_start)
print("Business days analyzed:", bus_days)
print("Total # of meetings:", total.count())
print("Total time in meetings:", time_in_meetings)
print("Time in meetings per business day: {!s}".format(per_business_day))

In [None]:
# Show the under-eight-hour meetings ordered from longest to shortest.
# NOTE(review): this displays the whole frame, not a .head() preview.
m_.sort_values('duration', ascending=False)

In [None]:
# Histogram of meeting lengths in minutes, restricted to meetings under four
# hours. total_seconds() gives the full duration; the previous `.seconds`
# attribute is only the seconds component of the timedelta.
under_4h = m_.loc[m_['duration'] < datetime.timedelta(hours=4), 'duration']
hist = (under_4h.dt.total_seconds() / 60).hist(bins=25)

hist.set_title("Meeting Duration")
hist.set_ylabel("Frequency")
hist.set_xlabel("Duration (minutes)")

In [None]:
# List the meetings whose duration is exactly two hours.
m_[m_['duration'] == datetime.timedelta(hours=2)]

In [None]:
# Index the under-eight-hour meetings by end time and clip them to the span
# covered by the unfiltered frame `m`.
m2 = m_.set_index('end').sort_index().truncate(before=m.iloc[0]['start'], after=m.iloc[-1]['end'])

# Duration of each meeting in minutes. (The previous `/ 60 * 60` cancelled out
# and produced seconds, contradicting the axis label below.)
minutes = m2['duration'].dt.total_seconds() / 60

# Total minutes per calendar day: every row is replaced by its day's sum, then
# collapsed to one value per day. Days without any meeting come out as NaN from
# the resample and are dropped, so the window below spans 15 days that had
# meetings rather than 15 calendar days.
daily_minutes = (
    minutes
    .groupby(pd.Grouper(freq='D'))  # pd.TimeGrouper no longer exists in pandas
    .transform('sum')
    .resample('D')
    .mean()
    .dropna()
)

win = daily_minutes.rolling(window=15).mean().plot()

win.set_title('Rolling Window of Time in Meetings')
win.set_xlabel('Time')
win.set_ylabel('Minutes in Meetings')

In [None]:
# Re-index the under-eight-hour meetings by start time (set_index returns a new
# frame, leaving m_ untouched) and preview the last rows.
m3 = m_.set_index('start')
m3.tail()