## IMPORTS

In [1]:
import ics
import pandas as pd
import matplotlib.pyplot as plt

## UTIL

In [37]:
def event_to_dict(event):
    """Flatten one calendar event into a plain dict (one future DataFrame row).

    Parameters
    ----------
    event : object
        Must expose ``name``, ``begin`` (something with a ``.date()`` method)
        and ``duration`` (a ``datetime.timedelta``).

    Returns
    -------
    dict
        ``'name'``: the event name as given,
        ``'begin'``: the start date formatted as ``'YYYY-MM-DD'``,
        ``'duration_mins'``: the full duration in minutes (float).
    """
    return {
        'name': event.name,
        'begin': event.begin.date().strftime('%Y-%m-%d'),
        # total_seconds() covers the whole span; `.seconds` alone excludes
        # the `days` component, so a 24h+ event would be under-reported.
        'duration_mins': event.duration.total_seconds() / 60,
    }

## ICS TO DF

In [38]:
# Location of the exported Google Calendar (.ics) file.
# NOTE(review): this is a machine-specific absolute path; adjust before running elsewhere.
file_path = 'D:/00_SARDARCHITECTLABS/local data/google-calendar-data/Projects_cadsvskbdp4b4a5njoeh4lp314@group.calendar.google.com.ics'

# iCalendar streams default to UTF-8, so decode explicitly instead of relying
# on the platform's default encoding.
with open(file_path, 'r', encoding='utf-8') as f:
    icsFile = ics.Calendar(f.read())

# The text has been fully read, so the per-event conversion does not need the
# file handle to stay open.
events = [event_to_dict(event) for event in icsFile.events]

In [39]:
# One row per event dict produced by event_to_dict.
df = pd.DataFrame(events)
# DataFrame.info() prints its report itself and returns None; wrapping it in
# print() only adds a stray "None" line to the output.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 993 entries, 0 to 992
Data columns (total 3 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   name           993 non-null    object 
 1   begin          993 non-null    object 
 2   duration_mins  993 non-null    float64
dtypes: float64(1), object(2)
memory usage: 23.4+ KB
None


In [40]:
# Preview the first rows of the freshly built events frame.
df.head()

Unnamed: 0,name,begin,duration_mins
0,Startup: Admin,2022-08-06,30.0
1,Help Kierra with ML,2022-04-19,105.0
2,Things to Do,2021-06-28,60.0
3,Startup: Operations,2022-09-13,30.0
4,ML: RNNs,2021-01-27,105.0


## DATA CLEANING

In [41]:
# Parse the 'begin' strings into pandas timestamps, then snap each to midnight.
begin_ts = pd.to_datetime(df['begin'])
df['begin'] = begin_ts.dt.normalize()

In [42]:
# Derive the duration in hours from the minutes column (60 minutes per hour).
df['duration_hrs'] = df['duration_mins'].div(60)

In [43]:
# Order rows newest-first by 'begin' and renumber the index from 0.
df = (
    df
    .sort_values('begin', ascending=False)
    .reset_index(drop=True)
)

In [48]:
# Show the first 30 rows of the events frame.
df.head(30)

Unnamed: 0,name,begin,duration_mins,duration_hrs
0,Meeting: Rosa Taormina,2022-11-30,60.0,1.0
1,UCSD: Unit 10,2022-11-06,30.0,0.5
2,Weekly Goals Review,2022-11-06,120.0,2.0
3,UCSD: Review,2022-11-06,45.0,0.75
4,UCSD: Review,2022-11-06,105.0,1.75
5,UCSD: Unit 10,2022-11-05,120.0,2.0
7,UCSD: Unit 10,2022-11-05,105.0,1.75
9,Meeting: Arnav,2022-11-05,30.0,0.5
10,UCSD: Unit 10,2022-11-04,120.0,2.0
12,UCSD: Review,2022-11-02,150.0,2.5


In [45]:
# Collect every distinct event name in alphabetical order, to see which
# activities exist before deciding what to filter out.
distinct_names = df["name"].drop_duplicates()
activity_list = sorted(distinct_names.tolist())
activity_list

['11:',
 '17:',
 '1:',
 '7:',
 '8:',
 'AI Papers',
 'AI Podcast',
 'Architecture Book Reading',
 'Architecture Floor Plan Generation',
 'Artificial Intelligence: State of the Art - Lex Fridman',
 'BHCC Course Registration',
 'Blog Writing',
 'Bootcamp Assesment',
 'CS: Data Structures and Algorithms',
 'Calendar Management',
 'Call: Angel',
 'Call: Arnav Dasaur',
 'Call: Mike Mooney',
 'Call: Robert Aung',
 'Call: Rosa (SBA Mentor)',
 'Call: Scott',
 'Cleaning',
 'Clubhouse with Brendan',
 'Coding Practice',
 'Cofounder Matching',
 'Computational Portfolio',
 'Computer Setup',
 'Coures: Linear Algebra',
 'Course: C++',
 'Course: CSE599',
 'Course: Data Structures',
 'Course: Discrete Math',
 'Course: EECS589',
 'Course: EECS598',
 'Course: EECS598_HW',
 'Course: Graphics',
 'Course: Math for ML',
 'Course: Reinforcement Learning',
 'Course: UCSD Prep',
 'Course: Unreal Engine',
 'Course: Unreal Engine 5',
 'Coursera: Andrew Ng',
 'Coursera: Deep Learning',
 'Daily Journal Updates',
 'D

In [46]:
# Drop the rows whose name is exactly one of the work-related labels below,
# then report how many rows remain.
work_names = ['Work: Walker Consultants', 'Work', 'EDA Work', 'Edge']
indexWork = df.index[df['name'].isin(work_names)]
df.drop(indexWork, inplace=True)
len(df)


916

## Hours worked (Daily, Weekly, Monthly)

In [47]:
# Continuous daily calendar from 2020-01-01 through 2022-12-31, held in a
# single 'date' column.
all_days = pd.date_range(start='2020-01-01', end='2022-12-31')
daterange = pd.DataFrame({'date': all_days})
daterange

Unnamed: 0,date
0,2020-01-01
1,2020-01-02
2,2020-01-03
3,2020-01-04
4,2020-01-05
...,...
1091,2022-12-27
1092,2022-12-28
1093,2022-12-29
1094,2022-12-30


In [None]:
# Keep events that begin on or after 2020-01-01.
# NOTE(review): the variable says "2022" but the cutoff is 2020; confirm which is intended.
on_or_after_cutoff = df['begin'] >= "2020-01-01"
data_2022 = df[on_or_after_cutoff]
data_2022.head()

In [None]:
# Left-join the events onto the daily calendar so every calendar day keeps a
# row, including days with no matching event.
df2022 = pd.merge(
    daterange,
    data_2022,
    how='left',
    left_on='date',
    right_on='begin',
)

In [None]:
# Display the merged calendar/events frame for inspection.
df2022

In [None]:
# Shift every date back by 7 days, then total duration_hrs per 'W-MON' bin.
# The result has one row per bin, ordered by date.
one_week = pd.Timedelta(days=7)
df2022 = (
    df2022
    .assign(date=pd.to_datetime(df2022['date']) - one_week)
    .groupby([pd.Grouper(key="date", freq='W-MON')])['duration_hrs']
    .sum()
    .reset_index()
    .sort_values('date')
)
df2022.head()

In [None]:
# Plot the weekly totals against their 'date' value rather than the positional
# row index, and label the figure so it can be read on its own.
ax = df2022.plot(x='date', y='duration_hrs', legend=False)
ax.set(title='Hours logged per week', xlabel='Week', ylabel='Hours');