# RescueTime: 
## Data Import, Data Visualization, Data Analysis

NOTE: Run RescueTime Downloader to Get your info

-----------

### Libraries and Dependencies

In [5]:
# Add Data Science Python Dependencies
import numpy as np
import pandas as pd
# from matplotlib import matplotlib.plot as plt
import seaborn as sns

## Import Data

In [62]:
activities = pd.read_csv("data/rescuetime-full-data-export.csv")
# if exists remove old index columns
activities.drop(['Unnamed: 0'], axis=1, inplace=True)
# relabel date to timestamp
activities.columns = ['Timestamp', 'Seconds', 'NumberPeople', 'Actitivity', 'Document', 'Category',
       'Productivity']

In [78]:
# activities.tail()

In [42]:
len(activities)

795418

In [67]:
activities.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 795418 entries, 0 to 795417
Data columns (total 7 columns):
Timestamp       795418 non-null object
Seconds         795418 non-null int64
NumberPeople    795418 non-null int64
Actitivity      795418 non-null object
Document        795408 non-null object
Category        795418 non-null object
Productivity    795418 non-null int64
dtypes: int64(3), object(4)
memory usage: 42.5+ MB


In [79]:
# activities.describe()

------

## Data Processing

In [94]:
# new column for productivitity level 
# as very productive, productive, neutral, distracted, very distracted
activities['Productive'] = activities['Productivity']

activities['Productive'] = activities['Productive'].map({-2: 'very distracting', 
                                                        -1: 'distracting',
                                                       0: 'neutral',
                                                       1: 'productive',
                                                       2: 'very productive'})
activities.Productive.unique()

array(['neutral', 'productive', 'very productive', 'very distracting',
       'distracting'], dtype=object)

In [68]:
activities['Timestamp'] = pd.to_datetime(activities['Timestamp'])

In [70]:
# reorder and reindex based on date
activities = activities.sort_values(by='Timestamp').reset_index(drop=True)
# activities.reset_index(drop=True)

In [69]:
# date additions of Date timestamp
activities['Date'] = activities['Timestamp'].apply(lambda x: x.strftime('%Y-%m-%d')) # note: not very efficient
activities['Year'] = activities['Timestamp'].dt.year
activities['Month'] = activities['Timestamp'].dt.month
activities['Mnth_yr'] = activities['Timestamp'].apply(lambda x: x.strftime('%Y-%m')) # note: not very efficient
activities['Day'] = activities['Timestamp'].dt.day
activities['Dow'] = activities['Timestamp'].dt.weekday
activities['Hour'] = activities['Timestamp'].dt.hour

In [86]:
activities.tail()

Unnamed: 0,Timestamp,Seconds,NumberPeople,Actitivity,Document,Category,Productivity,Date,Year,Month,Mnth_yr,Day,Dow,Hour,Productive
795413,2018-09-19 23:00:00,42,1,issues.int3c.com,No Details,General Software Development,2,2018-09-19,2018,9,2018-09,19,2,23,very productive
795414,2018-09-19 23:00:00,111,1,drupal.org,No Details,General Software Development,2,2018-09-19,2018,9,2018-09,19,2,23,very productive
795415,2018-09-19 23:00:00,576,1,promos.testing.bookloverscon.com,No Details,General Software Development,2,2018-09-19,2018,9,2018-09,19,2,23,very productive
795416,2018-09-19 23:00:00,1,1,loginwindow,No Details,General Utilities,1,2018-09-19,2018,9,2018-09,19,2,23,productive
795417,2018-09-19 23:00:00,11,1,todoist,No Details,Project Management,2,2018-09-19,2018,9,2018-09,19,2,23,very productive


-----

# Total Computer Time per Day

In [72]:
total_computer_time_by_date = activities.groupby(['Date'])['Seconds'].sum().reset_index(name='Seconds')

In [73]:
total_computer_time_by_date['Minutes'] = round(total_computer_time_by_date['Seconds'] / 60, 2)
total_computer_time_by_date['Hours'] = round(total_computer_time_by_date['Seconds'] / 60 / 60, 2)

In [74]:
total_computer_time_by_date.tail()

Unnamed: 0,Date,Seconds,Minutes,Hours
2191,2018-09-15,28097,468.28,7.8
2192,2018-09-16,31573,526.22,8.77
2193,2018-09-17,27662,461.03,7.68
2194,2018-09-18,31094,518.23,8.64
2195,2018-09-19,39104,651.73,10.86


In [114]:
total_computer_time_by_date.to_csv("data/dates_computer_time.csv")

----

## Daily Productivity

In [96]:
total_by_date_productivity = activities.groupby(['Date', 'Productive'])['Seconds'].sum().reset_index(name='Seconds')

In [97]:
total_by_date_productivity['Minutes'] = round((total_by_date_productivity['Seconds'] / 60), 2)

In [98]:
# total_by_date_productivity
table = total_by_date_productivity.pivot_table(index='Date', columns='Productive', values='Seconds', aggfunc=np.sum)

In [99]:
# total_by_date_productivity
table = total_by_date_productivity.pivot_table(index='Date', columns='Productive', values='Minutes', aggfunc=np.sum)
table.tail(10)

Productive,distracting,neutral,productive,very distracting,very productive
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018-09-10,,39.12,54.23,5.72,367.37
2018-09-11,0.23,61.6,35.42,21.27,351.3
2018-09-12,1.68,44.88,20.12,2.02,348.03
2018-09-13,0.42,60.47,55.63,7.35,410.18
2018-09-14,0.47,72.77,26.57,32.27,470.47
2018-09-15,3.2,23.92,7.1,19.7,414.37
2018-09-16,0.1,58.72,40.2,20.52,406.68
2018-09-17,,95.68,15.82,1.35,348.18
2018-09-18,0.65,12.97,56.85,,447.77
2018-09-19,6.85,34.87,120.37,7.02,482.63


In [102]:
days_productive_time = table.copy()

In [105]:
days_productive_time['productive_simple'] = days_productive_time['productive'] + days_productive_time['very productive']
days_productive_time.drop(['productive', 'very productive'], axis=1, inplace=True)
days_productive_time['distracting_simple'] = days_productive_time['distracting'] + days_productive_time['very distracting']
days_productive_time.drop(['distracting', 'very distracting'], axis=1, inplace=True)

In [107]:
days_productive_time.columns = ['Neutral', 'Productive', 'Distracting']

In [108]:
days_productive_time.tail()

Unnamed: 0_level_0,Neutral,Productive,Distracting
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2018-09-15,23.92,421.47,22.9
2018-09-16,58.72,446.88,20.62
2018-09-17,95.68,364.0,
2018-09-18,12.97,504.62,
2018-09-19,34.87,603.0,13.87


In [109]:
days_productive_time.to_csv("data/days_productive_time.csv")

---

# Productive Hours

In [110]:
hourly = activities.groupby(['Date', 'Hour', 'Productive'])['Seconds'].sum().reset_index(name='Seconds')


In [111]:
hourly.tail()

Unnamed: 0,Date,Hour,Productive,Seconds
87071,2018-09-19,22,neutral,9
87072,2018-09-19,22,productive,476
87073,2018-09-19,22,very productive,3114
87074,2018-09-19,23,productive,1
87075,2018-09-19,23,very productive,745


In [113]:
hourly.to_csv("data/dates_hourly_productive_time.csv")

---------------------

## Data Exploration and Data Visualization