In [1]:
import datetime
import pathlib

from dataset import Dataset, BAD_MOOD, GOOD_MOOD, AVERAGE_MOOD

# Loading the dataset

In [2]:
# Load the CSV export stored under other/daylio-data (path is relative to the working directory)
df = Dataset(
    csv_file_path=pathlib.Path('other') / 'daylio-data' / 'data_2023_10_07.csv',
    remove=True,
)

Dataset(267 entries)


In [3]:
# Quick preview of the loaded data (the output below shows five entries, then "...")
df.head()

Dataset(267 entries)
[2023-10-06 20:45] 4.0 friends, excited, gaming, happy
[2023-10-06 16:39] 3.5 home, relaxed
[2023-10-06 13:02] 5.0 grateful, reading, walking, satisfied, happy
[2023-10-06 01:03] 3.5 relaxed
[2023-10-05 21:10] 4.0 reading, swimming
...


# API

### `.sub`

Use the `.sub` method to filter entries and get a subset of the original dataset

- by included activities ('or' operator: all the entries which have at least one of the listed activities)

In [4]:
# NOTE(review): despite its name, `cycling_df` also contains entries tagged 'city',
# because incl_act keeps an entry when it has at least one of the listed activities.
cycling_df = df.sub(incl_act={'cycling', 'city'})
cycling_df.head()
# Average mood over the filtered entries
print(cycling_df.mood())

Dataset(46 entries)
[2023-10-03 20:01] 4.0 city, happy, satisfied, friends, walking
[2023-10-02 20:01] 5.0 city, happy, satisfied, friends, walking
[2023-09-30 13:29] 4.0 shopping, cycling, relaxed, unsure
[2023-09-23 13:55] 4.0 bored, city, satisfied
[2023-09-16 20:58] 5.0 tired, satisfied, relaxed, cycling, swimming
...
4.271739130434782


- by excluded activities (entries which don't have any of the listed activities)

In [6]:
# Keep only the entries that do not list 'friends' among their activities
without_friends = df.sub(excl_act={'friends'})
without_friends.head()
# Average mood of the remaining entries
print(without_friends.mood())

Dataset(213 entries)
[2023-10-06 16:39] 3.5 home, relaxed
[2023-10-06 13:02] 5.0 happy, reading, satisfied, grateful, walking
[2023-10-06 01:03] 3.5 relaxed
[2023-10-05 21:10] 4.0 reading, swimming
[2023-10-05 09:55] 3.5 home, relaxed
...
3.7417840375586855


- by mood values (can be either a value or a Container of values)

In [7]:
# BAD_MOOD is imported from the project's `dataset` module and selects the moods to keep
# (presumably the low-mood values -- confirm against its definition)
bad_mood = df.sub(mood=BAD_MOOD)
# Bare last expression: the notebook displays the subset's repr (its entry count)
bad_mood

Dataset(14 entries)

- by included activities ('and' operator: all entries which have all the listed activities)

In [8]:
#* Note: this is achieved by filtering the dataset twice;
#* since the .sub method returns a new dataset, the calls can be chained in one line
# 'and' filter: chain two single-activity .sub() calls so that only entries
# containing both activities remain
cycling_and_swimming = (
    df
    .sub(incl_act={'cycling'})
    .sub(incl_act={'swimming'})
)
cycling_and_swimming.head()
print(cycling_and_swimming.mood())
# Seven most frequent activities among the matching entries
print(cycling_and_swimming.activities().most_common(7))

Dataset(9 entries)
[2023-09-16 20:58] 5.0 tired, satisfied, relaxed, cycling, swimming
[2023-09-04 20:44] 5.0 tired, satisfied, relaxed, cycling, swimming
[2023-08-22 20:27] 5.0 tired, satisfied, friends, cycling, swimming
[2023-08-19 20:32] 5.0 tired, friends, relaxed, cycling, swimming
[2023-08-16 20:03] 5.0 tired, city, happy, satisfied, friends, cycling, swimming
...
4.888888888888889
[('tired', 9), ('cycling', 9), ('swimming', 9), ('satisfied', 7), ('relaxed', 6), ('friends', 6), ('city', 3)]


- by a particular date

In [9]:
# `datetime` comes from the notebook's import cell at the top, so a fresh
# Restart & Run All shows every dependency in one place.
# Keep only the entries recorded on 22 July 2023.
july22 = df.sub(when=datetime.date(2023, 7, 22))
# n=-1 prints every entry of the subset instead of a short preview
july22.head(n=-1)

Dataset(6 entries)
[2023-07-22 22:17] 3.5 home, relaxed
[2023-07-22 19:34] 4.0 home, chess, relaxed
[2023-07-22 17:09] 3.5 study, home, cooking, relaxed
[2023-07-22 14:22] 3.5 coding, unsure
[2023-07-22 13:30] 3.0 dota, friends, home, anxious
[2023-07-22 11:30] 4.0 home, satisfied


### `.head`

Use the `.head` method to look at the latest entries of the dataset

In [10]:
# No argument: a short preview of the newest entries, ending with "..."
cycling_df.head()

Dataset(46 entries)
[2023-10-03 20:01] 4.0 city, happy, satisfied, friends, walking
[2023-10-02 20:01] 5.0 city, happy, satisfied, friends, walking
[2023-09-30 13:29] 4.0 shopping, cycling, relaxed, unsure
[2023-09-23 13:55] 4.0 bored, city, satisfied
[2023-09-16 20:58] 5.0 tired, satisfied, relaxed, cycling, swimming
...


In [11]:
# The positional argument limits the preview to that many entries (two here)
cycling_df.head(2)

Dataset(46 entries)
[2023-10-03 20:01] 4.0 city, happy, satisfied, friends, walking
[2023-10-02 20:01] 5.0 city, happy, satisfied, friends, walking
...


In [12]:
# Passing -1 prints all entries instead of a short preview
cycling_df.head(-1)

Dataset(46 entries)
[2023-10-03 20:01] 4.0 city, happy, satisfied, friends, walking
[2023-10-02 20:01] 5.0 city, happy, satisfied, friends, walking
[2023-09-30 13:29] 4.0 shopping, cycling, relaxed, unsure
[2023-09-23 13:55] 4.0 bored, city, satisfied
[2023-09-16 20:58] 5.0 tired, satisfied, relaxed, cycling, swimming
[2023-09-15 15:47] 4.0 shopping, cycling, city
[2023-09-10 18:40] 5.0 city, cycling, relaxed
[2023-09-09 20:01] 4.0 shopping, chess, bored, did something, relaxed, home, cycling
[2023-09-08 21:12] 4.0 procrastinating, study, shopping, satisfied, bored, home, cycling
[2023-09-05 09:55] 4.0 city, friends, excited
[2023-09-04 20:44] 5.0 tired, satisfied, relaxed, cycling, swimming
[2023-09-01 20:05] 4.0 city, friends, swimming, satisfied
[2023-08-28 23:06] 4.0 tired, city, new place, travel, satisfied, anxious, walking
[2023-08-25 20:04] 3.5 tired, movies & series, cinema, city
[2023-08-25 16:08] 4.0 tired, new place, city, satisfied
[2023-08-22 20:27] 5.0 tired, satisfied, 

### `.mood`

Use the `.mood` method to get the average mood of all the entries in the dataset

In [13]:
# Average mood of the filtered entries, rounded to three decimal places
round(cycling_df.mood(), 3)

4.272

### `.count`

Use the `.count` method to count entries with the given conditions.

This is equivalent to `len(df.sub(...))` with the same arguments, but the `.count` method does not create a new dataset.

In [8]:
# Count entries that include 'friends' and whose mood is 2 or 3
# (assumes the two conditions are combined with AND -- confirm in Dataset.count)
df.count(incl_act={'friends'}, mood={2, 3})

2

### `.activities`

Use the `.activities` method to get a Counter object of all activities in the dataset

In [14]:
# .activities() returns a Counter, so most_common(3) yields the three most frequent
# activities together with their counts
df.activities().most_common(3)

[('home', 158), ('relaxed', 106), ('satisfied', 81)]

### `.get_datetimes`

Use the `.get_datetimes` method to get the list of all points in time when an entry was created

In [15]:
# Slice to the first five datetimes to keep the output short
df.get_datetimes()[:5]

[datetime.datetime(2023, 10, 6, 20, 45),
 datetime.datetime(2023, 10, 6, 16, 39),
 datetime.datetime(2023, 10, 6, 13, 2),
 datetime.datetime(2023, 10, 6, 1, 3),
 datetime.datetime(2023, 10, 5, 21, 10)]

# Analysis examples

## Mood analysis

In [4]:
# Activity whose relationship with mood is examined in the following cells
activity = 'cycling'
# Unpacks two values: mood for entries with the activity and mood for entries without it
# (presumably averages, like .mood() -- confirm in Dataset.mood_with_without)
mood_with, mood_without = df.mood_with_without(activity)

In [5]:
# Report both mood values and the relative change of "with" compared to "without"
print(
    f'"{activity}"\n'
    f'with: {mood_with:.2f}\n'
    f'without: {mood_without:.2f}\n'
    f'change: {(mood_with - mood_without) / mood_without:.1%}'
)

"cycling"
with: 4.38
without: 3.71
change: 17.8%


In [6]:
# Number of entries that include 'cycling' -- context for how many entries the
# "with" figure above is presumably based on
df.count(incl_act={'cycling'})

32

## Mood graph

In [3]:
# Call the dataset's mood-graph helper on the full dataset
# (presumably renders a plot -- no output is visible here; confirm in Dataset.mood_graph)
df.mood_graph()