In [1]:
import datetime
import pathlib

from dataset import Dataset, BAD_MOOD, GOOD_MOOD, AVERAGE_MOOD

# Loading the dataset

In [2]:
# Load the Daylio CSV export; the path is joined from components so no
# OS-specific separator is hard-coded.
df = Dataset(
    csv_file_path=pathlib.Path('other') / 'daylio-data' / 'data_2023_11_08.csv',
    remove=True,  # NOTE(review): semantics of `remove` are defined in dataset.py - confirm
)

Dataset(353 entries)


In [3]:
# With no argument, `.head()` prints the dataset size followed by five entries.
df.head()

Dataset(353 entries)
[2023-11-08 12:19] 4.0 home, satisfied, relaxed
[2023-11-07 19:24] 4.0 satisfied, happy, tired, class, school
[2023-11-07 17:24] 3.5 
[2023-11-07 16:03] 4.0 home, satisfied, coding, family
[2023-11-07 13:43] 3.5 unsure, coding, home, bored, class, relaxed
...


# API

### `.sub`

Use the `.sub` method to filter entries and get a subset of the original dataset

- by included activities ('or' operator: all the entries which have at least one of the listed activities)

In [4]:
# A set passed to `incl_act` matches entries having ANY of the listed activities,
# so despite its name `cycling_df` also holds 'city' entries without cycling.
cycling_df = df.sub(incl_act={'cycling', 'city'})
cycling_df.head()
print(cycling_df.mood())  # average mood of the subset

Dataset(56 entries)
[2023-10-30 18:07] 5.0 grateful, date, city, happy, tired
[2023-10-26 22:06] 4.0 satisfied, grateful, date, city, happy, walking
[2023-10-25 19:41] 4.0 date, city, happy, walking
[2023-10-22 21:01] 4.0 grateful, excited, date, city, tired, new place
[2023-10-21 20:00] 4.0 satisfied, date, city, relaxed, movies & series
...
4.258928571428571


- by excluded activities (entries which don't have any of the listed activities)

_for both `incl_act` and `excl_act` the argument can be either `str` or `set[str]`_

In [8]:
# A plain string is accepted as well: keep only entries NOT tagged 'friends'.
without_friends = df.sub(excl_act='friends')
without_friends.head()
print(without_friends.mood())  # average mood of the remaining entries

Dataset(290 entries)
[2023-11-08 12:19] 4.0 home, satisfied, relaxed
[2023-11-07 19:24] 4.0 satisfied, happy, tired, class, school
[2023-11-07 17:24] 3.5 
[2023-11-07 16:03] 4.0 home, satisfied, coding, family
[2023-11-07 13:43] 3.5 unsure, coding, home, bored, class, relaxed
...
3.7896551724137932


- by mood values (can be either a value or a Container of values)

In [6]:
# BAD_MOOD is imported from `dataset`; its exact value(s) are defined there.
bad_mood = df.sub(mood=BAD_MOOD)
bad_mood  # bare last expression: the cell displays the dataset's repr

Dataset(14 entries)

- by included activities ('and' operator: all entries which have all the listed activities)

In [9]:
#* Note: this is achieved by filtering the dataset twice;
#* since the `.sub` method returns a new dataset, the two calls can be chained in one line
# Chaining two `.sub` calls narrows the result to entries that have both activities.
cycling_and_swimming = df.sub(incl_act='cycling').sub(incl_act='swimming')
cycling_and_swimming.head()
print(cycling_and_swimming.mood())
# Seven most frequent activities among these entries, as (name, count) pairs.
print(cycling_and_swimming.activities().most_common(7))

Dataset(10 entries)
[2023-10-13 20:10] 5.0 swimming, cycling, tired
[2023-09-16 20:58] 5.0 swimming, satisfied, cycling, tired, relaxed
[2023-09-04 20:44] 5.0 swimming, satisfied, cycling, tired, relaxed
[2023-08-22 20:27] 5.0 swimming, satisfied, cycling, friends, tired
[2023-08-19 20:32] 5.0 swimming, cycling, friends, tired, relaxed
...
4.9
[('swimming', 10), ('cycling', 10), ('tired', 10), ('satisfied', 7), ('relaxed', 6), ('friends', 6), ('city', 3)]


- by a particular date

In [8]:
# Keep only the entries logged on 22 July 2023 (`when` is given a `datetime.date`);
# n=-1 makes `.head` print every entry of the subset.
july22 = df.sub(when=datetime.date(2023, 7, 22))
july22.head(n=-1)

Dataset(6 entries)
[2023-07-22 22:17] 3.5 home, relaxed
[2023-07-22 19:34] 4.0 home, chess, relaxed
[2023-07-22 17:09] 3.5 study, home, cooking, relaxed
[2023-07-22 14:22] 3.5 unsure, coding
[2023-07-22 13:30] 3.0 friends, dota, anxious, home
[2023-07-22 11:30] 4.0 home, satisfied


- by a substring (or substrings) in the `note`

In [12]:
# Several substrings may be passed as a list (here Latin and Cyrillic spellings);
# presumably an entry matches when its note contains any of them - confirm in dataset.py.
df_Leha = df.sub(note_contains=['leha', 'лех'])
df_Leha.head(-1)  # -1 prints every matching entry

Dataset(9 entries)
[2023-11-04 20:00] 4.0 happy, satisfied, gaming, friends, amused, tired, home, relaxed
[2023-11-02 21:23] 4.0 friends, gaming, excited, amused
[2023-10-10 22:39] 3.5 friends, gaming, amused, stressed, home, productive
[2023-10-06 20:45] 4.0 happy, gaming, excited, friends
[2023-07-17 16:10] 3.5 stressed, cooking, home, chess, excited
[2023-07-14 13:33] 4.0 friends, satisfied, procrastinating, excited, coding
[2023-07-07 20:00] 3.5 friends, gaming, dota, study, walking, shopping, unsure, coding
[2023-07-03 20:05] 4.0 happy, friends, gaming, stressed, tired, home, excited, coding
[2023-07-01 20:00] 4.0 happy, friends, gaming, home, movies & series, excited, relaxed


In [12]:
# A single substring can be passed directly as a string.
df_ktane = df.sub(note_contains='ktane')
df_ktane.head(-1)  # -1 prints every matching entry

Dataset(6 entries)
[2023-10-10 22:39] 3.5 amused, gaming, stressed, home, friends, productive
[2023-10-06 20:45] 4.0 excited, friends, happy, gaming
[2023-10-04 20:13] 5.0 satisfied, grateful, friends, happy, new place, movies & series
[2023-07-07 20:00] 3.5 gaming, unsure, walking, coding, shopping, dota, friends, study
[2023-07-03 20:05] 4.0 gaming, stressed, excited, coding, home, happy, tired, friends
[2023-07-01 20:00] 4.0 gaming, excited, home, happy, friends, relaxed, movies & series


### `.head`

Use the `.head` method to look at the latest entries of the dataset

In [8]:
# Default call: a header line plus five entries, then "..." for the rest.
cycling_df.head()

Dataset(56 entries)
[2023-10-30 18:07] 5.0 city, tired, date, happy, grateful
[2023-10-26 22:06] 4.0 city, date, happy, grateful, satisfied, walking
[2023-10-25 19:41] 4.0 city, date, walking, happy
[2023-10-22 21:01] 4.0 city, tired, new place, date, grateful, excited
[2023-10-21 20:00] 4.0 city, date, relaxed, movies & series, satisfied
...


In [9]:
# The optional argument sets how many entries are printed.
cycling_df.head(2)

Dataset(56 entries)
[2023-10-30 18:07] 5.0 city, tired, date, happy, grateful
[2023-10-26 22:06] 4.0 city, date, happy, grateful, satisfied, walking
...


In [10]:
# Passing -1 prints all entries of the dataset instead of a fixed number.
cycling_df.head(-1)

Dataset(56 entries)
[2023-10-30 18:07] 5.0 city, tired, date, happy, grateful
[2023-10-26 22:06] 4.0 city, date, happy, grateful, satisfied, walking
[2023-10-25 19:41] 4.0 city, date, walking, happy
[2023-10-22 21:01] 4.0 city, tired, new place, date, grateful, excited
[2023-10-21 20:00] 4.0 city, date, relaxed, movies & series, satisfied
[2023-10-19 14:35] 4.0 friends, city
[2023-10-14 20:00] 4.0 city, new place, tired
[2023-10-13 20:10] 5.0 cycling, swimming, tired
[2023-10-08 20:00] 4.0 grateful, city, friends
[2023-10-07 15:56] 4.0 city, amused, relaxed, reading, satisfied, walking
[2023-10-03 20:01] 4.0 city, happy, friends, satisfied, walking
[2023-10-02 20:01] 5.0 city, happy, friends, satisfied, walking
[2023-09-30 13:29] 4.0 cycling, relaxed, shopping, unsure
[2023-09-23 13:55] 4.0 city, satisfied, bored
[2023-09-16 20:58] 5.0 swimming, tired, relaxed, cycling, satisfied
[2023-09-15 15:47] 4.0 city, cycling, shopping
[2023-09-10 18:40] 5.0 city, cycling, relaxed
[2023-09-09 20

### `.mood`

Use the `.mood` method to get the average mood of all the entries in the dataset

In [11]:
# Average mood of the subset, rounded to three decimals for display.
round(cycling_df.mood(), 3)

4.259

### `.count`

Use the `.count` method to count entries with the given conditions.

This is equivalent to `len(df.sub(...))` with the same arguments, but the `.count` method does not create a new dataset.

In [10]:
# Counts entries tagged 'friends' whose mood is 2 or 3
# (presumably the two filters combine with AND - confirm in dataset.py).
df.count(incl_act='friends', mood={2, 3})

7

### `.activities`

Use the `.activities` method to get a Counter object of all activities in the dataset

In [12]:
# `most_common(3)` returns the three most frequent activities as (name, count) pairs.
df.activities().most_common(3)

[('home', 197), ('relaxed', 133), ('satisfied', 107)]

### `.get_datetimes`

Use the `.get_datetimes` method to get the list of all points in time when an entry was created

In [13]:
# Slice the returned list to show only its first five timestamps.
df.get_datetimes()[:5]

[datetime.datetime(2023, 11, 8, 12, 19),
 datetime.datetime(2023, 11, 7, 19, 24),
 datetime.datetime(2023, 11, 7, 17, 24),
 datetime.datetime(2023, 11, 7, 16, 3),
 datetime.datetime(2023, 11, 7, 13, 43)]

# Analysis examples

## Mood analysis

In [18]:
activity = 'cycling'
# Unpacks a pair; judging by the names, the average mood of entries with and
# without `activity` - confirm in dataset.py.
mood_with, mood_without = df.mood_with_without(activity)

In [19]:
# Report both averages and their relative difference, one item per line.
print('\n'.join((
    f'"{activity}"',
    f'with: {mood_with:.2f}',
    f'without: {mood_without:.2f}',
    # relative change of "with" against the "without" baseline, shown as a percentage
    f'change: {(mood_with - mood_without) / mood_without:.1%}',
)))

"cycling"
with: 4.39
without: 3.77
change: 16.7%


In [11]:
# Number of entries tagged 'cycling'.
df.count(incl_act='cycling')

33

## Mood graph

In [21]:
# NOTE(review): presumably renders a plot of mood over time; no output is captured here - confirm.
df.mood_graph()