In [1]:
import datetime
import pathlib

from dataset import Dataset, BAD_MOOD, Entry

# Loading the dataset

In [2]:
# Directory holding the exported CSV file(s), relative to the notebook's working directory.
DATA_DIR = pathlib.Path('other', 'daylio-data')

# `Path.glob` yields matches in an arbitrary, OS-dependent order, so pick the file
# deterministically: the greatest path in sort order. With date-stamped names such as
# `data_2024_01_24.csv` that is presumably the most recent export — confirm the naming scheme.
path = max(DATA_DIR.glob('*.csv'), default=None)
if path is None:
    # Fail with an actionable message instead of a bare StopIteration.
    raise FileNotFoundError(f'no CSV files found in {DATA_DIR.resolve()}')
print(path)

other\daylio-data\data_2024_01_24.csv


In [3]:
# Build the Dataset from the CSV file located in the previous cell.
df = Dataset(csv_file_path=path)

Dataset(591 entries)


In [4]:
# Preview the latest entries, then print the average mood over the whole dataset.
df.head()
print(f'{df.mood()=}')

Dataset(591 entries)
[24.01.2024 18:41] 4.0 coding, satisfied, tired, did something, productive
[24.01.2024 18:33] 4.5 grateful, happy
[24.01.2024 02:16] 3.5 relaxed, home
[24.01.2024 00:57] 3.5 tired, home
[23.01.2024 22:12] 5.0 Damir, satisfied, tired, happy, swimming, friends
...
df.mood()=3.8587140439932317


# API

### `.sub`

Use the `.sub` method to filter entries and get a subset of the original dataset

- by included activities ('or' operator: all the entries which have at least one of the listed activities)

In [5]:
# A set passed as `incl_act` keeps entries tagged with 'cycling' OR 'city' (at least one of them).
cycling_or_city_df = df.sub(incl_act={'cycling', 'city'})
cycling_or_city_df.head()
print(cycling_or_city_df.mood())

Dataset(69 entries)
[13.01.2024 16:48] 4.0 Lauren, date, excited, home, city, grateful, movies & series
[05.01.2024 14:06] 4.0 Lauren, date, city, grateful, unsure, anxious
[04.01.2024 20:56] 4.0 city, tired, Damir
[31.12.2023 16:48] 2.5 Lauren, sad, date, annoyed, city
[29.12.2023 14:36] 4.5 Lauren, date, excited, city, new place, grateful, happy
...
4.188405797101449


- by excluded activities (entries which don't have any of the listed activities)

_for both `incl_act` and `excl_act` the argument can be either `str` or `set[str]`_

In [6]:
# A single activity can be passed as a plain string instead of a set.
without_friends = df.sub(excl_act='friends')
without_friends.head()
print(without_friends.mood())

Dataset(501 entries)
[24.01.2024 18:41] 4.0 coding, satisfied, tired, did something, productive
[24.01.2024 18:33] 4.5 grateful, happy
[24.01.2024 02:16] 3.5 relaxed, home
[24.01.2024 00:57] 3.5 tired, home
[23.01.2024 18:47] 4.0 coding, satisfied, tired, did something, productive
...
3.8353293413173652


- by mood values (can be either a value or a Container of values)

In [7]:
# BAD_MOOD is imported from the `dataset` module; its exact contents are not shown in this notebook.
bad_mood = df.sub(mood=BAD_MOOD)
bad_mood

Dataset(19 entries)

- by included activities ('and' operator: all entries which have all the listed activities)

In [8]:
 #* Note: this is achieved by calling `.sub` twice;
 #* since the `.sub` method returns a new dataset, the two calls can be chained in one line
cycling_and_swimming = df.sub(incl_act='cycling').sub(incl_act='swimming')
cycling_and_swimming.head()
print(cycling_and_swimming.mood())
# The 7 most frequent activities within this subset.
print(cycling_and_swimming.activities().most_common(7))

Dataset(10 entries)
[13.10.2023 20:10] 5.0 tired, swimming, cycling
[16.09.2023 20:58] 5.0 satisfied, tired, cycling, relaxed, swimming
[04.09.2023 20:44] 5.0 Damir, satisfied, tired, cycling, relaxed, swimming
[22.08.2023 20:27] 5.0 satisfied, tired, cycling, swimming, friends
[19.08.2023 20:32] 5.0 Damir, tired, cycling, relaxed, swimming, friends
...
4.85
[('tired', 10), ('swimming', 10), ('cycling', 10), ('satisfied', 7), ('relaxed', 6), ('friends', 6), ('Damir', 5)]


- by a particular date

In [9]:
# `when` accepts a `datetime.date` or the equivalent 'DD.MM.YYYY' string.
july22 = df.sub(when=datetime.date(2023, 7, 22))  # or when='22.07.2023'
# A negative `n` prints every entry instead of only a short preview.
july22.head(n=-1)

Dataset(6 entries)
[22.07.2023 22:17] 3.5 relaxed, home
[22.07.2023 19:34] 4.0 chess, relaxed, home
[22.07.2023 17:09] 3.5 study, relaxed, home, cooking
[22.07.2023 14:22] 3.5 unsure, coding
[22.07.2023 13:30] 3.0 home, dota, friends, anxious
[22.07.2023 11:30] 4.0 satisfied, home


- by a substring (or substrings) in the `note`

In [10]:
# Keep only the entries whose note contains the substring 'ktane', then print all of them.
df_ktane = df.sub(note_contains='ktane')
df_ktane.head(-1)

Dataset(7 entries)
[22.11.2023 21:41] 4.0 Leha, satisfied, home, amused, movies & series, gaming, Martina
[10.10.2023 22:39] 3.5 productive, Leha, home, amused, gaming, friends, stressed
[06.10.2023 20:45] 4.0 Leha, excited, happy, gaming, friends
[04.10.2023 20:13] 5.0 Lauren, satisfied, new place, grateful, happy, movies & series, friends
[07.07.2023 20:00] 3.5 coding, shopping, Leha, walking, study, dota, gaming, friends, unsure
[03.07.2023 20:05] 4.0 coding, Leha, excited, tired, home, happy, gaming, friends, stressed
[01.07.2023 20:00] 4.0 Leha, excited, home, relaxed, movies & series, happy, gaming, Martina, friends


- by a predicate function (which takes an entry as an argument and returns a boolean)

In [11]:
def pred(entry: Entry) -> bool:
    """Return True for a Sunday entry with mood above 4 and a non-empty note.

    Args:
        entry: the entry to test; its `full_date`, `mood` and `note`
            attributes are read.

    Returns:
        True only when all three conditions hold, otherwise False.
    """
    # `weekday()` numbers Monday as 0, so Sunday is 6.
    is_sunday = entry.full_date.weekday() == 6
    return is_sunday and entry.mood > 4 and entry.note != ''


# Pass the predicate defined above; `.sub` keeps the entries for which it returns True.
df_sunday_good_mood_has_note = df.sub(predicate=pred)
df_sunday_good_mood_has_note

Dataset(18 entries)

### `.head`

Use the `.head` method to look at the latest entries of the dataset

In [12]:
# With no argument, `.head` prints the first 5 entries followed by '...'.
cycling_or_city_df.head()

Dataset(69 entries)
[13.01.2024 16:48] 4.0 Lauren, date, excited, home, city, grateful, movies & series
[05.01.2024 14:06] 4.0 Lauren, date, city, grateful, unsure, anxious
[04.01.2024 20:56] 4.0 city, tired, Damir
[31.12.2023 16:48] 2.5 Lauren, sad, date, annoyed, city
[29.12.2023 14:36] 4.5 Lauren, date, excited, city, new place, grateful, happy
...


In [13]:
# Limit the preview to the first 2 entries.
cycling_or_city_df.head(2)

Dataset(69 entries)
[13.01.2024 16:48] 4.0 Lauren, date, excited, home, city, grateful, movies & series
[05.01.2024 14:06] 4.0 Lauren, date, city, grateful, unsure, anxious
...


In [14]:
# A negative count prints all entries instead of a short preview.
cycling_or_city_df.head(-1)

Dataset(69 entries)
[13.01.2024 16:48] 4.0 Lauren, date, excited, home, city, grateful, movies & series
[05.01.2024 14:06] 4.0 Lauren, date, city, grateful, unsure, anxious
[04.01.2024 20:56] 4.0 city, tired, Damir
[31.12.2023 16:48] 2.5 Lauren, sad, date, annoyed, city
[29.12.2023 14:36] 4.5 Lauren, date, excited, city, new place, grateful, happy
[23.12.2023 14:22] 4.0 Lauren, date, satisfied, tired, city, new place, happy
[17.12.2023 19:22] 4.0 Lauren, date, tired, city, relaxed, grateful, happy
[17.12.2023 14:10] 3.5 Lauren, date, bored, tired, city
[13.12.2023 15:27] 5.0 Lauren, date, satisfied, city, happy
[13.12.2023 13:03] 3.5 nervous, city
[10.12.2023 20:00] 4.5 satisfied, tired, city, walking
[30.11.2023 23:37] 3.5 Lauren, tired, date, city
[24.11.2023 21:50] 4.0 Damir, satisfied, tired, social, city, happy
[30.10.2023 18:07] 5.0 Lauren, date, tired, city, grateful, happy
[26.10.2023 22:06] 4.0 Lauren, date, walking, satisfied, city, grateful, happy
[25.10.2023 19:41] 4.0 Laur

### `.mood`

Use the `.mood` method to get the average mood of all the entries in the dataset

In [15]:
# Round the average mood to 3 decimal places for display.
round(cycling_or_city_df.mood(), 3)

4.188

### `.activities`

Use the `.activities` method to get a Counter object of all activities in the dataset

In [16]:
cnt = df.activities()
print(f'most common: {cnt.most_common(3)}')
# `most_common()` is ordered by descending count, so this reversed slice
# walks back from the end of the list: the 5 rarest activities.
print(f'least common: {cnt.most_common()[-1:-6:-1]}')


most common: [('home', 341), ('satisfied', 195), ('relaxed', 190)]
least common: [('helping', 1), ('meeting', 1), ('Dad', 1), ('guitar', 2), ('photography', 2)]


In [17]:
# Full ranking from rarest to most frequent: `most_common()` is ordered by
# descending count, so reverse it.
least_common = list(reversed(df.activities().most_common()))
least_common

[('helping', 1),
 ('meeting', 1),
 ('Dad', 1),
 ('guitar', 2),
 ('photography', 2),
 ('homework', 2),
 ('snacking', 3),
 ('laundry', 3),
 ('cinema', 3),
 ('reading', 3),
 ('group project', 3),
 ('Mom', 4),
 ('overheated', 5),
 ('travel', 6),
 ('annoyed', 6),
 ('sick', 7),
 ('exam', 9),
 ('family', 9),
 ('cleaning', 9),
 ('nervous', 10),
 ('worried', 10),
 ('stressed', 11),
 ('angry', 11),
 ('Azat', 11),
 ('productive', 11),
 ('underslept', 12),
 ('new place', 13),
 ('sad', 14),
 ('did something', 14),
 ('social', 15),
 ('Lion', 15),
 ('shopping', 16),
 ('woke up early', 17),
 ('Leha', 19),
 ('Damir', 20),
 ('walking', 24),
 ('class', 24),
 ('Martina', 24),
 ('swimming', 25),
 ('cooking', 26),
 ('piano', 27),
 ('dota', 29),
 ('school', 30),
 ('cycling', 33),
 ('chess', 34),
 ('procrastinating', 35),
 ('gaming', 43),
 ('anxious', 44),
 ('city', 45),
 ('amused', 46),
 ('bored', 52),
 ('study', 59),
 ('grateful', 59),
 ('movies & series', 78),
 ('excited', 78),
 ('friends', 90),
 ('unsure'

### `.get_datetimes`

Use the `.get_datetimes` method to get the list of all points in time when an entry was created

In [18]:
# Slice the returned list to preview only the first five timestamps.
df.get_datetimes()[:5]

[datetime.datetime(2024, 1, 24, 18, 41),
 datetime.datetime(2024, 1, 24, 18, 33),
 datetime.datetime(2024, 1, 24, 2, 16),
 datetime.datetime(2024, 1, 24, 0, 57),
 datetime.datetime(2024, 1, 23, 22, 12)]

### `.group_by_day`
Use the `.group_by_day` method to get a dictionary mapping dates to a list of entries created on that date.
The dates are sorted in ascending order.
Note: the result of this method is cached with `lru_cache`; since the `Dataset` is weakly immutable, relying on the cached result is safe.

In [19]:
groups = df.group_by_day()
# Peek at the first item only — the earliest date, since the mapping is in ascending date order.
for day, entries in groups.items():
    print(f'{day}: {len(entries)}')
    break

2023-07-01: 1


### `.__iter__`
`Dataset` defines the `__iter__` method, so it can be used in `for` loops. Entries are yielded in descending order of creation date (newest first).

In [20]:
# Iteration yields the newest entry first; stop after one to show just the latest entry.
for entry in df:
    print(entry)
    break

[24.01.2024 18:41] 4.0 coding, satisfied, tired, did something, productive


### `.__getitem__`
`Dataset` defines the `__getitem__` method, which takes a date in string format (e.g. `'01.01.2024'`) and returns a list of the entries created on that date.

Note: It is equivalent to `df.sub(when='01.01.2024').entries[::-1]` (it's reversed because in the root dataset the entries are sorted in descending order by the date of creation).

In [21]:
# Indexing with a 'DD.MM.YYYY' string returns that day's entries as a list.
july22_entries = df['22.07.2023']
# `july22` is the subset built earlier with `.sub(when=...)`; its entries are
# reversed here to match the order returned by indexing.
assert july22_entries == july22.entries[::-1]

# Analysis examples

## Mood analysis

In [22]:
activity = 'cycling'
# Presumably the average mood of entries with the activity vs. entries without it —
# confirm against `Dataset.mood_with_without`.
mood_with, mood_without = df.mood_with_without(activity)

In [23]:
# Relative change is (with - without) / without, formatted as a percentage.
print(f'''"{activity}"
with: {mood_with:.2f}
without: {mood_without:.2f}
change: {(mood_with - mood_without)/mood_without:.1%}'''
)

"cycling"
with: 4.38
without: 3.83
change: 14.4%


## Complete analysis

In [24]:
# Each item unpacks as (name, mood with, mood without, relative change, number of occurrences),
# as the loop in the next cell shows.
complete_analysis = df.complete_analysis()

In [25]:
# NOTE(review): 'actvities' and 'occured' are misspelled in the printed strings below;
# fixing them changes the output, so re-run the cell when doing so.
print(f'analysed {len(complete_analysis)} actvities')
for _name, _with, _without, _change, _num_occ in complete_analysis:
    # `{_name:^15}` centers the activity name in a 15-character column.
    print(f'[{_name:^15}]: {_change:.1%} (with: {_with:.2f}, without: {_without:.2f}); occured {_num_occ} times')
# TODO: maybe make this a pretty figure?

analysed 46 actvities
[     happy     ]: 18.6% (with: 4.38, without: 3.70); occured 140 times
[   swimming    ]: 17.5% (with: 4.50, without: 3.83); occured 25 times
[   grateful    ]: 16.0% (with: 4.41, without: 3.80); occured 59 times
[     Damir     ]: 14.6% (with: 4.40, without: 3.84); occured 20 times
[    cycling    ]: 14.4% (with: 4.38, without: 3.83); occured 33 times
[    Lauren     ]: 12.9% (with: 4.24, without: 3.76); occured 123 times
[     date      ]: 12.5% (with: 4.25, without: 3.77); occured 105 times
[   satisfied   ]: 10.6% (with: 4.12, without: 3.73); occured 195 times
[     city      ]: 8.7% (with: 4.17, without: 3.83); occured 45 times
[    walking    ]: 8.3% (with: 4.17, without: 3.85); occured 24 times
[   new place   ]: 6.8% (with: 4.12, without: 3.85); occured 13 times
[movies & series]: 6.8% (with: 4.08, without: 3.82); occured 78 times
[     piano     ]: 6.4% (with: 4.09, without: 3.85); occured 27 times
[    amused     ]: 5.5% (with: 4.05, without: 3.84); occ

## Mood plot

In [26]:
# Draws the mood plot for the whole dataset — presumably mood over time; confirm in `Dataset.mood_plot`.
df.mood_plot()

## Bar plot by hour/day/weekday/month

In [27]:
# Bar plot grouped by 'hour' — presumably hour of the day; confirm in `Dataset.by_time_bar_plot`.
df.by_time_bar_plot('hour')

In [28]:
# NOTE(review): 'day' presumably means day of the month (a separate 'weekday' option exists) — confirm.
df.by_time_bar_plot('day')

In [29]:
# Same plot, grouped by 'weekday'.
df.by_time_bar_plot('weekday')

In [30]:
# Same plot, grouped by 'month'.
df.by_time_bar_plot('month')

## Other features

In [31]:
# NOTE(review): the meaning of 400 is not documented here — presumably a cap related to
# note length; confirm against `Dataset.note_length_plot`.
df.note_length_plot(400)