In [1]:
import datetime
import pathlib

from dataset import Dataset, BAD_MOOD, GOOD_MOOD, AVERAGE_MOOD

# Loading the dataset

In [2]:
# Load the CSV export stored under other/daylio-data into a Dataset.
# NOTE(review): the effect of `remove=True` is defined in dataset.Dataset and is not visible here - confirm.
df = Dataset(csv_file_path=pathlib.Path('other', 'daylio-data', 'data_2023_12_02.csv'), remove=True)

Dataset(435 entries)


In [3]:
# Preview the latest entries, then print the average mood over the whole dataset.
df.head()
print(df.mood())

Dataset(435 entries)
[02.12.2023 20:24] 4.0 Martina, movies & series, amused, friends, home
[02.12.2023 20:18] 3.5 home, Leha, excited, coding
[01.12.2023 20:01] 3.5 unsure, satisfied, coding, home, excited
[01.12.2023 19:25] 3.5 unsure, tired, coding
[01.12.2023 00:19] 3.0 home, tired, relaxed
...
3.8183908045977013


# API

### `.sub`

Use the `.sub` method to filter entries and get a subset of the original dataset

- by included activities ('or' operator: all the entries which have at least one of the listed activities)

In [4]:
# 'or' filter: keep entries that have at least one of 'cycling' / 'city'.
cycling_or_city_df = df.sub(incl_act={'cycling', 'city'})
cycling_or_city_df.head()
print(cycling_or_city_df.mood())  # average mood of the subset

Dataset(58 entries)
[30.11.2023 23:37] 3.5 city, Lauren, date, tired
[24.11.2023 21:50] 4.0 satisfied, happy, Damir, city, social, tired
[30.10.2023 18:07] 5.0 happy, date, grateful, city, Lauren, tired
[26.10.2023 22:06] 4.0 satisfied, happy, date, grateful, city, Lauren, walking
[25.10.2023 19:41] 4.0 happy, date, city, Lauren, walking
...
4.232758620689655


- by excluded activities (entries which don't have any of the listed activities)

_for both `incl_act` and `excl_act` the argument can be either `str` or `set[str]`_

In [5]:
# Exclusion filter; a single activity can be passed as a plain string instead of a set.
without_friends = df.sub(excl_act='friends')
without_friends.head()
print(without_friends.mood())  # average mood of the subset

Dataset(361 entries)
[02.12.2023 20:18] 3.5 home, Leha, excited, coding
[01.12.2023 20:01] 3.5 unsure, satisfied, coding, home, excited
[01.12.2023 19:25] 3.5 unsure, tired, coding
[01.12.2023 00:19] 3.0 home, tired, relaxed
[30.11.2023 23:37] 3.5 city, Lauren, date, tired
...
3.78393351800554


- by mood values (can be either a value or a Container of values)

In [6]:
# Keep only the entries whose mood matches BAD_MOOD (imported from the dataset module).
bad_mood = df.sub(mood=BAD_MOOD)
bad_mood  # last expression of the cell: displays the subset's repr

Dataset(17 entries)

- by included activities ('and' operator: all entries which have all the listed activities)

In [7]:
 #* Note: 'and' filtering is achieved by calling .sub twice, once per activity;
 #* since the .sub method returns a new dataset, the calls can be chained in one line
cycling_and_swimming = df.sub(incl_act='cycling').sub(incl_act='swimming')
cycling_and_swimming.head()
print(cycling_and_swimming.mood())
print(cycling_and_swimming.activities().most_common(7))

Dataset(10 entries)
[13.10.2023 20:10] 5.0 cycling, tired, swimming
[16.09.2023 20:58] 5.0 cycling, swimming, satisfied, relaxed, tired
[04.09.2023 20:44] 5.0 cycling, swimming, satisfied, Damir, relaxed, tired
[22.08.2023 20:27] 5.0 cycling, swimming, satisfied, friends, tired
[19.08.2023 20:32] 5.0 cycling, swimming, Damir, friends, relaxed, tired
...
4.85
[('cycling', 10), ('tired', 10), ('swimming', 10), ('satisfied', 7), ('relaxed', 6), ('friends', 6), ('Damir', 5)]


- by a particular date

In [8]:
# Keep only the entries recorded on 22 July 2023 (`datetime` is imported in the
# notebook's first cell together with the other imports).
july22 = df.sub(when=datetime.date(2023, 7, 22))
# n=-1 prints every entry of the subset instead of the default short preview.
july22.head(n=-1)

Dataset(6 entries)
[22.07.2023 22:17] 3.5 home, relaxed
[22.07.2023 19:34] 4.0 chess, home, relaxed
[22.07.2023 17:09] 3.5 home, cooking, relaxed, study
[22.07.2023 14:22] 3.5 unsure, coding
[22.07.2023 13:30] 3.0 dota, friends, home, anxious
[22.07.2023 11:30] 4.0 home, satisfied


- by a substring (or substrings) in the `note`

In [9]:
# Keep the entries whose note contains the substring 'ktane'; head(-1) prints the whole subset.
df_ktane = df.sub(note_contains='ktane')
df_ktane.head(-1)

Dataset(7 entries)
[22.11.2023 21:41] 4.0 Martina, gaming, Leha, satisfied, movies & series, amused, home
[10.10.2023 22:39] 3.5 gaming, Leha, amused, friends, stressed, home, productive
[06.10.2023 20:45] 4.0 gaming, Leha, happy, friends, excited
[04.10.2023 20:13] 5.0 satisfied, happy, movies & series, grateful, friends, new place, Lauren
[07.07.2023 20:00] 3.5 gaming, unsure, Leha, shopping, coding, dota, friends, study, walking
[03.07.2023 20:05] 4.0 gaming, Leha, happy, coding, friends, stressed, home, tired, excited
[01.07.2023 20:00] 4.0 Martina, gaming, Leha, happy, movies & series, friends, relaxed, home, excited


### `.head`

Use the `.head` method to look at the latest entries of the dataset

In [10]:
# With no argument, .head prints a short preview (5 entries in the recorded output).
cycling_or_city_df.head()

Dataset(58 entries)
[30.11.2023 23:37] 3.5 city, Lauren, date, tired
[24.11.2023 21:50] 4.0 satisfied, happy, Damir, city, social, tired
[30.10.2023 18:07] 5.0 happy, date, grateful, city, Lauren, tired
[26.10.2023 22:06] 4.0 satisfied, happy, date, grateful, city, Lauren, walking
[25.10.2023 19:41] 4.0 happy, date, city, Lauren, walking
...


In [11]:
# Limit the preview to the 2 latest entries.
cycling_or_city_df.head(2)

Dataset(58 entries)
[30.11.2023 23:37] 3.5 city, Lauren, date, tired
[24.11.2023 21:50] 4.0 satisfied, happy, Damir, city, social, tired
...


In [12]:
# Passing -1 lifts the preview limit: prints all entries
cycling_or_city_df.head(-1)

Dataset(58 entries)
[30.11.2023 23:37] 3.5 city, Lauren, date, tired
[24.11.2023 21:50] 4.0 satisfied, happy, Damir, city, social, tired
[30.10.2023 18:07] 5.0 happy, date, grateful, city, Lauren, tired
[26.10.2023 22:06] 4.0 satisfied, happy, date, grateful, city, Lauren, walking
[25.10.2023 19:41] 4.0 happy, date, city, Lauren, walking
[22.10.2023 21:01] 4.0 date, grateful, city, new place, Lauren, tired, excited
[21.10.2023 20:00] 4.0 satisfied, movies & series, date, city, relaxed, Lauren
[19.10.2023 14:35] 4.0 city, friends
[14.10.2023 20:00] 4.0 Lauren, city, tired, new place
[13.10.2023 20:10] 5.0 cycling, tired, swimming
[08.10.2023 20:00] 4.0 grateful, city, friends, Lauren, travel
[07.10.2023 15:56] 4.0 satisfied, amused, city, relaxed, reading, walking
[03.10.2023 20:01] 4.0 satisfied, happy, city, friends, Lauren, walking
[02.10.2023 20:01] 5.0 satisfied, happy, city, friends, Lauren, walking
[30.09.2023 13:29] 4.0 cycling, unsure, relaxed, shopping
[23.09.2023 13:55] 4.0 b

### `.mood`

Use the `.mood` method to get the average mood of all the entries in the dataset

In [13]:
# Average mood of the subset, rounded to three decimal places.
round(cycling_or_city_df.mood(), ndigits=3)

4.233

### `.count`

Use the `.count` method to count entries with the given conditions.

This is equivalent to `len(df.sub(...))` with the same arguments, but the `.count` method does not create a new dataset.

In [14]:
# Number of entries that include 'friends' and have a mood of 2 or 3.
df.count(incl_act='friends', mood={2, 3})

8

### `.activities`

Use the `.activities` method to get a Counter object of all activities in the dataset

In [15]:
# The three most frequent activities together with their occurrence counts.
df.activities().most_common(3)

[('home', 251), ('relaxed', 153), ('satisfied', 131)]

### `.get_datetimes`

Use the `.get_datetimes` method to get the list of all points in time when an entry was created

In [16]:
# First five timestamps; in the recorded output the newest entries come first.
df.get_datetimes()[:5]

[datetime.datetime(2023, 12, 2, 20, 24),
 datetime.datetime(2023, 12, 2, 20, 18),
 datetime.datetime(2023, 12, 1, 20, 1),
 datetime.datetime(2023, 12, 1, 19, 25),
 datetime.datetime(2023, 12, 1, 0, 19)]

# Analysis examples

## Mood analysis

In [17]:
# Compare the mood of entries with and without the chosen activity.
# NOTE(review): presumably both values are average moods - confirm in Dataset.mood_with_without.
activity = 'cycling'
mood_with, mood_without = df.mood_with_without(activity)

In [18]:
# "change" is the relative difference between the two values, using the
# "without" value as the baseline and formatting the ratio as a percentage.
print(f'''"{activity}"
with: {mood_with:.2f}
without: {mood_without:.2f}
change: {(mood_with - mood_without)/mood_without:.1%}'''
)

"cycling"
with: 4.38
without: 3.77
change: 16.1%


In [19]:
# Number of entries that include 'cycling', i.e. the sample size behind the comparison above.
df.count(incl_act='cycling')

33

## Complete analysis

In [20]:
# Run the analysis over the whole dataset; each result is unpacked in the next
# cell as (name, with, without, change, number of occurrences).
complete_analysis = df.complete_analysis()

In [21]:
# Print one summary line per analysed activity: the relative mood change, the
# values with and without the activity, and how many times it occurred.
# Spelling of the printed labels fixed ("activities", "occurred").
print(f'analysed {len(complete_analysis)} activities')
for _name, _with, _without, _change, _num_occ in complete_analysis:
    # The name is centred in a 15-character field so the brackets line up.
    print(f'[{_name:^15}]: {_change:.1%} (with: {_with:.2f}, without: {_without:.2f}); occurred {_num_occ} times')

analysed 40 actvities
[     happy     ]: 20.3% (with: 4.42, without: 3.68); occured 83 times
[   swimming    ]: 18.9% (with: 4.50, without: 3.78); occured 21 times
[   grateful    ]: 18.3% (with: 4.45, without: 3.76); occured 38 times
[     Damir     ]: 17.5% (with: 4.46, without: 3.80); occured 13 times
[     date      ]: 17.4% (with: 4.41, without: 3.75); occured 43 times
[    Lauren     ]: 16.6% (with: 4.35, without: 3.73); occured 61 times
[    cycling    ]: 16.1% (with: 4.38, without: 3.77); occured 33 times
[     city      ]: 12.0% (with: 4.24, without: 3.78); occured 34 times
[   satisfied   ]: 10.6% (with: 4.09, without: 3.70); occured 131 times
[    walking    ]: 9.0% (with: 4.14, without: 3.80); occured 21 times
[     piano     ]: 7.9% (with: 4.10, without: 3.80); occured 25 times
[movies & series]: 7.6% (with: 4.07, without: 3.79); occured 47 times
[   new place   ]: 7.6% (with: 4.10, without: 3.81); occured 10 times
[    relaxed    ]: 6.8% (with: 3.98, without: 3.73); occur

## Mood graph

In [22]:
# Draw the mood graph for the whole dataset (rendering is handled inside Dataset.mood_graph).
df.mood_graph()