In [1]:
import datetime
import pathlib

from dataset import Dataset, BAD_MOOD, GOOD_MOOD, AVERAGE_MOOD

# Loading the dataset

In [2]:
# The CSV path is relative; presumably it is resolved against the kernel's working directory — confirm in Dataset
df = Dataset(csv_file_path=pathlib.Path('data_2023_07_28.csv'))

Dataset(80 entries)


# API

### `.sub`

Use the `.sub` method to filter entries and get a subset of the original dataset

- by included activities ('or' operator: all the entries which have at least one of the listed activities)

In [7]:
# NOTE: despite its name, this subset is not cycling-only: incl_act keeps
# every entry that lists 'cycling' OR 'city'.
cycling_df = df.sub(incl_act={'cycling', 'city'})
cycling_df.head()
print(cycling_df.mood())

Dataset(17 entries)
[2023-07-27 21:04] 4.0 satisfied, gaming, shopping, relaxed, cycling, cooking, home
[2023-07-25 20:21] 4.0 satisfied, cycling, shopping
[2023-07-21 21:17] 4.0 city, satisfied
[2023-07-20 18:29] 4.0 nice weather, cycling, unsure, shopping
[2023-07-18 20:30] 5.0 satisfied, city, relaxed, friends, cycling, tired, swimming
...
4.205882352941177


- by excluded activities (entries which don't have any of the listed activities)

In [5]:
# excl_act drops every entry that lists any of the given activities
without_friends = df.sub(excl_act={'friends'})
without_friends.head()
print(without_friends.mood())

Dataset(63 entries)
[2023-07-28 10:16] 4.0 procrastinating, relaxed, home
[2023-07-27 21:04] 4.0 cooking, home, shopping, gaming, satisfied, relaxed, cycling
[2023-07-27 16:02] 3.5 unsure, relaxed, home, study
[2023-07-27 15:54] 4.0 home, study, m, satisfied, relaxed
[2023-07-27 01:15] 3.5 tired, relaxed, home, m
...
3.5238095238095237


- by mood values (either a single value or a container of values, e.g. a set such as `{2, 3}`)

In [9]:
# BAD_MOOD is imported from the dataset module; presumably it holds the mood
# value(s) treated as "bad" — confirm against dataset.py
bad_mood = df.sub(mood=BAD_MOOD)
bad_mood

Dataset(16 entries)

- by included activities ('and' operator: all entries which have all the listed activities)

In [8]:
 #* Note: the 'and' filter is achieved by chaining two .sub calls, one per activity;
 #* since the .sub method returns a new dataset, this can be done in one line
cycling_and_swimming = df.sub(incl_act={'cycling'}).sub(incl_act={'swimming'})
cycling_and_swimming.head()
print(cycling_and_swimming.mood())
print(cycling_and_swimming.activities().most_common(7))

Dataset(3 entries)
[2023-07-18 20:30] 5.0 tired, city, friends, swimming, satisfied, relaxed, cycling
[2023-07-14 19:56] 5.0 tired, study, city, friends, new place, swimming, procrastinating, relaxed, cycling
[2023-07-10 21:00] 5.0 tired, friends, nail biting, happy, swimming, satisfied, school, cycling
5.0
[('tired', 3), ('friends', 3), ('swimming', 3), ('cycling', 3), ('city', 2), ('satisfied', 2), ('relaxed', 2)]


- by a particular date

In [6]:
# `datetime` is imported in the notebook's first cell together with the other
# imports, so this cell no longer carries its own import.
# `when` takes a datetime.date and keeps only the entries logged on that day.
july22 = df.sub(when=datetime.date(2023, 7, 22))
# n=-1 makes head() print every entry instead of the default short preview
july22.head(n=-1)

Dataset(6 entries)
[2023-07-22 22:17] 3.5 shower, relaxed, home, m
[2023-07-22 19:34] 4.0 chess, relaxed, home
[2023-07-22 17:09] 3.5 relaxed, cooking, home, study
[2023-07-22 14:22] 3.5 unsure, coding
[2023-07-22 13:30] 3.0 friends, nail biting, home, dota, anxious
[2023-07-22 11:30] 4.0 satisfied, home


### `.head`

Use the `.head` method to look at the latest entries of the dataset

In [8]:
# with no argument, head() previews the five latest entries
cycling_df.head()

Dataset(17 entries)
[2023-07-27 21:04] 4.0 satisfied, gaming, shopping, relaxed, cycling, cooking, home
[2023-07-25 20:21] 4.0 satisfied, cycling, shopping
[2023-07-21 21:17] 4.0 city, satisfied
[2023-07-20 18:29] 4.0 nice weather, cycling, unsure, shopping
[2023-07-18 20:30] 5.0 satisfied, city, relaxed, friends, cycling, tired, swimming
...


In [9]:
# the argument sets how many of the latest entries are printed
cycling_df.head(2)

Dataset(17 entries)
[2023-07-27 21:04] 4.0 satisfied, gaming, shopping, relaxed, cycling, cooking, home
[2023-07-25 20:21] 4.0 satisfied, cycling, shopping
...


In [11]:
# passing -1 prints all entries instead of a truncated preview
cycling_df.head(-1)

Dataset(17 entries)
[2023-07-27 21:04] 4.0 satisfied, gaming, shopping, relaxed, cycling, cooking, home
[2023-07-25 20:21] 4.0 satisfied, cycling, shopping
[2023-07-21 21:17] 4.0 city, satisfied
[2023-07-20 18:29] 4.0 nice weather, cycling, unsure, shopping
[2023-07-18 20:30] 5.0 satisfied, city, relaxed, friends, cycling, tired, swimming
[2023-07-18 14:29] 4.0 nice weather, satisfied, cycling, shopping
[2023-07-16 20:02] 4.0 chess, friends, cycling, cleaning, nail biting, home, dota, study
[2023-07-14 19:56] 5.0 city, relaxed, friends, cycling, new place, tired, swimming, procrastinating, study
[2023-07-13 23:59] 4.0 class, satisfied, gaming, shopping, relaxed, friends, cycling, nail biting, coding, new place, tired, procrastinating
[2023-07-13 12:47] 4.0 satisfied, cycling
[2023-07-11 20:02] 3.5 shopping, overheated, cycling, coding, bored, study
[2023-07-10 21:00] 5.0 satisfied, school, friends, cycling, nail biting, happy, tired, swimming
[2023-07-10 11:07] 4.0 school, satisfied, c

### `.mood`

Use the `.mood` method to get the average mood of all the entries in the dataset

In [14]:
# average mood of the subset, rounded to three decimal places for display
round(cycling_df.mood(), 3)

4.273

### `.count`

Use the `.count` method to count entries with the given conditions.

This is equivalent to `len(df.sub(...))` with the same arguments, but the `.count` method does not create a new dataset.

In [8]:
# counts entries that list 'friends' with a mood of 2 or 3
# (presumably both conditions must hold, as with chained .sub calls — confirm)
df.count(incl_act={'friends'}, mood={2, 3})

2

### `.activities`

Use the `.activities` method to get a Counter object of all activities in the dataset

In [3]:
# Counter.most_common(3): the three most frequent activities with their counts
df.activities().most_common(3)

[('home', 54), ('relaxed', 26), ('nail biting', 25)]

### `.get_datetimes`

Use the `.get_datetimes` method to get the list of all points in time when an entry was created

In [4]:
# slice to the first five timestamps to keep the displayed output short
df.get_datetimes()[:5]

[datetime.datetime(2023, 7, 28, 10, 16),
 datetime.datetime(2023, 7, 27, 21, 4),
 datetime.datetime(2023, 7, 27, 16, 2),
 datetime.datetime(2023, 7, 27, 15, 54),
 datetime.datetime(2023, 7, 27, 1, 15)]

# Analysis examples

## Mood analysis

In [34]:
# analyse() returns a pair; presumably the average mood of entries with the
# activity and of entries without it — confirm against Dataset.analyse
activity = 'city'
mood_with, mood_without = df.analyse(activity)

In [35]:
# relative difference in mood, using the mood without the activity as the baseline
relative_change = (mood_with - mood_without) / mood_without

# build the report first, then print it; the emitted text is unchanged
summary = f'''"{activity}"
with: {mood_with:.2f}
without: {mood_without:.2f}
change: {relative_change:.1%}'''
print(summary)

"city"
with: 4.43
without: 3.55
change: 24.8%


In [37]:
# number of entries in the full dataset that list 'cycling'
df.count(incl_act={'cycling'})

13