In [None]:
import pathlib
from src.dataset import Dataset, BAD_MOOD

# Directory searched for the CSV file to load.
DATA_DIR = pathlib.Path('data')

# Take the first CSV the directory listing yields. Passing `None` as the
# default lets us fail with a readable message instead of a bare StopIteration
# when the directory contains no CSV at all.
path = next(DATA_DIR.glob('*.csv'), None)
if path is None:
    raise FileNotFoundError(f'no *.csv files found in {DATA_DIR.resolve()}')
print('using file', path.name)

# Build the dataset from the chosen file; the trailing expression makes the
# notebook display the stats object.
df = Dataset(path)
df.stats()

# API

### `.head`

Use the `.head` method to look at the latest entries of the dataset

In [None]:
df.head()

In [None]:
df.head(2)

In [None]:
# prints all entries
df.head(-1)

### `.sub`

Use the `.sub` method to filter entries and get a subset of the original dataset

- by included activities ('or' operator: all the entries which have at least one of the listed activities)

In [None]:
cycling_or_city_df = df.sub(include={'cycling', 'city'})
cycling_or_city_df.head()

- by excluded activities (entries which don't have any of the listed activities)

_for both `include` and `exclude` the argument can be either `str` or `set[str]`_

In [None]:
without_friends = df.sub(exclude='friends')
without_friends.head()

- by mood values (can be either a value or a Container of values)

In [None]:
bad_mood = df.sub(mood=BAD_MOOD)
bad_mood

- by included activities ('and' operator: all entries which have all the listed activities)

_Note: this is achieved by subsetting the dataset twice; since the `.sub` method returns a new dataset, the two calls can be chained in one line._

In [None]:
cycling_and_swimming = df.sub(include='cycling').sub(include='swimming')
cycling_and_swimming.head()
print(cycling_and_swimming.activities().most_common(7))

In [None]:
df.sub(include='cycling', exclude='swimming')

- by a substring (or substrings) in the `note`

In [None]:
df_ktane = df.sub(note_contains='ktane')
df_ktane.head(-1)

- by a predicate function (which takes an entry as an argument and returns a boolean)

In [None]:
# Import from the same package path that the rest of this notebook uses for
# project modules (`src.*`), so `Entry` comes from the same module object.
from src.dataset import Entry


def pred(entry: Entry) -> bool:
    """Return True for a Sunday entry with mood above 4 and a non-empty note."""
    # `weekday()` == 6 is Sunday under the datetime convention (Monday=0);
    # assumes `full_date` is a `datetime`/`date` - confirm against `Entry`.
    return entry.full_date.weekday() == 6 and entry.mood > 4 and entry.note != ""


# Keep only the entries accepted by `pred`; the bare name on the last line
# makes the notebook display the resulting dataset.
df_sunday_good_mood_has_note = df.sub(predicate=pred)
df_sunday_good_mood_has_note

### `.mood` and `.std`

Use the `.mood` method to get the average mood of all the entries in the dataset and the `.std` method to get the standard deviation of the mood values.

These values are also shown when calling `.head()`.

In [None]:
print(f"{cycling_or_city_df.mood():.3f} \u00B1 {cycling_or_city_df.std():.3f}")

or just use `.mood_std`:

In [None]:
cycling_or_city_df.mood_std()

### `.activities`

Use the `.activities` method to get a Counter object of all activities in the dataset

In [None]:
# Tally the activities once, then report both ends of the frequency ranking.
cnt = df.activities()
top_three = cnt.most_common(3)
# Reverse the full ranking so the rarest activity comes first, then keep five.
rarest_first = cnt.most_common()[::-1]
bottom_five = rarest_first[:5]
print(f'most common: {top_three}')
print(f'least common: {bottom_five}')

### `.people`

Use the `.people` method to get a Counter object of all people in the dataset

In [None]:
df.people()

### `.get_datetimes`

Use the `.get_datetimes` method to get the list of all points in time when an entry was created. The values are sorted in descending order by the date of creation.

In [None]:
df.get_datetimes()[:5]

### `.group_by`
Use the `.group_by` method to get a dictionary mapping groups to the list of entries in that group.

The grouping key is one of `['day', 'month']`, and the entries are sorted in ascending order.

Note: this method (like many others) is cached with `lru_cache`; since the Dataset is weakly immutable, caching its results is safe.

In [None]:
groups = df.group_by('day')
for day, entries in groups.items():
    print(f'{day}: {len(entries)}')
    break

In [None]:
# Group the entries per month and show the size of the first group only.
groups = df.group_by('month')
for month, entries in groups.items():
    # The keys are months here, so name the loop variable accordingly.
    print(f'{month}: {len(entries)}')
    break  # one group is enough for the demonstration

### `.stats`

Use the `.stats` method to get a custom StatsResult object which contains the following information:

In [None]:
help(df.stats)

In [None]:
df.sub(include='home').stats()

### `for _ in df` (`.__iter__`)
`Dataset` defines `__iter__` method, so it can be used in `for` loops. The entries are sorted in descending order by the date of creation.

In [None]:
for entry in df:
    print(entry)
    break
# or
df_iter = iter(df)
print(next(df_iter), next(df_iter), sep='; ')

### `df[...]` (`.__getitem__`)
`Dataset` defines the `__getitem__` method, where the argument is either
- a date in a string format (e.g. `'01.01.2024'`), which returns a new Dataset with all entries on that date; or
- a slice object (e.g. `slice('01.01.2024', '01.01.2025')`), which returns a new Dataset with all entries created between the two dates (the "stop" date is not included).

In [None]:
july22 = df['22.07.2023']
july22

Slices select all entries within a particular date period:

In [None]:
ny_eve = df['29.12.2023':'03.01.2024']
ny_eve

In [None]:
end_of_november_2023 = df['29.11.2023':'01.12.2023']
end_of_november_2023.head()

In [None]:
before_aug_2023 = df[:'01.08.2023']
before_aug_2023.head()

### `df @ datetime-like` (or `.at`)
Returns a single entry created at a particular datetime or `None` if there is no entry at that datetime.

Input: a datetime in a string format (e.g. `'01.01.2024 12:00'`) or a `datetime.datetime` object.

In [None]:
import datetime

entry1 = df @ '22.07.2023 17:09' # or df.at(...)
entry2 = df @ datetime.datetime(2023, 7, 22, 17, 9) # or df.at(...)

assert entry1 == entry2
print(entry1)

# Analysis examples

## Mood analysis

In [None]:
activity = 'cycling'
mood_with_without = df.mood_with_without(activity)
print(repr(mood_with_without))
print(mood_with_without.calc_change())

In [None]:
print(activity, mood_with_without, sep='\n')

## Complete analysis

In [30]:
complete_analysis = df.complete_analysis()

In [None]:
print(f'analysed {len(complete_analysis)} actvities')

# Show the first and last 8 results. With 16 or fewer results the two slices
# would overlap and print some rows twice, so show every row exactly once.
_EDGE = 8
if len(complete_analysis) > 2 * _EDGE:
    _shown = complete_analysis[:_EDGE] + complete_analysis[-_EDGE:]
else:
    _shown = complete_analysis[:]

# Each result unpacks to (activity name, with/without mood comparison, count).
for _name, _mood_with_without, _num_occ in _shown:
    print(f'[{_name:^15}]: {_mood_with_without.calc_change():.1%} (with: {_mood_with_without.with_}, without: {_mood_with_without.without}); occured {_num_occ} times')

## Tags

In [3]:
all_tags = df.build_tags()

In [None]:
for booktag in all_tags['книга']:
    print(booktag)

### Books Timeline

In [None]:
# Import both names from `IPython.display`, the public display API, instead of
# mixing it with the internal `IPython.core.display` module.
from IPython.display import HTML, display

from src.books_timeline import get_timeline_html

book_tags = df.get_book_tags()

# Wrap the generated markup in `HTML` so the notebook renders it inline
# rather than printing it as plain text.
display(HTML(get_timeline_html(book_tags)))


## Mood scatter plot (per day/week/month)

In [None]:
df.mood_plot('day')

In [None]:
df.mood_plot('week')

In [None]:
df.mood_plot('month')

## Books Read

In [None]:
df.books_read_plot()

## Mood bar plot by hour/day/weekday/month

In [None]:
df.by_time_bar_plot('hour')

In [None]:
df.by_time_bar_plot('day')

In [None]:
df.by_time_bar_plot('weekday')

In [None]:
df.by_time_bar_plot('month')

## Calendar Heatmaps!

Now, these do look fancy, don't they!

In [None]:
df.show_calendar_plot()

Of course, we can also subset the dataset to, say, take a look at a particular activity

In [None]:
# when I was at home (some of my trips are clearly visible)
df.sub(include="home").show_calendar_plot()

In [None]:
# when I played factorio alone: exclude every known person so that only the
# solo sessions remain (iterating the people counter yields its keys)
everyone = set(df.people())
df.sub(include="factorio", exclude=everyone).show_calendar_plot()

## Time differences between entries

In [None]:
df.sub(include='home').entries_differences()

## Monthly activity effect on mood

In [None]:
df.mood_change_activity('study')

## Other features

In [None]:
df.note_length_plot()

In [2]:
df.generate_activity_correlation_matrix()

# Monthly Report Templates

In [None]:
from src.monthly_report import generate_report_template


generate_report_template(1, 2025, df)


# Other