In [46]:
import csv
from dataclasses import dataclass
import datetime
import pathlib

In [159]:
from collections import Counter


# Mood labels ordered from worst to best; each label's score is its 1-based rank.
MOOD_VALUES = {
    label: score
    for score, label in enumerate(('bad', 'meh', 'ok', 'good', 'great'), start=1)
}
# strftime/strptime pattern shared by entry parsing and entry display.
DT_FORMAT = "%Y-%m-%d %H:%M"


@dataclass
class Entry:
    full_date: datetime.datetime
    mood: int
    activities: set
    note: str

    def __repr__(self) -> str:
        return f'[{self.full_date.strftime(DT_FORMAT)}] {self.mood} {", ".join(self.activities)}'
    
    def check_condition(self, incl_act: set[str],
                   excl_act: set[str], 
                   when: datetime.date | None, 
                   mood: int | tuple[int|None, int|None] | None) -> bool:
        return (True if not incl_act else incl_act & self.activities) and \
            (not excl_act & self.activities) and \
            (True if when is None else self.full_date.date() == when) and \
            (True if mood is None else mood == self.mood)


class Dataset:
    def _from_csv_file(self, csv_file_path: str | pathlib.Path):
        self.entries: list[Entry] = []
        with open(csv_file_path, 'r', encoding='utf-8-sig') as csvfile:
            reader = csv.DictReader(csvfile)
            for row in reader:
                self.entries.append(self._get_entry(row))

    def __init__(self, *, csv_file_path: str | pathlib.Path | None = None, entries: list[Entry] | None = None) -> None:
        if csv_file_path is not None:
            self._from_csv_file(csv_file_path)
            print(f'successful: {len(self.entries)} entries')
        elif entries is not None:
            self.entries = entries
        else:
            self.entries = 0
    
    def __repr__(self) -> str:
        return f'Dataset({len(self.entries)} entries)'

    def __getitem__(self, idx: int) -> Entry:
        return self.entries[idx]
    
    def __iter__(self):
        return iter(self.entries)
    
    def _get_entry(self, row: dict) -> Entry:
        datetime_str = row['full_date'] + ' ' + row['time']
        return Entry(
            full_date=datetime.datetime.strptime(datetime_str, DT_FORMAT),
            mood=MOOD_VALUES.get(row['mood']),
            activities=set(row['activities'].split(' | ')),
            note=row['note']
        )
    
    def get_subset(self, 
                   incl_act: set[str] = set(),
                   excl_act: set[str] = set(), 
                   when: datetime.date | None = None, 
                   mood: int | tuple[int|None, int|None] | None = None) -> 'Dataset':
        filtered_entries = []
        for e in self:
            if e.check_condition(incl_act, excl_act, when, mood):
                filtered_entries.append(e)
        return Dataset(entries=filtered_entries)
    
    def count(self, incl_act: set[str] = set(),
                   excl_act: set[str] = set(), 
                   when: datetime.date | None = None, 
                   mood: int | tuple[int|None, int|None] | None = None) -> int:
        return sum(1 for e in self if e.check_condition(incl_act, excl_act, when, mood))

    def avg_mood(self) -> float:
        return sum(e.mood for e in self)/len(self.entries)
    
    def count_activities(self) -> Counter:
        c = Counter()
        for e in self:
            c.update(e.activities)
        return c
    
    def head(self, n: int = 5) -> None:
        for e in self.entries[:n]:
            print(e)
        if len(self.entries) > n:
            print('...')
        print(self)

In [160]:
# Load the exported mood log; on success the constructor prints the number of parsed entries.
df = Dataset(csv_file_path=pathlib.Path('data_2023_07_18.csv'))

successful: 32 entries


In [161]:
# Print the first five entries, then display the mean mood score of the whole dataset.
df.head()
df.avg_mood()

[2023-07-17 21:37] 2 angry, dota2
[2023-07-17 20:29] 4 laughing, chess
[2023-07-17 19:17] 3 tired, study, home (never left)
[2023-07-17 16:10] 3 excited, nail biting, stressed, home (never left), chess, cooking
[2023-07-16 20:02] 4 study, nail biting, friends, home (never left), chess, dota2, cycling, cleaning
...
Dataset(32 entries)


3.6875

In [162]:
# Number of entries tagged with the 'friends' activity.
df.count(incl_act={'friends'})

12

In [165]:
# The four most frequently tagged activities, as (activity, entry count) pairs.
df.count_activities().most_common(4)

[('nail biting', 13), ('friends', 12), ('coding', 11), ('Satisfied', 10)]

In [167]:
# Keep only the entries tagged with 'cycling' as a separate dataset.
cycling_df = df.get_subset(incl_act={'cycling'})

In [168]:
# Preview the cycling subset, then display its mean mood score.
cycling_df.head()
cycling_df.avg_mood()

[2023-07-16 20:02] 4 study, nail biting, friends, home (never left), chess, dota2, cycling, cleaning
[2023-07-14 19:56] 5 relaxed, study, city, tired, friends, cycling, new place, procrastinating, Swimming
[2023-07-13 23:59] 4 relaxed, nail biting, coding, tired, friends, shopping, gaming, Satisfied, cycling, new place, procrastinating, class
[2023-07-13 12:47] 4 cycling, Satisfied
[2023-07-11 20:02] 3 study, overheated, coding, bored, shopping, cycling
...
Dataset(8 entries)


4.25

In [169]:
# Tally the activities that appear in the cycling entries, i.e. what co-occurs with cycling.
cycling_df.count_activities()

Counter({'cycling': 8,
         'Satisfied': 5,
         'friends': 4,
         'tired': 4,
         'study': 3,
         'nail biting': 3,
         'relaxed': 3,
         'city': 2,
         'new place': 2,
         'procrastinating': 2,
         'Swimming': 2,
         'coding': 2,
         'shopping': 2,
         'school': 2,
         'home (never left)': 1,
         'chess': 1,
         'dota2': 1,
         'cleaning': 1,
         'gaming': 1,
         'class': 1,
         'overheated': 1,
         'bored': 1,
         'happy': 1})

In [153]:
# Preview the entries whose mood score is exactly 5.
df.get_subset(mood=5).head()

[2023-07-14 19:56] 5 relaxed, study, city, tired, friends, cycling, new place, procrastinating, Swimming
[2023-07-10 21:00] 5 nail biting, tired, friends, school, happy, Satisfied, cycling, Swimming
[2023-07-08 19:13] 5 relaxed, city, tired, Satisfied, cycling
[2023-07-03 14:56] 5 happy, relaxed, class, social
Dataset(4 entries)
