In [1]:
from datetime import datetime, timezone
from uuid import uuid4

import pandas as pd
import tqdm

## Habit Format

In [4]:
# Habitude exports its CSV without a header row, so the column names are assigned
# here by position. Columns whose meaning is still unknown keep their positional
# index (as a string) as a placeholder name.
header = [
    "id",
    "name",
    "2",  # unknown: "is_progressive?", or might be the target value for progressive habits
    "description",
    "color_id",
    "creation_date",
    "goal",  # 0 means daily; 2:X means X times per week; there are likely further formats
    "7",  # unknown: "some_str"
    "8",  # unknown: "some_float"
    "unit",
    "date_quantity",  # "-" separated list of "{YYYYMMDD}:{quantity}" (unordered by the way)
    "11",  # unknown: "exceedable?", or target_quantity for progressive habits?
]

# Load the export and order the habits alphabetically by name.
habits_df = pd.read_csv("./habitude.csv", header=None, names=header).sort_values(
    by=["name"]
)
habits_df

Unnamed: 0,id,name,2,description,color_id,creation_date,goal,7,8,unit,date_quantity,11
0,1,01. Film/Series/Game,0,,3,2019-02-21-0:0:00.0000,2:4,,1.0,,20210329:1-20220103:1-20201008:1-20210314:1-20...,0
15,24,02. Allergy,0,,17,2022-05-11-0:0:00.0000,0,,1.0,,20220521:1-20250613:1-20220611:1-20230527:1-20...,0
1,2,03. Write,0,Hast du heute geschrieben?,3,2018-10-10-0:0:00.0000,0,,1.0,,20220103:1-20230629:1-20181021:1-20220507:1-20...,0
18,27,04. Cutsie Book,0,,8,2023-01-14-0:0:00.0000,0,,1.0,,20230308:1-20230312:1-20230313:1-20230119:1-20...,0
17,26,05. VitD,0,,16,2022-09-13-0:0:00.0000,0,,1.0,,20230108:1-20230227:1-20230204:1-20221223:1-20...,0
3,4,07. Finance,0,,5,2019-03-17-0:0:00.0000,2:1,,1.0,,20220521:1-20211202:1-20230108:1-20190823:1-20...,0
4,5,08. Music,0,Hast du heute Musik gemacht?,4,2018-10-10-0:0:00.0000,0,,1.0,,20190209:1-20190607:1-20190911:1-20240811:1-20...,0
6,7,09. Read,0,Hast du heute gelesen? (30min+),13,2018-10-10-0:0:00.0000,0,,1.0,,20190209:1-20240714:1-20220626:1-20231212:1-20...,0
2,3,10. Sport,0,,6,2019-02-23-0:0:00.0000,2:2,,1.0,,20200105:1-20191026:1-20210202:1-20200913:1-20...,0
5,6,11. NoX,0,Did you resist a specific urge,14,2018-10-2-0:0:00.0000,2:5,,1.0,,20191120:1-20230114:1-20190326:1-20221228:1-20...,0


In [5]:
# Show the resulting column names; purely numeric names mark columns whose meaning is unknown.
header

['id',
 'name',
 '2',
 'description',
 'color_id',
 'creation_date',
 'goal',
 '7',
 '8',
 'unit',
 'date_quantity',
 '11']

In [6]:
def parse_date_to_quantity(habit_row):
    """Parse a habit's ``date_quantity`` field into a date -> quantity mapping.

    The field is a "-" separated list of "{YYYYMMDD}:{quantity}" entries in no
    particular order.

    Args:
        habit_row: Row-like object (e.g. a ``pd.Series`` or a dict) that
            supports ``habit_row["date_quantity"]``.

    Returns:
        dict mapping each date string ("YYYYMMDD") to its quantity as ``int``,
        with keys inserted in ascending date order. Empty if the habit has no
        entries (missing/NaN or empty field).

    Raises:
        ValueError: If a non-empty entry is not of the form "date:quantity" or
            the quantity is not an integer.
    """
    raw = habit_row["date_quantity"]
    # pandas reads an empty CSV cell as NaN (a float), so anything that is not a
    # non-empty string means "no entries recorded".
    if not isinstance(raw, str) or not raw:
        return {}

    parsed = {}
    # Sorting the raw entries orders them by date, because YYYYMMDD strings sort
    # chronologically.
    for entry in sorted(raw.split("-")):
        if not entry:  # tolerate stray separators such as a trailing "-"
            continue
        date, qty = entry.split(":")
        parsed[date] = int(qty)
    return parsed


# Try the parser on the first habit (first row after sorting by name).
parse_date_to_quantity(habits_df.iloc[0])

{'20190222': 1,
 '20190223': 1,
 '20190226': 1,
 '20190227': 1,
 '20190301': 1,
 '20190307': 1,
 '20190309': 1,
 '20190316': 1,
 '20190320': 1,
 '20190322': 1,
 '20190323': 1,
 '20190325': 1,
 '20190327': 1,
 '20190328': 1,
 '20190401': 1,
 '20190402': 1,
 '20190403': 1,
 '20190404': 1,
 '20190406': 1,
 '20190407': 1,
 '20190408': 1,
 '20190409': 1,
 '20190410': 1,
 '20190412': 1,
 '20190413': 1,
 '20190414': 1,
 '20190415': 1,
 '20190417': 1,
 '20190419': 1,
 '20190420': 1,
 '20190421': 1,
 '20190423': 1,
 '20190424': 1,
 '20190426': 1,
 '20190429': 1,
 '20190430': 1,
 '20190502': 1,
 '20190503': 1,
 '20190506': 1,
 '20190507': 1,
 '20190508': 1,
 '20190510': 1,
 '20190512': 1,
 '20190513': 1,
 '20190514': 1,
 '20190515': 1,
 '20190516': 1,
 '20190517': 1,
 '20190519': 1,
 '20190520': 1,
 '20190524': 1,
 '20190525': 1,
 '20190526': 1,
 '20190527': 1,
 '20190528': 1,
 '20190530': 1,
 '20190604': 1,
 '20190605': 1,
 '20190607': 1,
 '20190609': 1,
 '20190610': 1,
 '20190612': 1,
 '201906

## Streaks Format

In [9]:
# Load the example Streaks CSV to inspect the layout the conversion has to produce.
streaks_df = pd.read_csv("./examples/streaks_example.csv")
streaks_df

Unnamed: 0,task_id,title,icon,entry_type,entry_date,entry_timestamp,entry_timezone,quantity,page,notes
0,01D26207-B0D5-4906-AC75-4D3606074A1E,Journaling,ic_pen_quill,missed_auto,20250911,2025-09-12T13:53:20Z,Europe/Berlin,0.0,0,
1,01D26207-B0D5-4906-AC75-4D3606074A1E,Journaling,ic_pen_quill,completed_manually,20250912,2025-09-12T13:53:20Z,Europe/Berlin,0.0,0,
2,01D26207-B0D5-4906-AC75-4D3606074A1E,Journaling,ic_pen_quill,completed_manually,20250913,2025-09-13T17:05:34Z,Europe/Berlin,0.0,0,


Header Format:
- `task_id`: UUID per task/habit
- `title`: Name of the habit
- `icon`: Icon representing the habit (e.g., `ic_pen_quill`)
- `entry_type`: Type of entry, currently known values are `completed_manually` and `missed_auto`
- `entry_date`: Date the entry counts for, i.e. the day on which the checkmark/cross is rendered
- `entry_timestamp`: Timestamp of when the entry was recorded
- `entry_timezone`: Timezone of the entry (e.g., `Europe/Berlin`)
- `quantity`: Quantity associated with the entry, currently always `0.0`
- `page`: Page number, I guess this must be [0-3]
- `notes`: Additional notes

What's missing:
- Target frequency and quantity per habit

There is also a `*.streaks` file (JSON) that has more metadata, but the property names are not self-explanatory.

In [10]:
# List the column names of the Streaks example CSV.
streaks_df.columns

Index(['task_id', 'title', 'icon', 'entry_type', 'entry_date',
       'entry_timestamp', 'entry_timezone', 'quantity', 'page', 'notes'],
      dtype='object')

## Conversion

- new UUID per task and maybe a default icon?
- we probably want `completed_manually` for all converted entries


In [12]:
# Print every habit name (in the sorted order) as an overview before converting.
for row in habits_df.itertuples():
    print(row.name)

01. Film/Series/Game
02. Allergy
03. Write
04. Cutsie Book
05. VitD
07. Finance
08. Music
09. Read
10. Sport
11. NoX
12. Sex
13. NoCaff
13.1 Caff
14. VeggieDay
15. Photography/Edit
16. Take a walk and free the mind
17. Reflection/Personal Growth
19. Headache
20. Ibu
21. Alcohol


In [13]:
def habit_to_streak(df, name_remap: "dict | None" = None):
    """Convert Habitude habits into one Streaks-style row per recorded entry.

    Args:
        df: DataFrame with at least the columns ``name`` and ``date_quantity``.
            ``date_quantity`` is a "-" separated list of "{YYYYMMDD}:{quantity}"
            entries; habits whose field is missing (NaN) or empty produce no rows.
        name_remap: Optional mapping from a habit name to the title it should get.
            Names that are not in the mapping are kept unchanged.

    Returns:
        DataFrame with the columns ``task_id``, ``title``, ``icon``,
        ``entry_type``, ``entry_date``, ``entry_timestamp``, ``entry_timezone``,
        ``quantity``, ``page`` and ``notes``. Rows are grouped by habit and
        ordered by date within each habit. The columns are present even when no
        rows were produced. ``quantity`` is kept as the raw string from the
        export.

    Raises:
        ValueError: If a non-empty entry is not of the form "date:quantity".
    """
    # Avoid a shared mutable default argument.
    if name_remap is None:
        name_remap = {}

    # Default values shared by every generated entry
    icon = "ic_pen_quill"
    entry_type = "completed_manually"
    entry_timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    # Stored as a timezone *name*, matching the string values of this column in
    # the Streaks CSV, instead of a datetime.timezone object.
    entry_timezone = "UTC"
    page = 0  # Streaks automatically overflows to the next page
    note = "imported"

    columns = [
        "task_id",
        "title",
        "icon",
        "entry_type",
        "entry_date",
        "entry_timestamp",
        "entry_timezone",
        "quantity",
        "page",
        "notes",
    ]

    output = []
    # total= lets tqdm show a real progress bar instead of a bare counter.
    for row in tqdm.tqdm(df.itertuples(), total=len(df)):
        raw_entries = row.date_quantity
        # pandas reads an empty CSV cell as NaN (a float): no entries to convert.
        if not isinstance(raw_entries, str):
            continue

        task_id = str(uuid4()).upper()  # one fresh UUID per habit
        title = name_remap.get(row.name, row.name)

        # YYYYMMDD strings sort chronologically, so sorting the raw entries
        # orders them by date.
        for entry in sorted(raw_entries.split("-")):
            if not entry:  # tolerate stray separators such as a trailing "-"
                continue
            date, quantity = entry.split(":")
            output.append(
                {
                    "task_id": task_id,
                    "title": title,
                    "icon": icon,
                    "entry_type": entry_type,
                    "entry_date": date,
                    "entry_timestamp": entry_timestamp,
                    "entry_timezone": entry_timezone,
                    "quantity": quantity,
                    "page": page,
                    "notes": note,
                }
            )

    # Passing columns explicitly keeps the schema stable even for empty output.
    return pd.DataFrame(output, columns=columns)

In [14]:
def _strip_order_prefix(name):
    """Drop a leading ordering prefix such as "01." or "13.1" from a habit name.

    Names without a space, or whose first word is not a number (digits and dots
    only), are returned unchanged instead of raising or losing their first word.
    """
    prefix, _, rest = name.partition(" ")
    is_numbering = prefix.replace(".", "").isdigit()
    return rest if is_numbering and rest else name


# Map every Habitude name to the title it should get in Streaks.
remap = {name: _strip_order_prefix(name) for name in habits_df["name"].to_list()}

converted_df = habit_to_streak(habits_df, remap)

20it [00:00, 2060.68it/s]


In [15]:
# Inspect the old-name -> new-title mapping that was passed to the conversion.
remap

{'01. Film/Series/Game': 'Film/Series/Game',
 '02. Allergy': 'Allergy',
 '03. Write': 'Write',
 '04. Cutsie Book': 'Cutsie Book',
 '05. VitD': 'VitD',
 '07. Finance': 'Finance',
 '08. Music': 'Music',
 '09. Read': 'Read',
 '10. Sport': 'Sport',
 '11. NoX': 'NoX',
 '12. Sex': 'Sex',
 '13. NoCaff': 'NoCaff',
 '13.1 Caff': 'Caff',
 '14. VeggieDay': 'VeggieDay',
 '15. Photography/Edit': 'Photography/Edit',
 '16. Take a walk and free the mind': 'Take a walk and free the mind',
 '17. Reflection/Personal Growth': 'Reflection/Personal Growth',
 '19. Headache': 'Headache',
 '20. Ibu': 'Ibu',
 '21. Alcohol': 'Alcohol'}

In [16]:
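# Uncomment to write the result. index=False keeps the DataFrame index from being
# written as an extra unnamed first column, which the Streaks CSV does not have.
# converted_df.to_csv("output_for_streaks.csv", index=False)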