
Commit

formatting
mohamed-chs committed Oct 18, 2023
1 parent 1449ddd commit 30c065f
Showing 18 changed files with 147 additions and 183 deletions.
24 changes: 0 additions & 24 deletions .github/workflows/pylint.yml

This file was deleted.

18 changes: 8 additions & 10 deletions TODO.md
@@ -1,20 +1,20 @@
 ### TODO
 
-Doing what needs to be done.
+Doing what needs to be done. (not really needed, but I like to keep track of things)
 
 Feel free to add or check items.
 
 **general**
 
-- [ ] More robust testing setup (I only have my data to test on and I do it by way of the eyes, which isn't very sustainable)
+- [ ] More robust testing setup
 - [ ] GUI
 - [ ] Standalone executable, for an even smoother setup
 - [ ] Obsidian plugin ?
-- [x] keep external dependencies to a minimum (looking at you js)
+- [-] keep external dependencies to a minimum (looking at you js)
 - [x] Javascript to download more conversations, see [Javascript](js)
-- [ ] Add new downloaded conversations to the MD folder
+- [x] Add new downloaded conversations to the MD folder
 - [ ] Update past conversations if changed
-- [ ] Data visualizations : chat times, frequency, models, word clouds, etc...
+- [-] Data visualizations : chat times, frequency, models, word clouds, etc...
 - [ ] Data analysis : categories and more classifications based on topics, concepts, programming tools, etc ...
 - [ ] Integration with Obsidian (folders and subfolders, tags, ...)
 - [ ] Add HTML as an output option
@@ -27,22 +27,20 @@ Feel free to add or check items.
 **visualizations**
 
 - [ ] Rename fonts and colormaps to more human-readable names
-- [ ] Reduce the number of fonts and colormaps (test and dump the ugly ones)
-- [ ] Automatically make all word clouds for all time periods
+- [ ] Number of fonts and colormaps should be reasonable, not too many, not too few
 
 **command line**
 
 - [x] Nicer command line output formatting
-- [x] More configs from the command line (overwrite the config.json)
+- [x] More configs from the command line
 - [ ] Link to submit issues or feedback
 - [ ] add more todos ...
 
-**configs.json**
+**configs**
 
 - [x] change user, assistant, and system names
 - [x] yaml header elements
 - [ ] specific configs for each individual conversation / conversation type
-- [ ] output folder (currently set by default or via command line arguments)
 - [ ] add more configs ...
 
 See also : [JavaScript Todo](js/how_to_use.md#still-working-on)
6 changes: 3 additions & 3 deletions controllers/configuration.py
@@ -14,9 +14,9 @@

 def get_user_configs() -> dict[str, Any]:
     """Loads the default configs and calls the prompt_user function with those defaults.
-    Returns the new configuration."""
-
-    with open(file="config.json", mode="r", encoding="utf-8") as file:
+    Returns the new configuration.
+    """
+    with open(file="config.json", encoding="utf-8") as file:
         default_configs = load(fp=file)
 
     if not default_configs["zip_file"]:
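
Worth noting: Python's open() defaults to mode="r", so dropping the explicit mode above is purely cosmetic. A minimal standalone sketch of the same config-loading step (the empty-dict fallback is a hypothetical addition for illustration, not something the repo does):

```python
from json import load
from pathlib import Path
from typing import Any


def load_default_configs() -> dict[str, Any]:
    """Read the default configs from config.json; open() defaults to mode="r"."""
    config_path = Path("config.json")
    if not config_path.exists():
        return {}  # hypothetical fallback; the real code assumes the file exists
    with open(file=config_path, encoding="utf-8") as file:
        return load(fp=file)
```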
24 changes: 14 additions & 10 deletions controllers/data_analysis.py
@@ -1,6 +1,7 @@
"""Module for all the data visualizations.
Should ideally only return matplotlib objects, and not deal with the filesystem."""
Should ideally only return matplotlib objects, and not deal with the filesystem.
"""

from pathlib import Path
from typing import Any
@@ -20,7 +21,6 @@
 # Ensure that the stopwords are downloaded
 def load_nltk_stopwords() -> set[str]:
     """Loads the nltk stopwords. Returns a set of stopwords."""
-
     try:
         nltk.data.find(resource_name="corpora/stopwords")  # type: ignore
     except LookupError:
@@ -36,7 +36,9 @@ def load_nltk_stopwords() -> set[str]:
     ]  # add more languages here ...
 
     stop_words = set(
-        word for lang in languages for word in stopwords.words(fileids=lang)  # type: ignore
+        word
+        for lang in languages
+        for word in stopwords.words(fileids=lang)  # type: ignore
     )
 
     return stop_words
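
The reflowed comprehension above is behavior-identical: it unions NLTK stopwords across several languages. A self-contained sketch of the pattern, assuming only that the nltk package is installed (the corpus is fetched on first use, exactly what the LookupError handler above does):

```python
import nltk
from nltk.corpus import stopwords


def load_stopwords(languages: list[str]) -> set[str]:
    """Union of NLTK stopwords across the given languages."""
    try:
        nltk.data.find("corpora/stopwords")  # already downloaded?
    except LookupError:
        nltk.download("stopwords")  # fetch the corpus on first use
    return {
        word
        for lang in languages
        for word in stopwords.words(fileids=lang)
    }


print(len(load_stopwords(["english", "french"])))  # size of the combined set
```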
@@ -47,7 +49,6 @@ def wordcloud_from_text(
     **kwargs: Any,
 ) -> WordCloud:
     """Creates a wordcloud from the given text. Returns a WordCloud object."""
-
     custom_stopwords: list[str] = kwargs.get("stopwords", [])
     default_stopwords: set[str] = load_nltk_stopwords()
     stop_words: set[str] = default_stopwords.union(set(custom_stopwords))
@@ -61,7 +62,8 @@

     wordcloud: WordCloud = WordCloud(
         font_path=kwargs.get(
-            "font_path", "assets/fonts/ArchitectsDaughter-Regular.ttf"
+            "font_path",
+            "assets/fonts/ArchitectsDaughter-Regular.ttf",
         ),
         width=kwargs.get("width", 1000),
         height=kwargs.get("height", 1000),
@@ -71,17 +73,17 @@
         colormap=kwargs.get("colormap", "prism"),
         include_numbers=kwargs.get("include_numbers", False),
     ).generate(  # type: ignore
-        text=text
+        text=text,
     )
 
     return wordcloud
 
 
 def wordcloud_from_conversation_set(
-    conv_set: ConversationSet, **kwargs: Any
+    conv_set: ConversationSet,
+    **kwargs: Any,
 ) -> WordCloud:
     """Creates a wordcloud from the given conversation set. Returns a WordCloud object."""
-
     text: str = (
         conv_set.all_author_text(author="user")
         + "\n"
@@ -93,7 +95,6 @@ def wordcloud_from_conversation_set(

 def create_save_graph(timestamps: list[float], file_path: Path) -> None:
     """Creates and saves a graph from the given timestamps."""
-
     df = pd.DataFrame(data=timestamps, columns=["timestamp"])  # type: ignore
     df["datetime"] = pd.to_datetime(arg=df["timestamp"], unit="s")  # type: ignore

@@ -113,7 +114,10 @@
     )
 
     plt.title(  # type: ignore
-        label="ChatGPT Prompts per Day", fontsize=20, fontweight="bold", pad=20
+        label="ChatGPT Prompts per Day",
+        fontsize=20,
+        fontweight="bold",
+        pad=20,
     )
     plt.xlabel(xlabel="Month", fontsize=16, labelpad=15)  # type: ignore
     plt.ylabel(ylabel="Number of Prompts", fontsize=16, labelpad=15)  # type: ignore
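
A standalone sketch of the kwargs-with-defaults pattern wordcloud_from_text uses above, limited to the parameters visible in this diff (all are real wordcloud.WordCloud arguments; the font path is the one from the repo's assets folder):

```python
from typing import Any

from wordcloud import WordCloud


def make_wordcloud(text: str, **kwargs: Any) -> WordCloud:
    """Build a WordCloud, letting callers override any default via kwargs."""
    return WordCloud(
        font_path=kwargs.get("font_path", "assets/fonts/ArchitectsDaughter-Regular.ttf"),
        width=kwargs.get("width", 1000),
        height=kwargs.get("height", 1000),
        colormap=kwargs.get("colormap", "prism"),
        include_numbers=kwargs.get("include_numbers", False),
    ).generate(text=text)


# e.g. make_wordcloud("hello world hello", colormap="viridis").to_file("cloud.png")
```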
46 changes: 25 additions & 21 deletions controllers/file_system.py
@@ -4,7 +4,8 @@
 (besides utils.py, but that doesn't save anything to disk,
 and configuration.py, but that's a placeholder for user input in whatever form,
-may be replaced later, with a GUI or something)"""
+may be replaced later, with a GUI or something)
+"""
 
 from datetime import datetime
 from json import dump, load
@@ -23,24 +24,22 @@

 def load_conversations_from_openai_zip(zip_filepath: Path) -> ConversationSet:
     """Load the conversations from the OpenAI zip export file."""
-
     with ZipFile(file=zip_filepath, mode="r") as file:
         file.extractall(path=zip_filepath.with_suffix(suffix=""))
 
     conversations_path: Path = (
         zip_filepath.with_suffix(suffix="") / "conversations.json"
     )
 
-    with open(file=conversations_path, mode="r", encoding="utf-8") as file:
+    with open(file=conversations_path, encoding="utf-8") as file:
         conversations = load(fp=file)
 
     return ConversationSet(conversations=conversations)
 
 
 def load_conversations_from_bookmarklet_json(json_filepath: Path) -> ConversationSet:
     """Load the conversations from the bookmarklet json export file."""
-
-    with open(file=json_filepath, mode="r", encoding="utf-8") as file:
+    with open(file=json_filepath, encoding="utf-8") as file:
         conversations = load(fp=file)
 
     return ConversationSet(conversations=conversations)
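
A minimal sketch of the extract-then-read pattern above, assuming an export zip whose root contains conversations.json (as the OpenAI export does):

```python
from json import load
from pathlib import Path
from zipfile import ZipFile


def load_conversations(zip_filepath: Path) -> list[dict]:
    """Extract the zip next to itself, then read conversations.json from the result."""
    extract_dir = zip_filepath.with_suffix("")  # "export.zip" -> "export"
    with ZipFile(file=zip_filepath, mode="r") as file:
        file.extractall(path=extract_dir)
    with open(file=extract_dir / "conversations.json", encoding="utf-8") as file:
        return load(fp=file)
```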
@@ -54,7 +53,7 @@ def save_conversation_to_file(conversation: Conversation, filepath: Path) -> Non
     while filepath.exists():
         counter += 1
         filepath = filepath.with_name(
-            name=f"{base_file_name} ({counter}){filepath.suffix}"
+            name=f"{base_file_name} ({counter}){filepath.suffix}",
         )
 
     with open(file=filepath, mode="w", encoding="utf-8") as file:
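
The loop above avoids overwriting by appending " (n)" to the file stem until the name is free. The same pattern in isolation (the function name is mine, for illustration):

```python
from pathlib import Path


def unique_path(filepath: Path) -> Path:
    """Return filepath unchanged, or "name (1).md", "name (2).md", ... if taken."""
    base_file_name = filepath.stem
    counter = 0
    while filepath.exists():
        counter += 1
        filepath = filepath.with_name(name=f"{base_file_name} ({counter}){filepath.suffix}")
    return filepath
```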
@@ -65,7 +64,8 @@ def save_conversation_to_file(conversation: Conversation, filepath: Path) -> Non
def save_conversation_set_to_dir(conv_set: ConversationSet, dir_path: Path) -> None:
"""Save a conversation set to a directory, one markdown file per conversation."""
for conversation in tqdm(
iterable=conv_set.conversation_list, desc="Writing Markdown 📄 files"
iterable=conv_set.conversation_list,
desc="Writing Markdown 📄 files",
):
file_path: Path = dir_path / f"{conversation.sanitized_title()}.md"
save_conversation_to_file(conversation=conversation, filepath=file_path)
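
The tqdm reflow above only puts each keyword argument on its own line; the progress bar is unchanged. A runnable sketch of the pattern, with a hypothetical title list standing in for conv_set.conversation_list:

```python
from pathlib import Path

from tqdm import tqdm

titles = ["First chat", "Second chat"]  # hypothetical stand-in for the conversation list
out_dir = Path("markdown_out")
out_dir.mkdir(exist_ok=True)

for title in tqdm(
    iterable=titles,
    desc="Writing Markdown 📄 files",
):
    (out_dir / f"{title}.md").write_text(data=f"# {title}\n", encoding="utf-8")
```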
@@ -89,21 +89,23 @@ def save_wordcloud_from_conversation_set(
         raise ValueError("Invalid time period for wordcloud")
 
     wordcloud_from_conversation_set(conv_set=conv_set, **kwargs).to_file(  # type: ignore
-        filename=dir_path / file_name
+        filename=dir_path / file_name,
     )
 
 
 def generate_all_wordclouds(
-    conv_set: ConversationSet, dir_path: Path, **kwargs: Any
+    conv_set: ConversationSet,
+    dir_path: Path,
+    **kwargs: Any,
 ) -> None:
     """Create the wordclouds and save them to the folder."""
-
     weeks_dict: dict[datetime, ConversationSet] = conv_set.grouped_by_week()
     months_dict: dict[datetime, ConversationSet] = conv_set.grouped_by_month()
     years_dict: dict[datetime, ConversationSet] = conv_set.grouped_by_year()
 
     for week in tqdm(
-        iterable=weeks_dict.keys(), desc="Creating weekly wordclouds 🔡☁️ "
+        iterable=weeks_dict.keys(),
+        desc="Creating weekly wordclouds 🔡☁️ ",
     ):
         save_wordcloud_from_conversation_set(
             conv_set=weeks_dict[week],
@@ -113,7 +115,8 @@ def generate_all_wordclouds(
         )
 
     for month in tqdm(
-        iterable=months_dict.keys(), desc="Creating monthly wordclouds 🔡☁️ "
+        iterable=months_dict.keys(),
+        desc="Creating monthly wordclouds 🔡☁️ ",
     ):
         save_wordcloud_from_conversation_set(
             conv_set=months_dict[month],
@@ -123,7 +126,8 @@ def generate_all_wordclouds(
         )
 
     for year in tqdm(
-        iterable=years_dict.keys(), desc="Creating yearly wordclouds 🔡☁️ "
+        iterable=years_dict.keys(),
+        desc="Creating yearly wordclouds 🔡☁️ ",
     ):
         save_wordcloud_from_conversation_set(
             conv_set=years_dict[year],
@@ -135,23 +139,22 @@

 def save_custom_instructions_to_file(conv_set: ConversationSet, filepath: Path) -> None:
     """Create JSON file for custom instructions in the conversation set."""
-
     with open(file=filepath, mode="w", encoding="utf-8") as file:
         dump(obj=conv_set.all_custom_instructions(), fp=file, indent=2)
 
 
 def default_output_folder() -> str:
     """Returns the default output folder path.
-    (put the function in a separate file to isolate file system operations)"""
-
+    (put the function in a separate file to isolate file system operations)
+    """
     return str(object=Path.home() / "Documents" / "ChatGPT Data")
 
 
 def get_openai_zip_filepath() -> str:
     """Returns the path to the most recent zip file in the Downloads folder,
-    excluding those containing 'bookmarklet'."""
-
+    excluding those containing 'bookmarklet'.
+    """
     downloads_folder: Path = Path.home() / "Downloads"
 
     # Filter out zip files with names that contain "bookmarklet"
@@ -170,8 +173,8 @@

 def get_bookmarklet_json_filepath() -> Path | None:
     """Returns the path to the most recent json file in the Downloads folder,
-    containing 'bookmarklet'."""
-
+    containing 'bookmarklet'.
+    """
     downloads_folder: Path = Path.home() / "Downloads"
 
     # Filter out json files with names that do not contain "bookmarklet"
@@ -184,7 +187,8 @@

     # Most recent json file in downloads folder, containing "bookmarklet"
     bookmarklet_json_filepath: Path = max(
-        bookmarklet_json_files, key=lambda x: x.stat().st_ctime
+        bookmarklet_json_files,
+        key=lambda x: x.stat().st_ctime,
     )
 
     return bookmarklet_json_filepath
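
Both Downloads-folder helpers above reduce to the same idiom: glob, filter on the file name, then take the newest match by st_ctime. A combined sketch (the function name and the optional substring filter are mine):

```python
from pathlib import Path


def most_recent_download(pattern: str, must_contain: str | None = None) -> Path | None:
    """Newest file in ~/Downloads matching pattern, optionally filtered by name substring."""
    downloads_folder: Path = Path.home() / "Downloads"
    candidates: list[Path] = [
        path
        for path in downloads_folder.glob(pattern)
        if must_contain is None or must_contain in path.name
    ]
    if not candidates:
        return None
    return max(candidates, key=lambda x: x.stat().st_ctime)


# e.g. the bookmarklet export: most_recent_download("*.json", must_contain="bookmarklet")
```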
2 changes: 0 additions & 2 deletions js/how_to_use.md
@@ -27,6 +27,4 @@ Feel free to modify the script to your liking. Would also appreciate sharing the
 - [ ] better widget UI (add error messages and progress and such,
       so you can close the dev console and still be kept informed on the download process)
 - [ ] add instructions on how to create a bookmarklet
-      (how to minify the js script, make it url valid, then creating the bookmark in the browser.
-      Maybe do all these in-house? but that might need the uglify-js npm dependency ...)
 - [ ] more todos ...
