Skip to content

Commit

Permalink
~
Browse files Browse the repository at this point in the history
  • Loading branch information
mohamed-chs committed Oct 17, 2023
1 parent ced6429 commit 1449ddd
Show file tree
Hide file tree
Showing 10 changed files with 77 additions and 93 deletions.
4 changes: 2 additions & 2 deletions .vscode/extensions.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
"usernamehw.errorlens",
"njqdev.vscode-python-typehint",
"ms-python.python",
"ms-python.pylint",
"matangover.mypy",
"ms-toolsai.jupyter"
"ms-toolsai.jupyter",
"charliermarsh.ruff"
]
}
17 changes: 10 additions & 7 deletions controllers/data_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import matplotlib.pyplot as plt
import nltk # type: ignore
import pandas as pd
from matplotlib.axes import Axes
from nltk.corpus import stopwords # type: ignore
from pandas.core.series import Series
from wordcloud import WordCloud # type: ignore
Expand Down Expand Up @@ -77,23 +78,23 @@ def wordcloud_from_text(


def wordcloud_from_conversation_set(
    conv_set: "ConversationSet", **kwargs: Any
) -> "WordCloud":
    """Create a wordcloud from the given conversation set.

    Pools the text written by the user and by the assistant across the whole
    set (joined with a newline) so the cloud reflects the full dialogue, then
    delegates rendering to `wordcloud_from_text`.

    Returns a WordCloud object.
    """
    text: str = (
        conv_set.all_author_text(author="user")
        + "\n"
        + conv_set.all_author_text(author="assistant")
    )

    # kwargs (font, colormap, ...) are forwarded untouched to the renderer.
    return wordcloud_from_text(text=text, **kwargs)


def create_save_graph(all_timestamps: list[float], file_path: Path) -> None:
def create_save_graph(timestamps: list[float], file_path: Path) -> None:
"""Creates and saves a graph from the given timestamps."""

df = pd.DataFrame(data=all_timestamps, columns=["timestamp"]) # type: ignore
df = pd.DataFrame(data=timestamps, columns=["timestamp"]) # type: ignore
df["datetime"] = pd.to_datetime(arg=df["timestamp"], unit="s") # type: ignore

daily_counts: Series = df.groupby(by=df["datetime"].dt.date).size() # type: ignore
Expand All @@ -111,13 +112,15 @@ def create_save_graph(all_timestamps: list[float], file_path: Path) -> None:
markeredgewidth=0.5,
)

plt.title(label="ChatGPT Prompts per Day", fontsize=20, fontweight="bold", pad=20) # type: ignore
plt.title( # type: ignore
label="ChatGPT Prompts per Day", fontsize=20, fontweight="bold", pad=20
)
plt.xlabel(xlabel="Month", fontsize=16, labelpad=15) # type: ignore
plt.ylabel(ylabel="Number of Prompts", fontsize=16, labelpad=15) # type: ignore
plt.xticks(fontsize=14) # type: ignore
plt.yticks(fontsize=14) # type: ignore

ax = plt.gca() # type: ignore
ax: Axes = plt.gca()
ax.xaxis.set_major_locator(locator=mdates.MonthLocator()) # type: ignore
ax.xaxis.set_major_formatter(formatter=mdates.DateFormatter(fmt="%B")) # type: ignore

Expand Down
61 changes: 28 additions & 33 deletions controllers/file_system.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
and configuration.py, but that's a placeholder for user input in whatever form,
may be replaced later, with a GUI or something)"""


from datetime import datetime
from json import dump, load
from os import utime
Expand Down Expand Up @@ -42,41 +41,39 @@ def load_conversations_from_bookmarklet_json(json_filepath: Path) -> Conversatio
"""Load the conversations from the bookmarklet json export file."""

with open(file=json_filepath, mode="r", encoding="utf-8") as file:
conversations = load(file)
conversations = load(fp=file)

return ConversationSet(conversations=conversations)


def save_conversation_to_file(conversation: "Conversation", filepath: Path) -> None:
    """Save a conversation to a markdown file, with added modification time.

    If `filepath` already exists, a ` (N)` counter is appended to the stem
    until a free name is found, so earlier exports are never overwritten.
    After writing, the file's access/modification times are set to the
    conversation's update time so OS-level date sorting matches recency.
    """
    base_file_name: str = filepath.stem

    counter = 0
    while filepath.exists():
        counter += 1
        filepath = filepath.with_name(
            name=f"{base_file_name} ({counter}){filepath.suffix}"
        )

    with open(file=filepath, mode="w", encoding="utf-8") as file:
        file.write(conversation.to_markdown())
    # Mirror the conversation's update time onto the file's atime/mtime.
    utime(path=filepath, times=(conversation.update_time, conversation.update_time))


def save_conversation_set_to_dir(conv_set: "ConversationSet", dir_path: Path) -> None:
    """Save a conversation set to a directory, one markdown file per conversation.

    Iterates the set's conversation list with a tqdm progress bar; each
    conversation is written via `save_conversation_to_file`, which also
    handles filename collisions and modification times.
    """
    for conversation in tqdm(
        iterable=conv_set.conversation_list, desc="Writing Markdown 📄 files"
    ):
        # sanitized_title() keeps the filename filesystem-safe.
        file_path: Path = dir_path / f"{conversation.sanitized_title()}.md"
        save_conversation_to_file(conversation=conversation, filepath=file_path)


def save_wordcloud_from_conversation_set(
conversation_set: ConversationSet,
folder_path: Path,
conv_set: ConversationSet,
dir_path: Path,
time_period: tuple[datetime, str],
**kwargs: Any,
) -> None:
Expand All @@ -91,26 +88,26 @@ def save_wordcloud_from_conversation_set(
case _:
raise ValueError("Invalid time period for wordcloud")

wordcloud_from_conversation_set(conversation_set=conversation_set, **kwargs).to_file( # type: ignore
filename=folder_path / file_name
wordcloud_from_conversation_set(conv_set=conv_set, **kwargs).to_file( # type: ignore
filename=dir_path / file_name
)


def generate_all_wordclouds(
conversation_set: ConversationSet, folder_path: Path, **kwargs: Any
conv_set: ConversationSet, dir_path: Path, **kwargs: Any
) -> None:
"""Create the wordclouds and save them to the folder."""

weeks_dict: dict[datetime, ConversationSet] = conversation_set.grouped_by_week()
months_dict: dict[datetime, ConversationSet] = conversation_set.grouped_by_month()
years_dict: dict[datetime, ConversationSet] = conversation_set.grouped_by_year()
weeks_dict: dict[datetime, ConversationSet] = conv_set.grouped_by_week()
months_dict: dict[datetime, ConversationSet] = conv_set.grouped_by_month()
years_dict: dict[datetime, ConversationSet] = conv_set.grouped_by_year()

for week in tqdm(
iterable=weeks_dict.keys(), desc="Creating weekly wordclouds 🔡☁️ "
):
save_wordcloud_from_conversation_set(
conversation_set=weeks_dict[week],
folder_path=folder_path,
conv_set=weeks_dict[week],
dir_path=dir_path,
time_period=(week, "week"),
**kwargs,
)
Expand All @@ -119,8 +116,8 @@ def generate_all_wordclouds(
iterable=months_dict.keys(), desc="Creating monthly wordclouds 🔡☁️ "
):
save_wordcloud_from_conversation_set(
conversation_set=months_dict[month],
folder_path=folder_path,
conv_set=months_dict[month],
dir_path=dir_path,
time_period=(month, "month"),
**kwargs,
)
Expand All @@ -129,20 +126,18 @@ def generate_all_wordclouds(
iterable=years_dict.keys(), desc="Creating yearly wordclouds 🔡☁️ "
):
save_wordcloud_from_conversation_set(
conversation_set=years_dict[year],
folder_path=folder_path,
conv_set=years_dict[year],
dir_path=dir_path,
time_period=(year, "year"),
**kwargs,
)


def save_custom_instructions_to_file(conv_set: "ConversationSet", filepath: Path) -> None:
    """Create a JSON file with all custom instructions found in the conversation set.

    The collected instructions are written as pretty-printed (indent=2) JSON.
    Any existing file at `filepath` is overwritten.
    """
    with open(file=filepath, mode="w", encoding="utf-8") as file:
        dump(obj=conv_set.all_custom_instructions(), fp=file, indent=2)


def default_output_folder() -> str:
Expand Down
14 changes: 6 additions & 8 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def main() -> None:
json_filepath=bookmarklet_json_filepath
)
)
all_conversations_set.update(conversation_set=bookmarklet_conversations_set)
all_conversations_set.update(conv_set=bookmarklet_conversations_set)

output_folder = Path(configs_dict["output_folder"])

Expand All @@ -78,7 +78,7 @@ def main() -> None:
markdown_folder.mkdir(parents=True, exist_ok=True)

save_conversation_set_to_dir(
conversation_set=all_conversations_set, dir_path=markdown_folder
conv_set=all_conversations_set, dir_path=markdown_folder
)

print(f"\nDone ✅ ! Check the output 📄 here : {markdown_folder.as_uri()} 🔗\n")
Expand All @@ -91,9 +91,7 @@ def main() -> None:
graph_path: Path = graph_folder / "all messages.png"

create_save_graph(
all_timestamps=all_conversations_set.all_author_message_timestamps(
author="user"
),
timestamps=all_conversations_set.all_author_message_timestamps(author="user"),
file_path=graph_path,
)

Expand All @@ -107,8 +105,8 @@ def main() -> None:
colormap = configs_dict["wordcloud"]["colormap"]

generate_all_wordclouds(
conversation_set=all_conversations_set,
folder_path=wordcloud_folder,
conv_set=all_conversations_set,
dir_path=wordcloud_folder,
font_path=font_path,
colormap=colormap,
)
Expand All @@ -120,7 +118,7 @@ def main() -> None:
custom_instructions_filepath: Path = output_folder / "custom_instructions.json"

save_custom_instructions_to_file(
conversation_set=all_conversations_set, file_path=custom_instructions_filepath
conv_set=all_conversations_set, filepath=custom_instructions_filepath
)

print(
Expand Down
18 changes: 9 additions & 9 deletions models/conversation_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,20 +28,20 @@ def __init__(self, conversations: list[dict[str, Any]]) -> None:

self.conversation_list = list(self.conversation_dict.values())

def add_conversation(self, conv: "Conversation") -> None:
    """Add a conversation to both the id-keyed dict and the ordered list.

    The two containers are kept in sync: the dict gives O(1) lookup by
    conversation id, the list preserves insertion order for iteration.
    """
    self.conversation_dict[conv.conversation_id] = conv
    self.conversation_list.append(conv)

def last_updated(self) -> float:
    """Return the timestamp of the most recently updated conversation in the list.

    Raises ValueError (from `max`) when the set is empty.
    """
    return max(conversation.update_time for conversation in self.conversation_list)

def update(self, conv_set: "ConversationSet") -> None:
    """Merge `conv_set` into this set.

    Skips the merge entirely when `conv_set` holds nothing newer than what
    is already here (compared via `last_updated`); otherwise its
    conversations overwrite same-id entries and the ordered list is rebuilt
    from the dict.
    """
    if conv_set.last_updated() <= self.last_updated():
        return
    self.conversation_dict.update(conv_set.conversation_dict)
    self.conversation_list = list(self.conversation_dict.values())

def grouped_by_week(self) -> dict[datetime, "ConversationSet"]:
Expand All @@ -51,7 +51,7 @@ def grouped_by_week(self) -> dict[datetime, "ConversationSet"]:
week_start: datetime = conversation.start_of_week()
if week_start not in grouped:
grouped[week_start] = ConversationSet(conversations=[])
grouped[week_start].add_conversation(conversation=conversation)
grouped[week_start].add_conversation(conv=conversation)
return grouped

def grouped_by_month(self) -> dict[datetime, "ConversationSet"]:
Expand All @@ -61,7 +61,7 @@ def grouped_by_month(self) -> dict[datetime, "ConversationSet"]:
month_start: datetime = conversation.start_of_month()
if month_start not in grouped:
grouped[month_start] = ConversationSet(conversations=[])
grouped[month_start].add_conversation(conversation=conversation)
grouped[month_start].add_conversation(conv=conversation)
return grouped

def grouped_by_year(self) -> dict[datetime, "ConversationSet"]:
Expand All @@ -71,7 +71,7 @@ def grouped_by_year(self) -> dict[datetime, "ConversationSet"]:
year_start: datetime = conversation.start_of_year()
if year_start not in grouped:
grouped[year_start] = ConversationSet(conversations=[])
grouped[year_start].add_conversation(conversation=conversation)
grouped[year_start].add_conversation(conv=conversation)
return grouped

def all_custom_instructions(self) -> list[dict[str, Any]]:
Expand Down
4 changes: 1 addition & 3 deletions models/message.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,7 @@ def author_header(self) -> str:
def content_text(self) -> str:
    """Get the text content of the message.

    Handles the two content shapes seen in ChatGPT exports:
    - "parts": plain chat text; the first part is returned as a string.
    - "text": code-interpreter content; wrapped in a python code fence.
    Any other shape yields an empty string.
    """
    if "parts" in self.content:
        return str(object=self.content["parts"][0])
    if "text" in self.content:
        return f"```python\n{self.content['text']}\n```"
    return ""
Expand Down
10 changes: 5 additions & 5 deletions models/node.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,13 @@ class Node:

def __init__(
    self,
    n_id: str,
    msg: "Message | None",
    parent: Optional["Node"],
    children: Optional[list["Node"]],
) -> None:
    """Build a conversation-tree node.

    n_id: id of this node in the conversation mapping.
    msg: the message payload, or None for structural nodes.
    parent: parent node, or None until the tree is linked (second pass).
    children: child nodes; None is normalized to an empty list.
    """
    self.id: str = n_id
    self.message: "Message | None" = msg
    self.parent: "Node | None" = parent
    # Normalize so callers can always append without a None check.
    self.children: list["Node"] = children if children else []

Expand All @@ -45,7 +45,7 @@ def nodes_from_mapping(mapping: dict[str, Any]) -> dict[str, "Node"]:
message: Message | None = (
Message(message=value["message"]) if value.get("message") else None
)
nodes[key] = Node(node_id=key, message=message, parent=None, children=None)
nodes[key] = Node(n_id=key, msg=message, parent=None, children=None)

# Second pass: Connect nodes
for key, value in mapping.items():
Expand Down
14 changes: 5 additions & 9 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,6 @@
matplotlib
nltk
pandas
questionary
seaborn
tqdm
wordcloud
pylint
-r requirements.txt
pytest
mypy
pytest
ruff
black
isort
4 changes: 2 additions & 2 deletions tests/test_conversation.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ def sample_conversation_data() -> dict[str, Any]:

return {
"title": "Sample Conversation",
"create_time": 1642540800.0, # Jan 19, 2022, 12:00:00 PM
"update_time": 1642540900.0, # Jan 19, 2022, 12:01:40 PM
"create_time": 1642540800.0, # Jan 18, 2022, 12:00:00 PM
"update_time": 1642540900.0, # Jan 18, 2022, 12:01:40 PM
"mapping": {
"node1": {
"message": {
Expand Down
Loading

0 comments on commit 1449ddd

Please sign in to comment.