diff --git a/.vscode/extensions.json b/.vscode/extensions.json index da7b5fe..5420702 100644 --- a/.vscode/extensions.json +++ b/.vscode/extensions.json @@ -4,8 +4,8 @@ "usernamehw.errorlens", "njqdev.vscode-python-typehint", "ms-python.python", - "ms-python.pylint", "matangover.mypy", - "ms-toolsai.jupyter" + "ms-toolsai.jupyter", + "charliermarsh.ruff" ] } \ No newline at end of file diff --git a/controllers/data_analysis.py b/controllers/data_analysis.py index 169f432..2d4418a 100644 --- a/controllers/data_analysis.py +++ b/controllers/data_analysis.py @@ -9,6 +9,7 @@ import matplotlib.pyplot as plt import nltk # type: ignore import pandas as pd +from matplotlib.axes import Axes from nltk.corpus import stopwords # type: ignore from pandas.core.series import Series from wordcloud import WordCloud # type: ignore @@ -77,23 +78,23 @@ def wordcloud_from_text( def wordcloud_from_conversation_set( - conversation_set: ConversationSet, **kwargs: Any + conv_set: ConversationSet, **kwargs: Any ) -> WordCloud: """Creates a wordcloud from the given conversation set. 
Returns a WordCloud object.""" text: str = ( - conversation_set.all_author_text(author="user") + conv_set.all_author_text(author="user") + "\n" - + conversation_set.all_author_text(author="assistant") + + conv_set.all_author_text(author="assistant") ) return wordcloud_from_text(text=text, **kwargs) -def create_save_graph(all_timestamps: list[float], file_path: Path) -> None: +def create_save_graph(timestamps: list[float], file_path: Path) -> None: """Creates and saves a graph from the given timestamps.""" - df = pd.DataFrame(data=all_timestamps, columns=["timestamp"]) # type: ignore + df = pd.DataFrame(data=timestamps, columns=["timestamp"]) # type: ignore df["datetime"] = pd.to_datetime(arg=df["timestamp"], unit="s") # type: ignore daily_counts: Series = df.groupby(by=df["datetime"].dt.date).size() # type: ignore @@ -111,13 +112,15 @@ def create_save_graph(all_timestamps: list[float], file_path: Path) -> None: markeredgewidth=0.5, ) - plt.title(label="ChatGPT Prompts per Day", fontsize=20, fontweight="bold", pad=20) # type: ignore + plt.title( # type: ignore + label="ChatGPT Prompts per Day", fontsize=20, fontweight="bold", pad=20 + ) plt.xlabel(xlabel="Month", fontsize=16, labelpad=15) # type: ignore plt.ylabel(ylabel="Number of Prompts", fontsize=16, labelpad=15) # type: ignore plt.xticks(fontsize=14) # type: ignore plt.yticks(fontsize=14) # type: ignore - ax = plt.gca() # type: ignore + ax: Axes = plt.gca() ax.xaxis.set_major_locator(locator=mdates.MonthLocator()) # type: ignore ax.xaxis.set_major_formatter(formatter=mdates.DateFormatter(fmt="%B")) # type: ignore diff --git a/controllers/file_system.py b/controllers/file_system.py index 04e2996..37227aa 100644 --- a/controllers/file_system.py +++ b/controllers/file_system.py @@ -6,7 +6,6 @@ and configuration.py, but that's a placeholder for user input in whatever form, may be replaced later, with a GUI or something)""" - from datetime import datetime from json import dump, load from os import utime @@ -42,41 
+41,39 @@ def load_conversations_from_bookmarklet_json(json_filepath: Path) -> Conversatio """Load the conversations from the bookmarklet json export file.""" with open(file=json_filepath, mode="r", encoding="utf-8") as file: - conversations = load(file) + conversations = load(fp=file) return ConversationSet(conversations=conversations) -def save_conversation_to_file(conversation: Conversation, file_path: Path) -> None: +def save_conversation_to_file(conversation: Conversation, filepath: Path) -> None: """Save a conversation to a file, with added modification time.""" - base_file_name: str = file_path.stem + base_file_name: str = filepath.stem counter = 0 - while file_path.exists(): + while filepath.exists(): counter += 1 - file_path = file_path.with_name( - name=f"{base_file_name} ({counter}){file_path.suffix}" + filepath = filepath.with_name( + name=f"{base_file_name} ({counter}){filepath.suffix}" ) - with open(file=file_path, mode="w", encoding="utf-8") as file: + with open(file=filepath, mode="w", encoding="utf-8") as file: file.write(conversation.to_markdown()) - utime(path=file_path, times=(conversation.update_time, conversation.update_time)) + utime(path=filepath, times=(conversation.update_time, conversation.update_time)) -def save_conversation_set_to_dir( - conversation_set: ConversationSet, dir_path: Path -) -> None: +def save_conversation_set_to_dir(conv_set: ConversationSet, dir_path: Path) -> None: """Save a conversation set to a directory, one markdown file per conversation.""" for conversation in tqdm( - iterable=conversation_set.conversation_list, desc="Writing Markdown 📄 files" + iterable=conv_set.conversation_list, desc="Writing Markdown 📄 files" ): file_path: Path = dir_path / f"{conversation.sanitized_title()}.md" - save_conversation_to_file(conversation=conversation, file_path=file_path) + save_conversation_to_file(conversation=conversation, filepath=file_path) def save_wordcloud_from_conversation_set( - conversation_set: ConversationSet, 
- folder_path: Path, + conv_set: ConversationSet, + dir_path: Path, time_period: tuple[datetime, str], **kwargs: Any, ) -> None: @@ -91,26 +88,26 @@ def save_wordcloud_from_conversation_set( case _: raise ValueError("Invalid time period for wordcloud") - wordcloud_from_conversation_set(conversation_set=conversation_set, **kwargs).to_file( # type: ignore - filename=folder_path / file_name + wordcloud_from_conversation_set(conv_set=conv_set, **kwargs).to_file( # type: ignore + filename=dir_path / file_name ) def generate_all_wordclouds( - conversation_set: ConversationSet, folder_path: Path, **kwargs: Any + conv_set: ConversationSet, dir_path: Path, **kwargs: Any ) -> None: """Create the wordclouds and save them to the folder.""" - weeks_dict: dict[datetime, ConversationSet] = conversation_set.grouped_by_week() - months_dict: dict[datetime, ConversationSet] = conversation_set.grouped_by_month() - years_dict: dict[datetime, ConversationSet] = conversation_set.grouped_by_year() + weeks_dict: dict[datetime, ConversationSet] = conv_set.grouped_by_week() + months_dict: dict[datetime, ConversationSet] = conv_set.grouped_by_month() + years_dict: dict[datetime, ConversationSet] = conv_set.grouped_by_year() for week in tqdm( iterable=weeks_dict.keys(), desc="Creating weekly wordclouds 🔡☁️ " ): save_wordcloud_from_conversation_set( - conversation_set=weeks_dict[week], - folder_path=folder_path, + conv_set=weeks_dict[week], + dir_path=dir_path, time_period=(week, "week"), **kwargs, ) @@ -119,8 +116,8 @@ def generate_all_wordclouds( iterable=months_dict.keys(), desc="Creating monthly wordclouds 🔡☁️ " ): save_wordcloud_from_conversation_set( - conversation_set=months_dict[month], - folder_path=folder_path, + conv_set=months_dict[month], + dir_path=dir_path, time_period=(month, "month"), **kwargs, ) @@ -129,20 +126,18 @@ def generate_all_wordclouds( iterable=years_dict.keys(), desc="Creating yearly wordclouds 🔡☁️ " ): save_wordcloud_from_conversation_set( - 
conversation_set=years_dict[year], - folder_path=folder_path, + conv_set=years_dict[year], + dir_path=dir_path, time_period=(year, "year"), **kwargs, ) -def save_custom_instructions_to_file( - conversation_set: ConversationSet, file_path: Path -) -> None: +def save_custom_instructions_to_file(conv_set: ConversationSet, filepath: Path) -> None: """Create JSON file for custom instructions in the conversation set.""" - with open(file=file_path, mode="w", encoding="utf-8") as file: - dump(obj=conversation_set.all_custom_instructions(), fp=file, indent=2) + with open(file=filepath, mode="w", encoding="utf-8") as file: + dump(obj=conv_set.all_custom_instructions(), fp=file, indent=2) def default_output_folder() -> str: diff --git a/main.py b/main.py index e609810..7dcf5d1 100644 --- a/main.py +++ b/main.py @@ -64,7 +64,7 @@ def main() -> None: json_filepath=bookmarklet_json_filepath ) ) - all_conversations_set.update(conversation_set=bookmarklet_conversations_set) + all_conversations_set.update(conv_set=bookmarklet_conversations_set) output_folder = Path(configs_dict["output_folder"]) @@ -78,7 +78,7 @@ def main() -> None: markdown_folder.mkdir(parents=True, exist_ok=True) save_conversation_set_to_dir( - conversation_set=all_conversations_set, dir_path=markdown_folder + conv_set=all_conversations_set, dir_path=markdown_folder ) print(f"\nDone ✅ ! 
Check the output 📄 here : {markdown_folder.as_uri()} 🔗\n") @@ -91,9 +91,7 @@ def main() -> None: graph_path: Path = graph_folder / "all messages.png" create_save_graph( - all_timestamps=all_conversations_set.all_author_message_timestamps( - author="user" - ), + timestamps=all_conversations_set.all_author_message_timestamps(author="user"), file_path=graph_path, ) @@ -107,8 +105,8 @@ def main() -> None: colormap = configs_dict["wordcloud"]["colormap"] generate_all_wordclouds( - conversation_set=all_conversations_set, - folder_path=wordcloud_folder, + conv_set=all_conversations_set, + dir_path=wordcloud_folder, font_path=font_path, colormap=colormap, ) @@ -120,7 +118,7 @@ def main() -> None: custom_instructions_filepath: Path = output_folder / "custom_instructions.json" save_custom_instructions_to_file( - conversation_set=all_conversations_set, file_path=custom_instructions_filepath + conv_set=all_conversations_set, filepath=custom_instructions_filepath ) print( diff --git a/models/conversation_set.py b/models/conversation_set.py index 5e87150..59dd3e9 100644 --- a/models/conversation_set.py +++ b/models/conversation_set.py @@ -28,20 +28,20 @@ def __init__(self, conversations: list[dict[str, Any]]) -> None: self.conversation_list = list(self.conversation_dict.values()) - def add_conversation(self, conversation: Conversation) -> None: + def add_conversation(self, conv: Conversation) -> None: """Add a conversation to the dictionary and list.""" - self.conversation_dict[conversation.conversation_id] = conversation - self.conversation_list.append(conversation) + self.conversation_dict[conv.conversation_id] = conv + self.conversation_list.append(conv) def last_updated(self) -> float: """Returns the timestamp of the last updated conversation in the list.""" return max(conversation.update_time for conversation in self.conversation_list) - def update(self, conversation_set: "ConversationSet") -> None: + def update(self, conv_set: "ConversationSet") -> None: """Update 
the conversation set with the new one.""" - if conversation_set.last_updated() <= self.last_updated(): + if conv_set.last_updated() <= self.last_updated(): return - self.conversation_dict.update(conversation_set.conversation_dict) + self.conversation_dict.update(conv_set.conversation_dict) self.conversation_list = list(self.conversation_dict.values()) def grouped_by_week(self) -> dict[datetime, "ConversationSet"]: @@ -51,7 +51,7 @@ def grouped_by_week(self) -> dict[datetime, "ConversationSet"]: week_start: datetime = conversation.start_of_week() if week_start not in grouped: grouped[week_start] = ConversationSet(conversations=[]) - grouped[week_start].add_conversation(conversation=conversation) + grouped[week_start].add_conversation(conv=conversation) return grouped def grouped_by_month(self) -> dict[datetime, "ConversationSet"]: @@ -61,7 +61,7 @@ def grouped_by_month(self) -> dict[datetime, "ConversationSet"]: month_start: datetime = conversation.start_of_month() if month_start not in grouped: grouped[month_start] = ConversationSet(conversations=[]) - grouped[month_start].add_conversation(conversation=conversation) + grouped[month_start].add_conversation(conv=conversation) return grouped def grouped_by_year(self) -> dict[datetime, "ConversationSet"]: @@ -71,7 +71,7 @@ def grouped_by_year(self) -> dict[datetime, "ConversationSet"]: year_start: datetime = conversation.start_of_year() if year_start not in grouped: grouped[year_start] = ConversationSet(conversations=[]) - grouped[year_start].add_conversation(conversation=conversation) + grouped[year_start].add_conversation(conv=conversation) return grouped def all_custom_instructions(self) -> list[dict[str, Any]]: diff --git a/models/message.py b/models/message.py index e7a89e7..4e89cbd 100644 --- a/models/message.py +++ b/models/message.py @@ -48,9 +48,7 @@ def author_header(self) -> str: def content_text(self) -> str: """get the text content of the message.""" if "parts" in self.content: - return str( - 
object=self.content["parts"][0] - ) # suggested by @turnboughsg, pr #24 + return str(object=self.content["parts"][0]) if "text" in self.content: return f"```python\n{self.content['text']}\n```" return "" diff --git a/models/node.py b/models/node.py index d6c7c24..8dcfb44 100644 --- a/models/node.py +++ b/models/node.py @@ -20,13 +20,13 @@ class Node: def __init__( self, - node_id: str, - message: Message | None, + n_id: str, + msg: Message | None, parent: Optional["Node"], children: Optional[list["Node"]], ) -> None: - self.id: str = node_id - self.message: Message | None = message + self.id: str = n_id + self.message: Message | None = msg self.parent: Node | None = parent self.children: list[Node] = children if children else [] @@ -45,7 +45,7 @@ def nodes_from_mapping(mapping: dict[str, Any]) -> dict[str, "Node"]: message: Message | None = ( Message(message=value["message"]) if value.get("message") else None ) - nodes[key] = Node(node_id=key, message=message, parent=None, children=None) + nodes[key] = Node(n_id=key, msg=message, parent=None, children=None) # Second pass: Connect nodes for key, value in mapping.items(): diff --git a/requirements-dev.txt b/requirements-dev.txt index 62f4f07..e510484 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,10 +1,6 @@ -matplotlib -nltk -pandas -questionary -seaborn -tqdm -wordcloud -pylint +-r requirements.txt +pytest mypy -pytest \ No newline at end of file +ruff +black +isort \ No newline at end of file diff --git a/tests/test_conversation.py b/tests/test_conversation.py index 97add47..28b2aee 100644 --- a/tests/test_conversation.py +++ b/tests/test_conversation.py @@ -11,8 +11,8 @@ def sample_conversation_data() -> dict[str, Any]: return { "title": "Sample Conversation", - "create_time": 1642540800.0, # Jan 19, 2022, 12:00:00 PM - "update_time": 1642540900.0, # Jan 19, 2022, 12:01:40 PM + "create_time": 1642540800.0, # Jan 18, 2022, 12:00:00 PM + "update_time": 1642540900.0, # Jan 18, 2022, 12:01:40 PM 
"mapping": { "node1": { "message": { diff --git a/tests/test_node.py b/tests/test_node.py index 00a1407..75c7f75 100644 --- a/tests/test_node.py +++ b/tests/test_node.py @@ -11,7 +11,7 @@ def test_node_initialization() -> None: node_id = "node_id_123" msg = Message(message={"id": "msg_id_123", "author": {"role": "user"}}) - node = Node(node_id=node_id, message=msg, parent=None, children=None) + node = Node(n_id=node_id, msg=msg, parent=None, children=None) assert node.id == node_id assert node.message == msg @@ -22,10 +22,8 @@ def test_node_initialization() -> None: def test_add_child() -> None: """Test add_child method.""" - parent_node = Node( - node_id="parent_node_id", message=None, parent=None, children=None - ) - child_node = Node(node_id="child_node_id", message=None, parent=None, children=None) + parent_node = Node(n_id="parent_node_id", msg=None, parent=None, children=None) + child_node = Node(n_id="child_node_id", msg=None, parent=None, children=None) parent_node.add_child(node=child_node) @@ -63,15 +61,11 @@ def test_nodes_from_mapping() -> None: def test_header_with_root_sys_and_user() -> None: """Test header method with root, system and user nodes.""" - root = Node(node_id="root_id", message=None, parent=None, children=None) + root = Node(n_id="root_id", msg=None, parent=None, children=None) system_msg = Message(message={"id": "sys_msg_id", "author": {"role": "system"}}) - sys_node = Node( - node_id="sys_node_id", message=system_msg, parent=root, children=None - ) + sys_node = Node(n_id="sys_node_id", msg=system_msg, parent=root, children=None) user_msg = Message(message={"id": "user_msg_id", "author": {"role": "user"}}) - user_node = Node( - node_id="user_node_id", message=user_msg, parent=sys_node, children=None - ) + user_node = Node(n_id="user_node_id", msg=user_msg, parent=sys_node, children=None) header: str = user_node.header() assert "node_id" in header @@ -82,9 +76,9 @@ def test_header_with_root_sys_and_user() -> None: def 
test_footer_with_multiple_children() -> None: """Test footer method with multiple children.""" - node = Node(node_id="node_id", message=None, parent=None, children=None) - child1 = Node(node_id="child1_id", message=None, parent=None, children=None) - child2 = Node(node_id="child2_id", message=None, parent=None, children=None) + node = Node(n_id="node_id", msg=None, parent=None, children=None) + child1 = Node(n_id="child1_id", msg=None, parent=None, children=None) + child2 = Node(n_id="child2_id", msg=None, parent=None, children=None) node.add_child(node=child1) node.add_child(node=child2)