diff --git a/cli.py b/cli.py
index a426a96..c15717d 100644
--- a/cli.py
+++ b/cli.py
@@ -10,8 +10,8 @@
 from controllers.file_system import (
     conversation_set_from_json,
     conversation_set_from_zip,
-    create_n_save_all_weekwise_graphs,
-    generate_n_save_all_wordclouds,
+    create_all_weekwise_graphs,
+    create_all_wordclouds,
     get_bookmarklet_json_filepath,
     save_conversation_set,
     save_custom_instructions,
@@ -75,7 +75,7 @@ def main() -> None:
     graph_folder: Path = output_folder / "Graphs"
     graph_folder.mkdir(parents=True, exist_ok=True)
 
-    create_n_save_all_weekwise_graphs(
+    create_all_weekwise_graphs(
         conv_set=all_conversations_set,
         dir_path=graph_folder,
     )
@@ -90,7 +90,7 @@ def main() -> None:
     colormap: str = configs_dict["wordcloud"]["colormap"]
     custom_stopwords: str = configs_dict["wordcloud"]["custom_stopwords"]
 
-    generate_n_save_all_wordclouds(
+    create_all_wordclouds(
         conv_set=all_conversations_set,
         dir_path=wordcloud_folder,
         font_path=font_path,
diff --git a/controllers/configuration.py b/controllers/configuration.py
index bcadca7..d6d9082 100644
--- a/controllers/configuration.py
+++ b/controllers/configuration.py
@@ -25,7 +25,7 @@ def get_user_configs() -> dict[str, Any]:
         default_configs = json_load(fp=file)
 
     if not default_configs["zip_file"]:
-        default_configs["zip_file"] = get_most_recently_downloaded_zip()
+        default_configs["zip_file"] = str(object=get_most_recently_downloaded_zip())
 
     if not default_configs["output_folder"]:
         default_configs["output_folder"] = str(object=DEFAULT_OUTPUT_FOLDER)
diff --git a/controllers/data_analysis.py b/controllers/data_analysis.py
index f87e914..6e22d0e 100644
--- a/controllers/data_analysis.py
+++ b/controllers/data_analysis.py
@@ -9,9 +9,10 @@
 from datetime import datetime, timezone
 from typing import TYPE_CHECKING, Any
 
-import nltk  # type: ignore[import-untyped]
 from matplotlib.figure import Figure
-from nltk.corpus import stopwords  # type: ignore[import-untyped]
+from nltk import data as nltk_data  # type: ignore[import-untyped]
+from nltk import download as nltk_download  # type: ignore[import-untyped]
+from nltk.corpus import stopwords as nltk_stopwords  # type: ignore[import-untyped]
 from wordcloud import WordCloud  # type: ignore[import-untyped]
 
 if TYPE_CHECKING:
@@ -23,7 +24,7 @@ def weekwise_graph_from_timestamps(
     timestamps: list[float],
     **kwargs: str,
-) -> tuple[Figure, Axes]:
+) -> Figure:
     """Create a bar graph from the given timestamps, collapsed on one week."""
     dates: list[datetime] = [
         datetime.fromtimestamp(ts, tz=timezone.utc) for ts in timestamps
     ]
@@ -65,13 +66,13 @@
     ax.set_xticklabels(labels=x, rotation=45)
     fig.tight_layout()
 
-    return fig, ax
+    return fig
 
 
 def weekwise_graph_from_conversation_set(
     conv_set: ConversationSet,
     **kwargs: str,
-) -> tuple[Figure, Axes]:
+) -> Figure:
     """Create a bar graph from the given conversation set."""
     timestamps: list[float] = conv_set.all_author_message_timestamps(author="user")
     return weekwise_graph_from_timestamps(timestamps=timestamps, **kwargs)
@@ -81,9 +82,9 @@
 def load_nltk_stopwords() -> set[str]:
     """Load nltk stopwords."""
     try:
-        nltk.data.find(resource_name="corpora/stopwords")
+        nltk_data.find(resource_name="corpora/stopwords")
     except LookupError:
-        nltk.download(info_or_id="stopwords")
+        nltk_download(info_or_id="stopwords")
 
     languages: list[str] = [
         "arabic",
         "english",
         "french",
         "german",
         "spanish",
         "portuguese",
     ]  # add more languages here ...
 
-    stop_words: set[str] = {
-        word for lang in languages for word in stopwords.words(fileids=lang)
-    }
-
-    return stop_words
+    return {word for lang in languages for word in nltk_stopwords.words(fileids=lang)}
 
 
 def wordcloud_from_text(
diff --git a/controllers/file_system.py b/controllers/file_system.py
index e42c3b3..2057b55 100644
--- a/controllers/file_system.py
+++ b/controllers/file_system.py
@@ -11,7 +11,7 @@
 from json import dump as json_dump
 from json import load as json_load
-from os import utime
+from os import utime as os_utime
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, Literal
 from zipfile import ZipFile
@@ -28,22 +28,13 @@
 if TYPE_CHECKING:
     from datetime import datetime
 
+    from matplotlib.figure import Figure
+    from wordcloud import WordCloud  # type: ignore[import-untyped]
+
     from models.conversation import Conversation
 
 
-def conversation_set_from_zip(zip_filepath: Path) -> ConversationSet:
-    """Load conversations from a zip file, containing a 'conversations.json' file."""
-    with ZipFile(file=zip_filepath, mode="r") as file:
-        file.extractall(path=zip_filepath.with_suffix(suffix=""))
-
-    conversations_path: Path = (
-        zip_filepath.with_suffix(suffix="") / "conversations.json"
-    )
-
-    with conversations_path.open(encoding="utf-8") as file:
-        conversations = json_load(fp=file)
-
-    return ConversationSet(conversations=conversations)
+DOWNLOADS_PATH: Path = Path.home() / "Downloads"
 
 
 def conversation_set_from_json(json_filepath: Path) -> ConversationSet:
@@ -54,8 +45,23 @@
     return ConversationSet(conversations=conversations)
 
 
+def extract_zip(zip_filepath: Path) -> Path:
+    """Extract the zip file to the same folder."""
+    with ZipFile(file=zip_filepath, mode="r") as file:
+        file.extractall(path=zip_filepath.with_suffix(suffix=""))
+
+    return zip_filepath.with_suffix(suffix="")
+
+
+def conversation_set_from_zip(zip_filepath: Path) -> ConversationSet:
+    """Load conversations from a zip file, containing a 'conversations.json' file."""
+    return conversation_set_from_json(
+        json_filepath=extract_zip(zip_filepath=zip_filepath) / "conversations.json",
+    )
+
+
 def save_conversation(conversation: Conversation, filepath: Path) -> None:
-    """Save a conversation to a file, with added modification time."""
+    """Save the conversation to the file, with added modification time."""
     base_file_name: str = filepath.stem
 
     counter = 0
@@ -67,17 +73,24 @@
     with filepath.open(mode="w", encoding="utf-8") as file:
         file.write(conversation.markdown_text())
 
-    utime(path=filepath, times=(conversation.update_time, conversation.update_time))
+    os_utime(path=filepath, times=(conversation.update_time, conversation.update_time))
 
 
 def save_conversation_set(conv_set: ConversationSet, dir_path: Path) -> None:
-    """Save a conversation set to a directory, one markdown file per conversation."""
+    """Save the conversation set to the directory."""
     for conversation in tqdm(
         iterable=conv_set.conversation_list,
         desc="Writing Markdown 📄 files",
     ):
-        file_path: Path = dir_path / f"{conversation.sanitized_title()}.md"
-        save_conversation(conversation=conversation, filepath=file_path)
+        save_conversation(
+            conversation=conversation,
+            filepath=dir_path / f"{conversation.sanitized_title()}.md",
+        )
+
+
+def save_figure(figure: Figure, filepath: Path) -> None:
+    """Save the figure to the file."""
+    figure.savefig(fname=filepath, dpi=300)
 
 
 def save_weekwise_graph_from_conversation_set(
@@ -89,27 +102,27 @@
     """Create a weekwise graph and saves it to the folder."""
     if time_period[1] == "month":
         file_name: str = f"{time_period[0].strftime('%Y %B')}.png"
-        weekwise_graph_from_conversation_set(
-            conv_set=conv_set,
-            month_name=time_period[0].strftime("%B '%y"),
-            **kwargs,
-        )[0].savefig(
-            fname=dir_path / file_name,
-            dpi=300,
+        save_figure(
+            figure=weekwise_graph_from_conversation_set(
+                conv_set=conv_set,
+                month_name=time_period[0].strftime("%B '%y"),
+                **kwargs,
+            ),
+            filepath=dir_path / file_name,
         )
     elif time_period[1] == "year":
         file_name = f"{time_period[0].strftime('%Y')}.png"
-        weekwise_graph_from_conversation_set(
-            conv_set=conv_set,
-            year=time_period[0].strftime("%Y"),
-            **kwargs,
-        )[0].savefig(
-            fname=dir_path / file_name,
-            dpi=300,
+        save_figure(
+            figure=weekwise_graph_from_conversation_set(
+                conv_set=conv_set,
+                year=time_period[0].strftime("%Y"),
+                **kwargs,
+            ),
+            filepath=dir_path / file_name,
         )
 
 
-def create_n_save_all_weekwise_graphs(
+def create_all_weekwise_graphs(
     conv_set: ConversationSet,
     dir_path: Path,
     **kwargs: Any,
@@ -141,6 +154,11 @@
     )
 
 
+def save_wordcloud(wordcloud: WordCloud, filepath: Path) -> None:
+    """Save the word cloud to the file."""
+    wordcloud.to_file(filename=filepath)
+
+
 def save_wordcloud_from_conversation_set(
     conv_set: ConversationSet,
     dir_path: Path,
@@ -155,12 +173,13 @@
     elif time_period[1] == "year":
         file_name = f"{time_period[0].strftime('%Y')}.png"
 
-    wordcloud_from_conversation_set(conv_set=conv_set, **kwargs).to_file(
-        filename=dir_path / file_name,
+    save_wordcloud(
+        wordcloud=wordcloud_from_conversation_set(conv_set=conv_set, **kwargs),
+        filepath=dir_path / file_name,
     )
 
 
-def generate_n_save_all_wordclouds(
+def create_all_wordclouds(
     conv_set: ConversationSet,
     dir_path: Path,
     **kwargs: Any,
@@ -210,34 +229,23 @@ def save_custom_instructions(conv_set: ConversationSet, filepath: Path) -> None:
         json_dump(obj=conv_set.all_custom_instructions(), fp=file, indent=2)
 
 
-def get_most_recently_downloaded_zip() -> str:
+def get_most_recently_downloaded_zip() -> Path | str:
     """Path to the most recently created zip file in the Downloads folder."""
-    downloads_folder: Path = Path.home() / "Downloads"
-
-    zip_files: list[Path] = list(downloads_folder.glob(pattern="*.zip"))
+    zip_files: list[Path] = list(DOWNLOADS_PATH.glob(pattern="*.zip"))
 
     if not zip_files:
         return ""
 
-    default_zip_filepath: Path = max(zip_files, key=lambda x: x.stat().st_ctime)
-
-    return str(object=default_zip_filepath)
+    return max(zip_files, key=lambda x: x.stat().st_ctime)
 
 
 def get_bookmarklet_json_filepath() -> Path | None:
     """Path to the most recent JSON file in Downloads with 'bookmarklet' in the name."""
-    downloads_folder: Path = Path.home() / "Downloads"
-
-    bookmarklet_json_files: list[Path] = [
-        x for x in downloads_folder.glob(pattern="*.json") if "bookmarklet" in x.name
+    bkmrklet_files: list[Path] = [
+        x for x in DOWNLOADS_PATH.glob(pattern="*.json") if "bookmarklet" in x.name
     ]
 
-    if not bookmarklet_json_files:
+    if not bkmrklet_files:
         return None
 
-    bookmarklet_json_filepath: Path = max(
-        bookmarklet_json_files,
-        key=lambda x: x.stat().st_ctime,
-    )
-
-    return bookmarklet_json_filepath
+    return max(bkmrklet_files, key=lambda x: x.stat().st_ctime)
diff --git a/setup.py b/setup.py
index 652aade..1439a19 100644
--- a/setup.py
+++ b/setup.py
@@ -1,9 +1,9 @@
 """Module to setup the virtual environment and install requirements."""
 
-import os
-import sys
-import venv
+from os import system
 from pathlib import Path
+from sys import platform
+from venv import EnvBuilder
 
 BASE_DIR: Path = Path(__file__).parent.resolve()
 VENV_DIR: Path = BASE_DIR / ".venv"
@@ -11,7 +11,7 @@
 
 def create_virtual_environment() -> None:
     """Create a virtual environment in the project directory."""
-    env_builder = venv.EnvBuilder(with_pip=True)
+    env_builder = EnvBuilder(with_pip=True)
     env_builder.create(env_dir=VENV_DIR)
@@ -19,17 +19,18 @@
 def pip_install_requirements() -> None:
     """Install requirements from requirements.txt."""
     pip_exe: Path = (
         VENV_DIR / "bin" / "pip"
-        if sys.platform != "win32"
+        if platform != "win32"
         else VENV_DIR / "Scripts" / "pip.exe"
     )
 
     # Install requirements
-    os.system(command=f"{pip_exe} install -r requirements.txt")
+    system(command=f"{pip_exe} install -r requirements.txt")
 
 
 if __name__ == "__main__":
     print("Creating virtual environment...\n")
     create_virtual_environment()
+
     print("Installing requirements... (This may take a minute..)\n")
     pip_install_requirements()
@@ -38,7 +39,7 @@ def pip_install_requirements() -> None:
         "\nTo activate the virtual environment, "
         "use the following command based on your platform:\n",
     )
-    if sys.platform == "win32":
+    if platform == "win32":
         print(
             "\nFor Command Prompt:\n\t.venv\\Scripts\\activate.bat\n"
             "\nFor PowerShell:\n\t.venv\\Scripts\\Activate.ps1\n",
diff --git a/utils/utils.py b/utils/utils.py
index f5dcacd..bf6b228 100644
--- a/utils/utils.py
+++ b/utils/utils.py
@@ -3,7 +3,7 @@
 from __future__ import annotations
 
 from pathlib import Path
-from re import sub
+from re import sub as re_sub
 from zipfile import ZipFile
@@ -32,30 +32,22 @@ def ensure_closed_code_blocks(string: str) -> str:
 
 def replace_latex_delimiters(string: str) -> str:
     """Replace all the LaTeX bracket delimiters in the string with dollar sign ones."""
-    string = sub(pattern=r"\\\[", repl="$$", string=string)
-    string = sub(pattern=r"\\\]", repl="$$", string=string)
-    string = sub(pattern=r"\\\(", repl="$", string=string)
+    string = re_sub(pattern=r"\\\[", repl="$$", string=string)
+    string = re_sub(pattern=r"\\\]", repl="$$", string=string)
+    string = re_sub(pattern=r"\\\(", repl="$", string=string)
 
-    return sub(pattern=r"\\\)", repl="$", string=string)
+    return re_sub(pattern=r"\\\)", repl="$", string=string)
 
 
 def get_font_names() -> list[str]:
     """Return a list of all the font names in the assets/fonts folder."""
-    fonts_path = Path("assets/fonts")
-    font_names: list[str] = [
-        font.stem for font in fonts_path.iterdir() if font.is_file()
-    ]
-    return font_names
+    return [font.stem for font in Path("assets/fonts").iterdir()]
 
 
 def get_colormap_names() -> list[str]:
     """Return a list of all the colormap names in the assets/colormaps.txt file."""
-    colormaps_path = Path("assets/colormaps.txt")
-
-    with colormaps_path.open(encoding="utf-8") as file:
-        colormaps_list: list[str] = file.read().splitlines()
-
-    return colormaps_list
+    with Path("assets/colormaps.txt").open(encoding="utf-8") as file:
+        return file.read().splitlines()
 
 
 def validate_zip_file(path_str: str) -> bool:
diff --git a/views/prompt_user.py b/views/prompt_user.py
index 7d8e66f..247cf2c 100644
--- a/views/prompt_user.py
+++ b/views/prompt_user.py
@@ -4,7 +4,18 @@
 
 from typing import Any
 
-from questionary import Choice, Style, checkbox, path, select, text
+from questionary import (
+    Choice,
+    Style,
+    checkbox,
+    select,
+)
+from questionary import (
+    path as qst_path,
+)
+from questionary import (
+    text as qst_text,
+)
 
 from utils.utils import get_colormap_names, get_font_names, validate_zip_file
@@ -39,14 +50,14 @@ def prompt_user(default_configs: dict[str, Any]) -> dict[str, Any]:
 
     # ------------------------ zip file and output folder ------------------------
 
-    user_configs["zip_file"] = path(
+    user_configs["zip_file"] = qst_path(
         message="Enter the path to the zip file :",
         default=default_configs["zip_file"],
         validate=validate_zip_file,
         style=custom_style,
     ).ask()
 
-    user_configs["output_folder"] = path(
+    user_configs["output_folder"] = qst_path(
         message="Enter the path to the output folder :",
         default=default_configs["output_folder"],
         style=custom_style,
@@ -61,7 +72,7 @@ def prompt_user(default_configs: dict[str, Any]) -> dict[str, Any]:
     user_configs["message"]["author_headers"] = {}
 
     for author_role in default_configs["message"]["author_headers"]:
-        user_configs["message"]["author_headers"][author_role] = text(
+        user_configs["message"]["author_headers"][author_role] = qst_text(
             message=f"Enter the message header (#) for messages from '{author_role}' :",
             default=default_configs["message"]["author_headers"][author_role],
             validate=validate_header,
@@ -123,7 +134,7 @@ def prompt_user(default_configs: dict[str, Any]) -> dict[str, Any]:
         style=custom_style,
     ).ask()
 
-    user_configs["wordcloud"]["custom_stopwords"] = text(
+    user_configs["wordcloud"]["custom_stopwords"] = qst_text(
         message="Enter custom stopwords (separated by commas) :",
         default=default_configs["wordcloud"]["custom_stopwords"],
         style=custom_style,