Skip to content

Commit

Permalink
~
Browse files Browse the repository at this point in the history
  • Loading branch information
mohamed-chs committed Oct 21, 2023
1 parent 171b392 commit 1bb9393
Show file tree
Hide file tree
Showing 7 changed files with 109 additions and 100 deletions.
8 changes: 4 additions & 4 deletions cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
from controllers.file_system import (
conversation_set_from_json,
conversation_set_from_zip,
create_n_save_all_weekwise_graphs,
generate_n_save_all_wordclouds,
create_all_weekwise_graphs,
create_all_wordclouds,
get_bookmarklet_json_filepath,
save_conversation_set,
save_custom_instructions,
Expand Down Expand Up @@ -75,7 +75,7 @@ def main() -> None:
graph_folder: Path = output_folder / "Graphs"
graph_folder.mkdir(parents=True, exist_ok=True)

create_n_save_all_weekwise_graphs(
create_all_weekwise_graphs(
conv_set=all_conversations_set,
dir_path=graph_folder,
)
Expand All @@ -90,7 +90,7 @@ def main() -> None:
colormap: str = configs_dict["wordcloud"]["colormap"]
custom_stopwords: str = configs_dict["wordcloud"]["custom_stopwords"]

generate_n_save_all_wordclouds(
create_all_wordclouds(
conv_set=all_conversations_set,
dir_path=wordcloud_folder,
font_path=font_path,
Expand Down
2 changes: 1 addition & 1 deletion controllers/configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def get_user_configs() -> dict[str, Any]:
default_configs = json_load(fp=file)

if not default_configs["zip_file"]:
default_configs["zip_file"] = get_most_recently_downloaded_zip()
default_configs["zip_file"] = str(object=get_most_recently_downloaded_zip())

if not default_configs["output_folder"]:
default_configs["output_folder"] = str(object=DEFAULT_OUTPUT_FOLDER)
Expand Down
21 changes: 9 additions & 12 deletions controllers/data_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,10 @@
from datetime import datetime, timezone
from typing import TYPE_CHECKING, Any

import nltk # type: ignore[import-untyped]
from matplotlib.figure import Figure
from nltk.corpus import stopwords # type: ignore[import-untyped]
from nltk import data as nltk_data # type: ignore[import-untyped]
from nltk import download as nltk_download # type: ignore[import-untyped]
from nltk.corpus import stopwords as nltk_stopwords # type: ignore[import-untyped]
from wordcloud import WordCloud # type: ignore[import-untyped]

if TYPE_CHECKING:
Expand All @@ -23,7 +24,7 @@
def weekwise_graph_from_timestamps(
timestamps: list[float],
**kwargs: str,
) -> tuple[Figure, Axes]:
) -> Figure:
"""Create a bar graph from the given timestamps, collapsed on one week."""
dates: list[datetime] = [
datetime.fromtimestamp(ts, tz=timezone.utc) for ts in timestamps
Expand Down Expand Up @@ -65,13 +66,13 @@ def weekwise_graph_from_timestamps(
ax.set_xticklabels(labels=x, rotation=45)
fig.tight_layout()

return fig, ax
return fig


def weekwise_graph_from_conversation_set(
    conv_set: ConversationSet,
    **kwargs: str,
) -> Figure:
    """Create a bar graph from the user's messages in the given conversation set.

    Collects the timestamps of every message authored by "user" and forwards
    them, together with any extra keyword arguments, to
    ``weekwise_graph_from_timestamps``.

    Returns:
        The figure produced by ``weekwise_graph_from_timestamps``.
    """
    return weekwise_graph_from_timestamps(
        timestamps=conv_set.all_author_message_timestamps(author="user"),
        **kwargs,
    )
Expand All @@ -81,9 +82,9 @@ def weekwise_graph_from_conversation_set(
def load_nltk_stopwords() -> set[str]:
"""Load nltk stopwords."""
try:
nltk.data.find(resource_name="corpora/stopwords")
nltk_data.find(resource_name="corpora/stopwords")
except LookupError:
nltk.download(info_or_id="stopwords")
nltk_download(info_or_id="stopwords")

languages: list[str] = [
"arabic",
Expand All @@ -94,11 +95,7 @@ def load_nltk_stopwords() -> set[str]:
"portuguese",
] # add more languages here ...

stop_words: set[str] = {
word for lang in languages for word in stopwords.words(fileids=lang)
}

return stop_words
return {word for lang in languages for word in nltk_stopwords.words(fileids=lang)}


def wordcloud_from_text(
Expand Down
118 changes: 63 additions & 55 deletions controllers/file_system.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

from json import dump as json_dump
from json import load as json_load
from os import utime
from os import utime as os_utime
from pathlib import Path
from typing import TYPE_CHECKING, Any, Literal
from zipfile import ZipFile
Expand All @@ -28,22 +28,13 @@
if TYPE_CHECKING:
from datetime import datetime

from matplotlib.figure import Figure
from wordcloud import WordCloud # type: ignore[import-untyped]

from models.conversation import Conversation


def conversation_set_from_zip(zip_filepath: Path) -> ConversationSet:
"""Load conversations from a zip file, containing a 'conversations.json' file."""
with ZipFile(file=zip_filepath, mode="r") as file:
file.extractall(path=zip_filepath.with_suffix(suffix=""))

conversations_path: Path = (
zip_filepath.with_suffix(suffix="") / "conversations.json"
)

with conversations_path.open(encoding="utf-8") as file:
conversations = json_load(fp=file)

return ConversationSet(conversations=conversations)
DOWNLOADS_PATH: Path = Path.home() / "Downloads"


def conversation_set_from_json(json_filepath: Path) -> ConversationSet:
Expand All @@ -54,8 +45,23 @@ def conversation_set_from_json(json_filepath: Path) -> ConversationSet:
return ConversationSet(conversations=conversations)


def extract_zip(zip_filepath: Path) -> Path:
    """Extract the zip archive into a sibling folder named after the archive.

    The destination is ``zip_filepath`` with its final suffix removed
    (``export.zip`` is extracted into ``export/``).

    Returns:
        Path to the folder the archive was extracted into.
    """
    # Compute the destination once so the extraction target and the returned
    # path are guaranteed to be the same object.
    extract_dir: Path = zip_filepath.with_suffix(suffix="")

    with ZipFile(file=zip_filepath, mode="r") as archive:
        archive.extractall(path=extract_dir)

    return extract_dir


def conversation_set_from_zip(zip_filepath: Path) -> ConversationSet:
    """Load conversations from a zip file, containing a 'conversations.json' file.

    The archive is extracted with ``extract_zip``; the 'conversations.json'
    file inside the extracted folder is then loaded with
    ``conversation_set_from_json``.
    """
    extracted_dir: Path = extract_zip(zip_filepath=zip_filepath)
    return conversation_set_from_json(
        json_filepath=extracted_dir / "conversations.json",
    )


def save_conversation(conversation: Conversation, filepath: Path) -> None:
"""Save a conversation to a file, with added modification time."""
"""Save the conversation to the file, with added modification time."""
base_file_name: str = filepath.stem

counter = 0
Expand All @@ -67,17 +73,24 @@ def save_conversation(conversation: Conversation, filepath: Path) -> None:

with filepath.open(mode="w", encoding="utf-8") as file:
file.write(conversation.markdown_text())
utime(path=filepath, times=(conversation.update_time, conversation.update_time))
os_utime(path=filepath, times=(conversation.update_time, conversation.update_time))


def save_conversation_set(conv_set: ConversationSet, dir_path: Path) -> None:
    """Save the conversation set to the directory, one markdown file per conversation.

    Each conversation in ``conv_set.conversation_list`` is written to
    ``<sanitized title>.md`` inside ``dir_path`` through ``save_conversation``.
    Progress is reported with a tqdm progress bar.
    """
    for conversation in tqdm(
        iterable=conv_set.conversation_list,
        desc="Writing Markdown 📄 files",
    ):
        save_conversation(
            conversation=conversation,
            filepath=dir_path / f"{conversation.sanitized_title()}.md",
        )


def save_figure(figure: Figure, filepath: Path, *, dpi: int = 300) -> None:
    """Save the figure to the file.

    Args:
        figure: Figure to write out.
        filepath: Destination handed to ``figure.savefig``.
        dpi: Output resolution. Defaults to 300, the value that used to be
            hard-coded here, so existing callers are unaffected.
    """
    figure.savefig(fname=filepath, dpi=dpi)


def save_weekwise_graph_from_conversation_set(
Expand All @@ -89,27 +102,27 @@ def save_weekwise_graph_from_conversation_set(
"""Create a weekwise graph and saves it to the folder."""
if time_period[1] == "month":
file_name: str = f"{time_period[0].strftime('%Y %B')}.png"
weekwise_graph_from_conversation_set(
conv_set=conv_set,
month_name=time_period[0].strftime("%B '%y"),
**kwargs,
)[0].savefig(
fname=dir_path / file_name,
dpi=300,
save_figure(
figure=weekwise_graph_from_conversation_set(
conv_set=conv_set,
month_name=time_period[0].strftime("%B '%y"),
**kwargs,
),
filepath=dir_path / file_name,
)
elif time_period[1] == "year":
file_name = f"{time_period[0].strftime('%Y')}.png"
weekwise_graph_from_conversation_set(
conv_set=conv_set,
year=time_period[0].strftime("%Y"),
**kwargs,
)[0].savefig(
fname=dir_path / file_name,
dpi=300,
save_figure(
figure=weekwise_graph_from_conversation_set(
conv_set=conv_set,
year=time_period[0].strftime("%Y"),
**kwargs,
),
filepath=dir_path / file_name,
)


def create_n_save_all_weekwise_graphs(
def create_all_weekwise_graphs(
conv_set: ConversationSet,
dir_path: Path,
**kwargs: Any,
Expand Down Expand Up @@ -141,6 +154,11 @@ def create_n_save_all_weekwise_graphs(
)


def save_wordcloud(wordcloud: WordCloud, filepath: Path) -> None:
"""Save the word cloud to ``filepath`` by calling its ``to_file`` method."""
wordcloud.to_file(filename=filepath)


def save_wordcloud_from_conversation_set(
conv_set: ConversationSet,
dir_path: Path,
Expand All @@ -155,12 +173,13 @@ def save_wordcloud_from_conversation_set(
elif time_period[1] == "year":
file_name = f"{time_period[0].strftime('%Y')}.png"

wordcloud_from_conversation_set(conv_set=conv_set, **kwargs).to_file(
filename=dir_path / file_name,
save_wordcloud(
wordcloud=wordcloud_from_conversation_set(conv_set=conv_set, **kwargs),
filepath=dir_path / file_name,
)


def generate_n_save_all_wordclouds(
def create_all_wordclouds(
conv_set: ConversationSet,
dir_path: Path,
**kwargs: Any,
Expand Down Expand Up @@ -210,34 +229,23 @@ def save_custom_instructions(conv_set: ConversationSet, filepath: Path) -> None:
json_dump(obj=conv_set.all_custom_instructions(), fp=file, indent=2)


def get_most_recently_downloaded_zip() -> Path | str:
    """Path to the newest zip file in the Downloads folder, or "" if there is none.

    "Newest" is ranked by ``st_ctime``, which is the creation time on Windows
    but the time of the last metadata change on Unix, so this is only an
    approximation of "most recently downloaded".

    Returns:
        The ``Path`` of the zip file with the greatest ``st_ctime``, or an
        empty string when ``DOWNLOADS_PATH`` holds no ``*.zip`` file.
    """
    # `default` covers the "no zip files" case without building a list first.
    return max(
        DOWNLOADS_PATH.glob(pattern="*.zip"),
        key=lambda zip_file: zip_file.stat().st_ctime,
        default="",
    )


def get_bookmarklet_json_filepath() -> Path | None:
    """Path to the most recent JSON file in Downloads with 'bookmarklet' in the name.

    Candidates are the ``*.json`` files directly inside ``DOWNLOADS_PATH``
    whose file name contains 'bookmarklet'; they are ranked by ``st_ctime``.

    Returns:
        The candidate with the greatest ``st_ctime``, or ``None`` when there
        is no candidate.
    """
    return max(
        (
            json_file
            for json_file in DOWNLOADS_PATH.glob(pattern="*.json")
            if "bookmarklet" in json_file.name
        ),
        key=lambda json_file: json_file.stat().st_ctime,
        default=None,
    )
15 changes: 8 additions & 7 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,35 +1,36 @@
"""Module to setup the virtual environment and install requirements."""

import os
import sys
import venv
from os import system
from pathlib import Path
from sys import platform
from venv import EnvBuilder

BASE_DIR: Path = Path(__file__).parent.resolve()
VENV_DIR: Path = BASE_DIR / ".venv"


def create_virtual_environment() -> None:
    """Create a virtual environment, with pip installed, in ``VENV_DIR``."""
    EnvBuilder(with_pip=True).create(env_dir=VENV_DIR)


def pip_install_requirements() -> None:
    """Install requirements from requirements.txt into the virtual environment.

    Uses the pip executable inside ``VENV_DIR`` (``Scripts/pip.exe`` on
    Windows, ``bin/pip`` elsewhere) and the ``requirements.txt`` located in
    ``BASE_DIR``. As before, a failing pip run does not raise; pip reports
    its own errors on the console.
    """
    # Imported locally so this function does not depend on edits to the
    # module-level import block.
    from subprocess import run as subprocess_run

    pip_exe: Path = (
        VENV_DIR / "bin" / "pip"
        if platform != "win32"
        else VENV_DIR / "Scripts" / "pip.exe"
    )

    # Pass an argument list rather than a shell command string: a project path
    # containing spaces (or shell metacharacters) would otherwise be split or
    # interpreted by the shell. Anchoring requirements.txt to BASE_DIR makes
    # the script work regardless of the current working directory.
    subprocess_run(
        [str(pip_exe), "install", "-r", str(BASE_DIR / "requirements.txt")],
        check=False,
    )


if __name__ == "__main__":
print("Creating virtual environment...\n")
create_virtual_environment()

print("Installing requirements... (This may take a minute..)\n")
pip_install_requirements()

Expand All @@ -38,7 +39,7 @@ def pip_install_requirements() -> None:
"\nTo activate the virtual environment, "
"use the following command based on your platform:\n",
)
if sys.platform == "win32":
if platform == "win32":
print(
"\nFor Command Prompt:\n\t.venv\\Scripts\\activate.bat\n"
"\nFor PowerShell:\n\t.venv\\Scripts\\Activate.ps1\n",
Expand Down

0 comments on commit 1bb9393

Please sign in to comment.