Skip to content

Commit

Permalink
~
Browse files Browse the repository at this point in the history
  • Loading branch information
mohamed-chs committed Oct 19, 2023
1 parent 5223c11 commit 00952a4
Show file tree
Hide file tree
Showing 10 changed files with 40 additions and 45 deletions.
4 changes: 2 additions & 2 deletions .vscode/extensions.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
"usernamehw.errorlens",
"njqdev.vscode-python-typehint",
"ms-python.python",
"matangover.mypy",
"ms-toolsai.jupyter",
"charliermarsh.ruff"
"charliermarsh.ruff",
"matangover.mypy"
]
}
2 changes: 1 addition & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,5 @@
],
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true,
"python.analysis.typeCheckingMode": "strict"
"mypy.runUsingActiveInterpreter": true
}
12 changes: 4 additions & 8 deletions cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,15 @@
from shutil import rmtree
from typing import Any

from controllers.configuration import (
get_user_configs,
set_model_configs,
update_config_file,
)
from controllers.configuration import get_user_configs, save_configs, set_model_configs
from controllers.file_system import (
conversation_set_from_json,
conversation_set_from_zip,
create_n_save_all_weekwise_graphs,
generate_n_save_all_wordclouds,
get_bookmarklet_json_filepath,
save_conversation_set,
save_custom_instructions_to_file,
save_custom_instructions,
)
from models.conversation_set import ConversationSet

Expand Down Expand Up @@ -113,7 +109,7 @@ def main() -> None:

custom_instructions_filepath: Path = output_folder / "custom_instructions.json"

save_custom_instructions_to_file(
save_custom_instructions(
conv_set=all_conversations_set,
filepath=custom_instructions_filepath,
)
Expand All @@ -122,7 +118,7 @@ def main() -> None:
f"\nDone ✅ ! Check the output 📝 here : {custom_instructions_filepath.as_uri()} 🔗\n",
)

update_config_file(user_configs=configs_dict)
save_configs(user_configs=configs_dict)
print("(Settings ⚙️ have been updated and saved to 'config.json')\n")

print(
Expand Down
4 changes: 2 additions & 2 deletions config.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"zip_file": null,
"output_folder": null,
"zip_file": "C:\\Users\\Mohamed.CheikhSidiya\\Downloads\\be0445bfde7aa6cc7beb67a750a167631e75b9fc9a3723eead4d20900dd7c73e-2023-09-29-02-21-47.zip",
"output_folder": "C:\\Users\\Mohamed.CheikhSidiya\\Documents\\ChatGPT Data",
"message": {
"author_headers": {
"system": "### System",
Expand Down
6 changes: 3 additions & 3 deletions controllers/configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from models.node import Node
from views.prompt_user import prompt_user

from .file_system import default_output_folder, most_recently_downloaded_zip
from .file_system import default_output_folder, get_most_recently_downloaded_zip


def get_user_configs() -> dict[str, Any]:
Expand All @@ -21,15 +21,15 @@ def get_user_configs() -> dict[str, Any]:
default_configs = json_load(fp=file)

if not default_configs["zip_file"]:
default_configs["zip_file"] = most_recently_downloaded_zip()
default_configs["zip_file"] = get_most_recently_downloaded_zip()

if not default_configs["output_folder"]:
default_configs["output_folder"] = default_output_folder()

return prompt_user(default_configs=default_configs)


def update_config_file(user_configs: dict[str, Any]) -> None:
def save_configs(user_configs: dict[str, Any]) -> None:
"""Update the config file with the new configuration options."""
with open(file="config.json", mode="w", encoding="utf-8") as file:
json_dump(obj=user_configs, fp=file, indent=2)
Expand Down
3 changes: 1 addition & 2 deletions controllers/data_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
Should ideally only return matplotlib objects, and not deal with the filesystem.
"""

# pyright: reportUnknownMemberType=false

from collections import defaultdict
from datetime import datetime
Expand Down Expand Up @@ -52,7 +51,7 @@ def weekwise_graph_from_timestamps(
if month_name:
ax.set_title(label=f"Prompt Count for {month_name}")

year: str | int = kwargs.get("year", "")
year: str = kwargs.get("year", "")
if year:
ax.set_title(label=f"Prompt Count for {year}")

Expand Down
22 changes: 8 additions & 14 deletions controllers/file_system.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,13 @@
may be replaced later, with a GUI or something)
"""

# pyright: reportUnknownMemberType=false


from datetime import datetime
from json import dump as json_dump
from json import load as json_load
from os import utime
from pathlib import Path
from typing import Any
from typing import Any, Literal
from zipfile import ZipFile

from tqdm import tqdm
Expand All @@ -30,7 +28,7 @@


def conversation_set_from_zip(zip_filepath: Path) -> ConversationSet:
"""Load the conversations from the OpenAI zip export file."""
"""Load the conversations from a zip file, containing a 'conversations.json' file."""
with ZipFile(file=zip_filepath, mode="r") as file:
file.extractall(path=zip_filepath.with_suffix(suffix=""))

Expand All @@ -45,7 +43,7 @@ def conversation_set_from_zip(zip_filepath: Path) -> ConversationSet:


def conversation_set_from_json(json_filepath: Path) -> ConversationSet:
"""Load the conversations from the bookmarklet json export file."""
"""Load the conversations from a JSON file, containing an array of conversations."""
with open(file=json_filepath, encoding="utf-8") as file:
conversations = json_load(fp=file)

Expand Down Expand Up @@ -81,7 +79,7 @@ def save_conversation_set(conv_set: ConversationSet, dir_path: Path) -> None:
def save_weekwise_graph_from_conversation_set(
conv_set: ConversationSet,
dir_path: Path,
time_period: tuple[datetime, str],
time_period: tuple[datetime, Literal["month", "year"]],
**kwargs: Any,
) -> None:
"""Create a weekwise graph and save it to the folder."""
Expand Down Expand Up @@ -140,7 +138,7 @@ def create_n_save_all_weekwise_graphs(
def save_wordcloud_from_conversation_set(
conv_set: ConversationSet,
dir_path: Path,
time_period: tuple[datetime, str],
time_period: tuple[datetime, Literal["week", "month", "year"]],
**kwargs: Any,
) -> None:
"""Create a wordcloud and save it to the folder."""
Expand All @@ -151,10 +149,8 @@ def save_wordcloud_from_conversation_set(
file_name = f"{time_period[0].strftime('%Y %B')}.png"
case "year":
file_name = f"{time_period[0].strftime('%Y')}.png"
case _:
raise ValueError("Invalid time period for wordcloud")

wordcloud_from_conversation_set(conv_set=conv_set, **kwargs).to_file( # type: ignore
wordcloud_from_conversation_set(conv_set=conv_set, **kwargs).to_file(
filename=dir_path / file_name,
)

Expand Down Expand Up @@ -203,7 +199,7 @@ def generate_n_save_all_wordclouds(
)


def save_custom_instructions_to_file(conv_set: ConversationSet, filepath: Path) -> None:
def save_custom_instructions(conv_set: ConversationSet, filepath: Path) -> None:
"""Create JSON file for custom instructions in the conversation set."""
with open(file=filepath, mode="w", encoding="utf-8") as file:
json_dump(obj=conv_set.all_custom_instructions(), fp=file, indent=2)
Expand All @@ -215,7 +211,7 @@ def default_output_folder() -> str:
return str(object=Path.home() / "Documents" / "ChatGPT Data")


def most_recently_downloaded_zip() -> str:
def get_most_recently_downloaded_zip() -> str:
"""Path to the most recently created zip file in the Downloads folder."""
downloads_folder: Path = Path.home() / "Downloads"

Expand All @@ -233,15 +229,13 @@ def get_bookmarklet_json_filepath() -> Path | None:
"""Path to the most recently downloaded JSON file, with "bookmarklet" in the name."""
downloads_folder: Path = Path.home() / "Downloads"

# Filter out json files with names that do not contain "bookmarklet"
bookmarklet_json_files: list[Path] = [
x for x in downloads_folder.glob(pattern="*.json") if "bookmarklet" in x.name
]

if not bookmarklet_json_files:
return None

# Most recent json file in downloads folder, containing "bookmarklet"
bookmarklet_json_filepath: Path = max(
bookmarklet_json_files,
key=lambda x: x.stat().st_ctime,
Expand Down
14 changes: 10 additions & 4 deletions models/conversation.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from re import Pattern
from re import compile as re_compile
from time import ctime
from typing import Any
from typing import Any, Literal

from utils.utils import ensure_closed_code_blocks, replace_latex_delimiters

Expand Down Expand Up @@ -69,7 +69,9 @@ def _all_message_nodes(self) -> list[Node]:

return nodes

def _author_nodes(self, author: str) -> list[Node]:
def _author_nodes(
self, author: Literal["user", "assistant", "system", "tool"]
) -> list[Node]:
"""List of all nodes with the given author role in the conversation. (all branches)"""
return [
node
Expand Down Expand Up @@ -121,7 +123,9 @@ def message_count(self) -> int:
if node.message and node.message.author_role() in ("user", "assistant")
)

def entire_author_text(self, author: str) -> str:
def entire_author_text(
self, author: Literal["user", "assistant", "system", "tool"]
) -> str:
"""Entire raw text from the given author role in the conversation. (all branches)
Useful for generating word clouds.
Expand All @@ -132,7 +136,9 @@ def entire_author_text(self, author: str) -> str:
if node.message
)

def author_message_timestamps(self, author: str) -> list[float]:
def author_message_timestamps(
self, author: Literal["user", "assistant", "system", "tool"]
) -> list[float]:
"""List of all message timestamps from the given author role in the conversation.
(all branches) Useful for generating time series plots.
"""
Expand Down
10 changes: 7 additions & 3 deletions models/conversation_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

from datetime import datetime
from time import ctime
from typing import Any
from typing import Any, Literal

from .conversation import Conversation

Expand Down Expand Up @@ -94,7 +94,9 @@ def all_custom_instructions(self) -> list[dict[str, Any]]:

return custom_instructions

def all_author_message_timestamps(self, author: str) -> list[float]:
def all_author_message_timestamps(
self, author: Literal["user", "assistant", "system", "tool"]
) -> list[float]:
"""Get a list of all message timestamps, in all conversations in the list."""
timestamps: list[float] = []

Expand All @@ -103,7 +105,9 @@ def all_author_message_timestamps(self, author: str) -> list[float]:

return timestamps

def all_author_text(self, author: str) -> str:
def all_author_text(
self, author: Literal["user", "assistant", "system", "tool"]
) -> str:
"""Get a string of all text, in all conversations in the list."""
return "\n".join(
conversation.entire_author_text(author=author)
Expand Down
8 changes: 2 additions & 6 deletions views/prompt_user.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,22 +103,18 @@ def prompt_user(default_configs: dict[str, Any]) -> dict[str, Any]:

user_configs["wordcloud"] = {}

font_names: list[str] = get_font_names()

user_configs["wordcloud"]["font"] = select(
message="Select the font you want to use for the word clouds :",
choices=font_names,
choices=get_font_names(),
default=default_configs["wordcloud"]["font"]
if default_configs["wordcloud"]["font"]
else None,
style=custom_style,
).ask()

colormaps_list: list[str] = get_colormap_names()

user_configs["wordcloud"]["colormap"] = select(
message="Select the color theme you want to use for the word clouds :",
choices=colormaps_list,
choices=get_colormap_names(),
default=default_configs["wordcloud"]["colormap"]
if default_configs["wordcloud"]["colormap"]
else None,
Expand Down

0 comments on commit 00952a4

Please sign in to comment.