
Commit

formatting
mohamed-chs committed Oct 18, 2023
1 parent 1449ddd commit 30c065f
Showing 18 changed files with 147 additions and 183 deletions.
24 changes: 0 additions & 24 deletions .github/workflows/pylint.yml

This file was deleted.

18 changes: 8 additions & 10 deletions TODO.md
@@ -1,20 +1,20 @@
 ### TODO
 
-Doing what needs to be done.
+Doing what needs to be done. (not really needed, but I like to keep track of things)
 
 Feel free to add or check items.
 
 **general**
 
-- [ ] More robust testing setup (I only have my data to test on and I do it by way of the eyes, which isn't very sustainable)
+- [ ] More robust testing setup
 - [ ] GUI
 - [ ] Standalone executable, for an even smoother setup
 - [ ] Obsidian plugin ?
-- [x] keep external dependencies to a minimum (looking at you js)
+- [-] keep external dependencies to a minimum (looking at you js)
 - [x] Javascript to download more conversations, see [Javascript](js)
-- [ ] Add new downloaded conversations to the MD folder
+- [x] Add new downloaded conversations to the MD folder
 - [ ] Update past conversations if changed
-- [ ] Data visualizations : chat times, frequency, models, word clouds, etc...
+- [-] Data visualizations : chat times, frequency, models, word clouds, etc...
 - [ ] Data analysis : categories and more classifications based on topics, concepts, programming tools, etc ...
 - [ ] Integration with Obsidian (folders and subfolders, tags, ...)
 - [ ] Add HTML as an output option
@@ -27,22 +27,20 @@ Feel free to add or check items.
 **visualizations**
 
 - [ ] Rename fonts and colormaps to more human-readable names
-- [ ] Reduce the number of fonts and colormaps (test and dump the ugly ones)
-- [ ] Automatically make all word clouds for all time periods
+- [ ] Number of fonts and colormaps should be reasonable, not too many, not too few
 
 **command line**
 
 - [x] Nicer command line output formatting
-- [x] More configs from the command line (overwrite the config.json)
+- [x] More configs from the command line
 - [ ] Link to submit issues or feedback
 - [ ] add more todos ...
 
-**configs.json**
+**configs**
 
 - [x] change user, assistant, and system names
 - [x] yaml header elements
 - [ ] specific configs for each individual conversation / conversation type
-- [ ] output folder (currently set by default or via command line arguments)
 - [ ] add more configs ...
 
 See also : [JavaScript Todo](js/how_to_use.md#still-working-on)
6 changes: 3 additions & 3 deletions controllers/configuration.py
@@ -14,9 +14,9 @@

 def get_user_configs() -> dict[str, Any]:
     """Loads the default configs and calls the prompt_user function with those defaults.
-    Returns the new configuration."""
-
-    with open(file="config.json", mode="r", encoding="utf-8") as file:
+    Returns the new configuration.
+    """
+    with open(file="config.json", encoding="utf-8") as file:
         default_configs = load(fp=file)
 
     if not default_configs["zip_file"]:
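
Worth noting: Python's open() defaults to mode="r", so dropping the explicit mode above is purely cosmetic. A minimal standalone sketch of the same config-loading step (the empty-dict fallback is a hypothetical addition for illustration, not something the repo does):

```python
from json import load
from pathlib import Path
from typing import Any


def load_default_configs() -> dict[str, Any]:
    """Read the default configs from config.json; open() defaults to mode="r"."""
    config_path = Path("config.json")
    if not config_path.exists():
        return {}  # hypothetical fallback; the real code assumes the file exists
    with open(file=config_path, encoding="utf-8") as file:
        return load(fp=file)
```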
24 changes: 14 additions & 10 deletions controllers/data_analysis.py
@@ -1,6 +1,7 @@
"""Module for all the data visualizations.
Should ideally only return matplotlib objects, and not deal with the filesystem."""
Should ideally only return matplotlib objects, and not deal with the filesystem.
"""

from pathlib import Path
from typing import Any
@@ -20,7 +21,6 @@
 # Ensure that the stopwords are downloaded
 def load_nltk_stopwords() -> set[str]:
     """Loads the nltk stopwords. Returns a set of stopwords."""
-
     try:
         nltk.data.find(resource_name="corpora/stopwords")  # type: ignore
     except LookupError:
@@ -36,7 +36,9 @@ def load_nltk_stopwords() -> set[str]:
     ]  # add more languages here ...
 
     stop_words = set(
-        word for lang in languages for word in stopwords.words(fileids=lang)  # type: ignore
+        word
+        for lang in languages
+        for word in stopwords.words(fileids=lang)  # type: ignore
     )
 
     return stop_words
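
The reflowed comprehension above is behavior-identical: it unions NLTK stopwords across several languages. A self-contained sketch of the pattern, assuming only that the nltk package is installed (the corpus is fetched on first use, exactly what the LookupError handler above does):

```python
import nltk
from nltk.corpus import stopwords


def load_stopwords(languages: list[str]) -> set[str]:
    """Union of NLTK stopwords across the given languages."""
    try:
        nltk.data.find("corpora/stopwords")  # already downloaded?
    except LookupError:
        nltk.download("stopwords")  # fetch the corpus on first use
    return {
        word
        for lang in languages
        for word in stopwords.words(fileids=lang)
    }


print(len(load_stopwords(["english", "french"])))  # size of the combined set
```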
@@ -47,7 +49,6 @@ def wordcloud_from_text(
     **kwargs: Any,
 ) -> WordCloud:
     """Creates a wordcloud from the given text. Returns a WordCloud object."""
-
     custom_stopwords: list[str] = kwargs.get("stopwords", [])
     default_stopwords: set[str] = load_nltk_stopwords()
     stop_words: set[str] = default_stopwords.union(set(custom_stopwords))
@@ -61,7 +62,8 @@

     wordcloud: WordCloud = WordCloud(
         font_path=kwargs.get(
-            "font_path", "assets/fonts/ArchitectsDaughter-Regular.ttf"
+            "font_path",
+            "assets/fonts/ArchitectsDaughter-Regular.ttf",
         ),
         width=kwargs.get("width", 1000),
         height=kwargs.get("height", 1000),
@@ -71,17 +73,17 @@
         colormap=kwargs.get("colormap", "prism"),
         include_numbers=kwargs.get("include_numbers", False),
     ).generate(  # type: ignore
-        text=text
+        text=text,
     )
 
     return wordcloud
 
 
 def wordcloud_from_conversation_set(
-    conv_set: ConversationSet, **kwargs: Any
+    conv_set: ConversationSet,
+    **kwargs: Any,
 ) -> WordCloud:
     """Creates a wordcloud from the given conversation set. Returns a WordCloud object."""
-
     text: str = (
         conv_set.all_author_text(author="user")
         + "\n"
@@ -93,7 +95,6 @@ def wordcloud_from_conversation_set(

 def create_save_graph(timestamps: list[float], file_path: Path) -> None:
     """Creates and saves a graph from the given timestamps."""
-
     df = pd.DataFrame(data=timestamps, columns=["timestamp"])  # type: ignore
     df["datetime"] = pd.to_datetime(arg=df["timestamp"], unit="s")  # type: ignore

@@ -113,7 +114,10 @@
     )
 
     plt.title(  # type: ignore
-        label="ChatGPT Prompts per Day", fontsize=20, fontweight="bold", pad=20
+        label="ChatGPT Prompts per Day",
+        fontsize=20,
+        fontweight="bold",
+        pad=20,
     )
     plt.xlabel(xlabel="Month", fontsize=16, labelpad=15)  # type: ignore
     plt.ylabel(ylabel="Number of Prompts", fontsize=16, labelpad=15)  # type: ignore
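
A standalone sketch of the kwargs-with-defaults pattern wordcloud_from_text uses above, limited to the parameters visible in this diff (all are real wordcloud.WordCloud arguments; the font path is the one from the repo's assets folder):

```python
from typing import Any

from wordcloud import WordCloud


def make_wordcloud(text: str, **kwargs: Any) -> WordCloud:
    """Build a WordCloud, letting callers override any default via kwargs."""
    return WordCloud(
        font_path=kwargs.get("font_path", "assets/fonts/ArchitectsDaughter-Regular.ttf"),
        width=kwargs.get("width", 1000),
        height=kwargs.get("height", 1000),
        colormap=kwargs.get("colormap", "prism"),
        include_numbers=kwargs.get("include_numbers", False),
    ).generate(text=text)


# e.g. make_wordcloud("hello world hello", colormap="viridis").to_file("cloud.png")
```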
46 changes: 25 additions & 21 deletions controllers/file_system.py
@@ -4,7 +4,8 @@
 (besides utils.py, but that doesn't save anything to disk,
 and configuration.py, but that's a placeholder for user input in whatever form,
-may be replaced later, with a GUI or something)"""
+may be replaced later, with a GUI or something)
+"""
 
 from datetime import datetime
 from json import dump, load
@@ -23,24 +24,22 @@

 def load_conversations_from_openai_zip(zip_filepath: Path) -> ConversationSet:
     """Load the conversations from the OpenAI zip export file."""
-
     with ZipFile(file=zip_filepath, mode="r") as file:
         file.extractall(path=zip_filepath.with_suffix(suffix=""))
 
     conversations_path: Path = (
         zip_filepath.with_suffix(suffix="") / "conversations.json"
     )
 
-    with open(file=conversations_path, mode="r", encoding="utf-8") as file:
+    with open(file=conversations_path, encoding="utf-8") as file:
         conversations = load(fp=file)
 
     return ConversationSet(conversations=conversations)
 
 
 def load_conversations_from_bookmarklet_json(json_filepath: Path) -> ConversationSet:
     """Load the conversations from the bookmarklet json export file."""
-
-    with open(file=json_filepath, mode="r", encoding="utf-8") as file:
+    with open(file=json_filepath, encoding="utf-8") as file:
         conversations = load(fp=file)
 
     return ConversationSet(conversations=conversations)
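
A minimal sketch of the extract-then-read pattern above, assuming an export zip whose root contains conversations.json (as the OpenAI export does):

```python
from json import load
from pathlib import Path
from zipfile import ZipFile


def load_conversations(zip_filepath: Path) -> list[dict]:
    """Extract the zip next to itself, then read conversations.json from the result."""
    extract_dir = zip_filepath.with_suffix("")  # "export.zip" -> "export"
    with ZipFile(file=zip_filepath, mode="r") as file:
        file.extractall(path=extract_dir)
    with open(file=extract_dir / "conversations.json", encoding="utf-8") as file:
        return load(fp=file)
```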
@@ -54,7 +53,7 @@ def save_conversation_to_file(conversation: Conversation, filepath: Path) -> Non
     while filepath.exists():
         counter += 1
         filepath = filepath.with_name(
-            name=f"{base_file_name} ({counter}){filepath.suffix}"
+            name=f"{base_file_name} ({counter}){filepath.suffix}",
         )
 
     with open(file=filepath, mode="w", encoding="utf-8") as file:
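
The loop above avoids overwriting by appending " (n)" to the file stem until the name is free. The same pattern in isolation (the function name is mine, for illustration):

```python
from pathlib import Path


def unique_path(filepath: Path) -> Path:
    """Return filepath unchanged, or "name (1).md", "name (2).md", ... if taken."""
    base_file_name = filepath.stem
    counter = 0
    while filepath.exists():
        counter += 1
        filepath = filepath.with_name(name=f"{base_file_name} ({counter}){filepath.suffix}")
    return filepath
```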
@@ -65,7 +64,8 @@ def save_conversation_to_file(conversation: Conversation, filepath: Path) -> Non
def save_conversation_set_to_dir(conv_set: ConversationSet, dir_path: Path) -> None:
"""Save a conversation set to a directory, one markdown file per conversation."""
for conversation in tqdm(
iterable=conv_set.conversation_list, desc="Writing Markdown 📄 files"
iterable=conv_set.conversation_list,
desc="Writing Markdown 📄 files",
):
file_path: Path = dir_path / f"{conversation.sanitized_title()}.md"
save_conversation_to_file(conversation=conversation, filepath=file_path)
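
The tqdm reflow above only puts each keyword argument on its own line; the progress bar is unchanged. A runnable sketch of the pattern, with a hypothetical title list standing in for conv_set.conversation_list:

```python
from pathlib import Path

from tqdm import tqdm

titles = ["First chat", "Second chat"]  # hypothetical stand-in for the conversation list
out_dir = Path("markdown_out")
out_dir.mkdir(exist_ok=True)

for title in tqdm(
    iterable=titles,
    desc="Writing Markdown 📄 files",
):
    (out_dir / f"{title}.md").write_text(data=f"# {title}\n", encoding="utf-8")
```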
@@ -89,21 +89,23 @@ def save_wordcloud_from_conversation_set(
         raise ValueError("Invalid time period for wordcloud")
 
     wordcloud_from_conversation_set(conv_set=conv_set, **kwargs).to_file(  # type: ignore
-        filename=dir_path / file_name
+        filename=dir_path / file_name,
     )
 
 
 def generate_all_wordclouds(
-    conv_set: ConversationSet, dir_path: Path, **kwargs: Any
+    conv_set: ConversationSet,
+    dir_path: Path,
+    **kwargs: Any,
 ) -> None:
     """Create the wordclouds and save them to the folder."""
-
     weeks_dict: dict[datetime, ConversationSet] = conv_set.grouped_by_week()
     months_dict: dict[datetime, ConversationSet] = conv_set.grouped_by_month()
     years_dict: dict[datetime, ConversationSet] = conv_set.grouped_by_year()
 
     for week in tqdm(
-        iterable=weeks_dict.keys(), desc="Creating weekly wordclouds 🔡☁️ "
+        iterable=weeks_dict.keys(),
+        desc="Creating weekly wordclouds 🔡☁️ ",
     ):
         save_wordcloud_from_conversation_set(
             conv_set=weeks_dict[week],
@@ -113,7 +115,8 @@ def generate_all_wordclouds(
         )
 
     for month in tqdm(
-        iterable=months_dict.keys(), desc="Creating monthly wordclouds 🔡☁️ "
+        iterable=months_dict.keys(),
+        desc="Creating monthly wordclouds 🔡☁️ ",
     ):
         save_wordcloud_from_conversation_set(
             conv_set=months_dict[month],
@@ -123,7 +126,8 @@ def generate_all_wordclouds(
         )
 
     for year in tqdm(
-        iterable=years_dict.keys(), desc="Creating yearly wordclouds 🔡☁️ "
+        iterable=years_dict.keys(),
+        desc="Creating yearly wordclouds 🔡☁️ ",
     ):
         save_wordcloud_from_conversation_set(
             conv_set=years_dict[year],
@@ -135,23 +139,22 @@

 def save_custom_instructions_to_file(conv_set: ConversationSet, filepath: Path) -> None:
     """Create JSON file for custom instructions in the conversation set."""
-
     with open(file=filepath, mode="w", encoding="utf-8") as file:
         dump(obj=conv_set.all_custom_instructions(), fp=file, indent=2)
 
 
 def default_output_folder() -> str:
     """Returns the default output folder path.
-    (put the function in a separate file to isolate file system operations)"""
-
+    (put the function in a separate file to isolate file system operations)
+    """
     return str(object=Path.home() / "Documents" / "ChatGPT Data")
 
 
 def get_openai_zip_filepath() -> str:
     """Returns the path to the most recent zip file in the Downloads folder,
-    excluding those containing 'bookmarklet'."""
-
+    excluding those containing 'bookmarklet'.
+    """
     downloads_folder: Path = Path.home() / "Downloads"
 
     # Filter out zip files with names that contain "bookmarklet"
@@ -170,8 +173,8 @@

 def get_bookmarklet_json_filepath() -> Path | None:
     """Returns the path to the most recent json file in the Downloads folder,
-    containing 'bookmarklet'."""
-
+    containing 'bookmarklet'.
+    """
     downloads_folder: Path = Path.home() / "Downloads"
 
     # Filter out json files with names that do not contain "bookmarklet"
@@ -184,7 +187,8 @@

     # Most recent json file in downloads folder, containing "bookmarklet"
     bookmarklet_json_filepath: Path = max(
-        bookmarklet_json_files, key=lambda x: x.stat().st_ctime
+        bookmarklet_json_files,
+        key=lambda x: x.stat().st_ctime,
     )
 
     return bookmarklet_json_filepath
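
Both Downloads-folder helpers above reduce to the same idiom: glob, filter on the file name, then take the newest match by st_ctime. A combined sketch (the function name and the optional substring filter are mine):

```python
from pathlib import Path


def most_recent_download(pattern: str, must_contain: str | None = None) -> Path | None:
    """Newest file in ~/Downloads matching pattern, optionally filtered by name substring."""
    downloads_folder: Path = Path.home() / "Downloads"
    candidates: list[Path] = [
        path
        for path in downloads_folder.glob(pattern)
        if must_contain is None or must_contain in path.name
    ]
    if not candidates:
        return None
    return max(candidates, key=lambda x: x.stat().st_ctime)


# e.g. the bookmarklet export: most_recent_download("*.json", must_contain="bookmarklet")
```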
2 changes: 0 additions & 2 deletions js/how_to_use.md
@@ -27,6 +27,4 @@ Feel free to modify the script to your liking. Would also appreciate sharing the
 - [ ] better widget UI (add error messages and progress and such,
       so you can close the dev console and still be kept informed on the download process)
 - [ ] add instructions on how to create a bookmarklet
-      (how to minify the js script, make it url valid, then creating the bookmark in the browser.
-      Maybe do all these in-house? but that might need the uglify-js npm dependency ...)
 - [ ] more todos ...
