Skip to content

Commit

Permalink
~
Browse files Browse the repository at this point in the history
  • Loading branch information
mohamed-chs committed Oct 17, 2023
1 parent ced6429 commit 1449ddd
Show file tree
Hide file tree
Showing 10 changed files with 77 additions and 93 deletions.
4 changes: 2 additions & 2 deletions .vscode/extensions.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
"usernamehw.errorlens",
"njqdev.vscode-python-typehint",
"ms-python.python",
"ms-python.pylint",
"matangover.mypy",
"ms-toolsai.jupyter"
"ms-toolsai.jupyter",
"charliermarsh.ruff"
]
}
17 changes: 10 additions & 7 deletions controllers/data_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import matplotlib.pyplot as plt
import nltk # type: ignore
import pandas as pd
from matplotlib.axes import Axes
from nltk.corpus import stopwords # type: ignore
from pandas.core.series import Series
from wordcloud import WordCloud # type: ignore
Expand Down Expand Up @@ -77,23 +78,23 @@ def wordcloud_from_text(


def wordcloud_from_conversation_set(
    conv_set: "ConversationSet", **kwargs: Any
) -> "WordCloud":
    """Create a wordcloud from the given conversation set.

    Pools the text written by the user and by the assistant across the whole
    set (joined with a newline) so the cloud reflects the full dialogue, then
    delegates rendering to `wordcloud_from_text`.

    Returns a WordCloud object.
    """
    text: str = (
        conv_set.all_author_text(author="user")
        + "\n"
        + conv_set.all_author_text(author="assistant")
    )

    # kwargs (font, colormap, ...) are forwarded untouched to the renderer.
    return wordcloud_from_text(text=text, **kwargs)


def create_save_graph(all_timestamps: list[float], file_path: Path) -> None:
def create_save_graph(timestamps: list[float], file_path: Path) -> None:
"""Creates and saves a graph from the given timestamps."""

df = pd.DataFrame(data=all_timestamps, columns=["timestamp"]) # type: ignore
df = pd.DataFrame(data=timestamps, columns=["timestamp"]) # type: ignore
df["datetime"] = pd.to_datetime(arg=df["timestamp"], unit="s") # type: ignore

daily_counts: Series = df.groupby(by=df["datetime"].dt.date).size() # type: ignore
Expand All @@ -111,13 +112,15 @@ def create_save_graph(all_timestamps: list[float], file_path: Path) -> None:
markeredgewidth=0.5,
)

plt.title(label="ChatGPT Prompts per Day", fontsize=20, fontweight="bold", pad=20) # type: ignore
plt.title( # type: ignore
label="ChatGPT Prompts per Day", fontsize=20, fontweight="bold", pad=20
)
plt.xlabel(xlabel="Month", fontsize=16, labelpad=15) # type: ignore
plt.ylabel(ylabel="Number of Prompts", fontsize=16, labelpad=15) # type: ignore
plt.xticks(fontsize=14) # type: ignore
plt.yticks(fontsize=14) # type: ignore

ax = plt.gca() # type: ignore
ax: Axes = plt.gca()
ax.xaxis.set_major_locator(locator=mdates.MonthLocator()) # type: ignore
ax.xaxis.set_major_formatter(formatter=mdates.DateFormatter(fmt="%B")) # type: ignore

Expand Down
61 changes: 28 additions & 33 deletions controllers/file_system.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
and configuration.py, but that's a placeholder for user input in whatever form,
may be replaced later, with a GUI or something)"""


from datetime import datetime
from json import dump, load
from os import utime
Expand Down Expand Up @@ -42,41 +41,39 @@ def load_conversations_from_bookmarklet_json(json_filepath: Path) -> Conversatio
"""Load the conversations from the bookmarklet json export file."""

with open(file=json_filepath, mode="r", encoding="utf-8") as file:
conversations = load(file)
conversations = load(fp=file)

return ConversationSet(conversations=conversations)


def save_conversation_to_file(conversation: "Conversation", filepath: Path) -> None:
    """Save a conversation to a markdown file, with added modification time.

    If `filepath` already exists, a ` (N)` counter is appended to the stem
    until a free name is found, so earlier exports are never overwritten.
    After writing, the file's access/modification times are set to the
    conversation's update time so OS-level date sorting matches recency.
    """
    base_file_name: str = filepath.stem

    counter = 0
    while filepath.exists():
        counter += 1
        filepath = filepath.with_name(
            name=f"{base_file_name} ({counter}){filepath.suffix}"
        )

    with open(file=filepath, mode="w", encoding="utf-8") as file:
        file.write(conversation.to_markdown())
    # Mirror the conversation's update time onto the file's atime/mtime.
    utime(path=filepath, times=(conversation.update_time, conversation.update_time))


def save_conversation_set_to_dir(conv_set: "ConversationSet", dir_path: Path) -> None:
    """Save a conversation set to a directory, one markdown file per conversation.

    Iterates the set's conversation list with a tqdm progress bar; each
    conversation is written via `save_conversation_to_file`, which also
    handles filename collisions and modification times.
    """
    for conversation in tqdm(
        iterable=conv_set.conversation_list, desc="Writing Markdown 📄 files"
    ):
        # sanitized_title() keeps the filename filesystem-safe.
        file_path: Path = dir_path / f"{conversation.sanitized_title()}.md"
        save_conversation_to_file(conversation=conversation, filepath=file_path)


def save_wordcloud_from_conversation_set(
conversation_set: ConversationSet,
folder_path: Path,
conv_set: ConversationSet,
dir_path: Path,
time_period: tuple[datetime, str],
**kwargs: Any,
) -> None:
Expand All @@ -91,26 +88,26 @@ def save_wordcloud_from_conversation_set(
case _:
raise ValueError("Invalid time period for wordcloud")

wordcloud_from_conversation_set(conversation_set=conversation_set, **kwargs).to_file( # type: ignore
filename=folder_path / file_name
wordcloud_from_conversation_set(conv_set=conv_set, **kwargs).to_file( # type: ignore
filename=dir_path / file_name
)


def generate_all_wordclouds(
conversation_set: ConversationSet, folder_path: Path, **kwargs: Any
conv_set: ConversationSet, dir_path: Path, **kwargs: Any
) -> None:
"""Create the wordclouds and save them to the folder."""

weeks_dict: dict[datetime, ConversationSet] = conversation_set.grouped_by_week()
months_dict: dict[datetime, ConversationSet] = conversation_set.grouped_by_month()
years_dict: dict[datetime, ConversationSet] = conversation_set.grouped_by_year()
weeks_dict: dict[datetime, ConversationSet] = conv_set.grouped_by_week()
months_dict: dict[datetime, ConversationSet] = conv_set.grouped_by_month()
years_dict: dict[datetime, ConversationSet] = conv_set.grouped_by_year()

for week in tqdm(
iterable=weeks_dict.keys(), desc="Creating weekly wordclouds 🔡☁️ "
):
save_wordcloud_from_conversation_set(
conversation_set=weeks_dict[week],
folder_path=folder_path,
conv_set=weeks_dict[week],
dir_path=dir_path,
time_period=(week, "week"),
**kwargs,
)
Expand All @@ -119,8 +116,8 @@ def generate_all_wordclouds(
iterable=months_dict.keys(), desc="Creating monthly wordclouds 🔡☁️ "
):
save_wordcloud_from_conversation_set(
conversation_set=months_dict[month],
folder_path=folder_path,
conv_set=months_dict[month],
dir_path=dir_path,
time_period=(month, "month"),
**kwargs,
)
Expand All @@ -129,20 +126,18 @@ def generate_all_wordclouds(
iterable=years_dict.keys(), desc="Creating yearly wordclouds 🔡☁️ "
):
save_wordcloud_from_conversation_set(
conversation_set=years_dict[year],
folder_path=folder_path,
conv_set=years_dict[year],
dir_path=dir_path,
time_period=(year, "year"),
**kwargs,
)


def save_custom_instructions_to_file(conv_set: "ConversationSet", filepath: Path) -> None:
    """Create a JSON file with all custom instructions found in the conversation set.

    The collected instructions are written as pretty-printed (indent=2) JSON.
    Any existing file at `filepath` is overwritten.
    """
    with open(file=filepath, mode="w", encoding="utf-8") as file:
        dump(obj=conv_set.all_custom_instructions(), fp=file, indent=2)


def default_output_folder() -> str:
Expand Down
14 changes: 6 additions & 8 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def main() -> None:
json_filepath=bookmarklet_json_filepath
)
)
all_conversations_set.update(conversation_set=bookmarklet_conversations_set)
all_conversations_set.update(conv_set=bookmarklet_conversations_set)

output_folder = Path(configs_dict["output_folder"])

Expand All @@ -78,7 +78,7 @@ def main() -> None:
markdown_folder.mkdir(parents=True, exist_ok=True)

save_conversation_set_to_dir(
conversation_set=all_conversations_set, dir_path=markdown_folder
conv_set=all_conversations_set, dir_path=markdown_folder
)

print(f"\nDone ✅ ! Check the output 📄 here : {markdown_folder.as_uri()} 🔗\n")
Expand All @@ -91,9 +91,7 @@ def main() -> None:
graph_path: Path = graph_folder / "all messages.png"

create_save_graph(
all_timestamps=all_conversations_set.all_author_message_timestamps(
author="user"
),
timestamps=all_conversations_set.all_author_message_timestamps(author="user"),
file_path=graph_path,
)

Expand All @@ -107,8 +105,8 @@ def main() -> None:
colormap = configs_dict["wordcloud"]["colormap"]

generate_all_wordclouds(
conversation_set=all_conversations_set,
folder_path=wordcloud_folder,
conv_set=all_conversations_set,
dir_path=wordcloud_folder,
font_path=font_path,
colormap=colormap,
)
Expand All @@ -120,7 +118,7 @@ def main() -> None:
custom_instructions_filepath: Path = output_folder / "custom_instructions.json"

save_custom_instructions_to_file(
conversation_set=all_conversations_set, file_path=custom_instructions_filepath
conv_set=all_conversations_set, filepath=custom_instructions_filepath
)

print(
Expand Down
18 changes: 9 additions & 9 deletions models/conversation_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,20 +28,20 @@ def __init__(self, conversations: list[dict[str, Any]]) -> None:

self.conversation_list = list(self.conversation_dict.values())

def add_conversation(self, conv: "Conversation") -> None:
    """Add a conversation to both the id-keyed dict and the ordered list.

    The two containers are kept in sync: the dict gives O(1) lookup by
    conversation id, the list preserves insertion order for iteration.
    """
    self.conversation_dict[conv.conversation_id] = conv
    self.conversation_list.append(conv)

def last_updated(self) -> float:
    """Return the timestamp of the most recently updated conversation in the list.

    Raises ValueError (from `max`) when the set is empty.
    """
    return max(conversation.update_time for conversation in self.conversation_list)

def update(self, conv_set: "ConversationSet") -> None:
    """Merge `conv_set` into this set.

    Skips the merge entirely when `conv_set` holds nothing newer than what
    is already here (compared via `last_updated`); otherwise its
    conversations overwrite same-id entries and the ordered list is rebuilt
    from the dict.
    """
    if conv_set.last_updated() <= self.last_updated():
        return
    self.conversation_dict.update(conv_set.conversation_dict)
    self.conversation_list = list(self.conversation_dict.values())

def grouped_by_week(self) -> dict[datetime, "ConversationSet"]:
Expand All @@ -51,7 +51,7 @@ def grouped_by_week(self) -> dict[datetime, "ConversationSet"]:
week_start: datetime = conversation.start_of_week()
if week_start not in grouped:
grouped[week_start] = ConversationSet(conversations=[])
grouped[week_start].add_conversation(conversation=conversation)
grouped[week_start].add_conversation(conv=conversation)
return grouped

def grouped_by_month(self) -> dict[datetime, "ConversationSet"]:
Expand All @@ -61,7 +61,7 @@ def grouped_by_month(self) -> dict[datetime, "ConversationSet"]:
month_start: datetime = conversation.start_of_month()
if month_start not in grouped:
grouped[month_start] = ConversationSet(conversations=[])
grouped[month_start].add_conversation(conversation=conversation)
grouped[month_start].add_conversation(conv=conversation)
return grouped

def grouped_by_year(self) -> dict[datetime, "ConversationSet"]:
Expand All @@ -71,7 +71,7 @@ def grouped_by_year(self) -> dict[datetime, "ConversationSet"]:
year_start: datetime = conversation.start_of_year()
if year_start not in grouped:
grouped[year_start] = ConversationSet(conversations=[])
grouped[year_start].add_conversation(conversation=conversation)
grouped[year_start].add_conversation(conv=conversation)
return grouped

def all_custom_instructions(self) -> list[dict[str, Any]]:
Expand Down
4 changes: 1 addition & 3 deletions models/message.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,7 @@ def author_header(self) -> str:
def content_text(self) -> str:
    """Get the text content of the message.

    Handles the two content shapes seen in ChatGPT exports:
    - "parts": plain chat text; the first part is returned as a string.
    - "text": code-interpreter content; wrapped in a python code fence.
    Any other shape yields an empty string.
    """
    if "parts" in self.content:
        return str(object=self.content["parts"][0])
    if "text" in self.content:
        return f"```python\n{self.content['text']}\n```"
    return ""
Expand Down
10 changes: 5 additions & 5 deletions models/node.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,13 @@ class Node:

def __init__(
    self,
    n_id: str,
    msg: "Message | None",
    parent: Optional["Node"],
    children: Optional[list["Node"]],
) -> None:
    """Build a conversation-tree node.

    n_id: id of this node in the conversation mapping.
    msg: the message payload, or None for structural nodes.
    parent: parent node, or None until the tree is linked (second pass).
    children: child nodes; None is normalized to an empty list.
    """
    self.id: str = n_id
    self.message: "Message | None" = msg
    self.parent: "Node | None" = parent
    # Normalize so callers can always append without a None check.
    self.children: list["Node"] = children if children else []

Expand All @@ -45,7 +45,7 @@ def nodes_from_mapping(mapping: dict[str, Any]) -> dict[str, "Node"]:
message: Message | None = (
Message(message=value["message"]) if value.get("message") else None
)
nodes[key] = Node(node_id=key, message=message, parent=None, children=None)
nodes[key] = Node(n_id=key, msg=message, parent=None, children=None)

# Second pass: Connect nodes
for key, value in mapping.items():
Expand Down
14 changes: 5 additions & 9 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,6 @@
matplotlib
nltk
pandas
questionary
seaborn
tqdm
wordcloud
pylint
-r requirements.txt
pytest
mypy
pytest
ruff
black
isort
4 changes: 2 additions & 2 deletions tests/test_conversation.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ def sample_conversation_data() -> dict[str, Any]:

return {
"title": "Sample Conversation",
"create_time": 1642540800.0, # Jan 19, 2022, 12:00:00 PM
"update_time": 1642540900.0, # Jan 19, 2022, 12:01:40 PM
"create_time": 1642540800.0, # Jan 18, 2022, 12:00:00 PM
"update_time": 1642540900.0, # Jan 18, 2022, 12:01:40 PM
"mapping": {
"node1": {
"message": {
Expand Down
Loading

0 comments on commit 1449ddd

Please sign in to comment.