-
Notifications
You must be signed in to change notification settings - Fork 0
/
organizer.py
189 lines (163 loc) · 8.08 KB
/
organizer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
import json
import shutil
import os
import time
from prompt import Prompt
from descriptor import ImageDescriber
class FileOrganizer:
    """Sort the files directly under a directory into folders using an LLM.

    The files are described to the model in batches; the model answers with a
    ``file_remap`` JSON object mapping each current file name to a new
    (optionally renamed) path relative to the target directory, and the files
    are then moved accordingly.

    Attributes set by ``__init__``:
        target_path: directory whose direct children are organized.
        files / folders: names of the files / sub-directories found there.
        local_folders: same names as ``folders`` (paths relative to
            ``target_path``).
        created_folders: relative folders that received files during this run;
            fed back into later prompts so batches reuse them.
        image_descriptor: ``ImageDescriber`` instance used to describe images.
    """

    # Lower-case extensions whose files get an image description in the prompt.
    IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')

    def __init__(self, target_path):
        """Scan ``target_path`` and set up the prompts and the image describer.

        Args:
            target_path: directory to organize; it must exist and be listable
                (``os.listdir`` errors propagate).
        """
        self.user_prompt = "Organize the following files. Infer project name from file type, file name and description. You can put files in existing folders if they make sense. You can create folders. Only create folders that represent categories.\n"
        self.system_prompt = "You are a file organizer, attempt to clean up the files into folders and optionally rename them to be clearer. Don't rename files that already have good descriptive names. Group things by similarity and description when possible"
        # Example of the response shape, handed to the model as the schema.
        self.schema = """{
"file_remap": {
"ExampleName.png": "Images/BetterExampleName.png",
"file2.txt": "Notes/file2.txt",
...,
"Some_Bad_Format.ext": "Folder/SomeGoodFormat.ext"
}
}"""
        self.target_path = target_path
        self.created_folders = set()

        print("Finding files and folders...")
        # List the directory once so files and folders come from the same
        # snapshot instead of three separate listings.
        self.files = []
        self.folders = []
        for entry in os.listdir(self.target_path):
            entry_path = os.path.join(self.target_path, entry)
            if os.path.isfile(entry_path):
                self.files.append(entry)
            elif os.path.isdir(entry_path):
                self.folders.append(entry)
        # Direct children are already relative to the target path.
        self.local_folders = list(self.folders)

        print(f"Found {len(self.files)} files and {len(self.folders)} folders")
        print(self.local_folders)
        print("Initializing ImageDescriber... this can take a few minutes")
        self.image_descriptor = ImageDescriber()

    def _is_image(self, file_ext):
        """Return True when ``file_ext`` (with leading dot) names an image type."""
        return file_ext.lower() in self.IMAGE_EXTENSIONS

    def _is_inside_target(self, path):
        """Return True when ``path`` resolves to a location under ``target_path``."""
        root = os.path.normcase(os.path.abspath(self.target_path))
        candidate = os.path.normcase(os.path.abspath(path))
        try:
            return os.path.commonpath([root, candidate]) == root
        except ValueError:
            # Raised e.g. for paths on different drives: certainly not inside.
            return False

    def generate_prompt(self, file_metadata, folder_metadata):
        """Build the user prompt for one batch of files.

        Args:
            file_metadata: iterable of dicts with ``file_name``, ``file_ext``
                and ``creation_date`` keys, plus ``description`` for images
                (the shape produced by ``get_file_meta_batch``).
            folder_metadata: sized iterable of folder names, or of dicts with
                a ``folder_name`` key (the shape produced by
                ``get_folder_meta_batch``). Only entries that exist as
                directories under the target path are listed.

        Returns:
            The prompt text as a single string.
        """
        parts = [self.user_prompt]

        existing_folders = self.created_folders | set(self.folders)
        if existing_folders:
            parts.append("Existing folders you can optionally use: ")
            # Sorted so the prompt is deterministic (set order is not).
            parts.append(", ".join(sorted(existing_folders)))
            parts.append("\n\n")

        parts.append("Files to organize:\n")
        for file in file_metadata:
            full_name = f"{file['file_name']}{file['file_ext']}"
            if self._is_image(file['file_ext']) and 'description' in file:
                parts.append(
                    f"{full_name}. Image Description: {file['description']}. Created on: {file['creation_date']}\n")
            else:
                parts.append(
                    f"{full_name}. Created on: {file['creation_date']}\n")

        if len(folder_metadata) > 0:
            parts.append("\n\nExisting Folders:\n")
            for folder in folder_metadata:
                folder_name = folder['folder_name'] if isinstance(
                    folder, dict) else folder
                if os.path.isdir(os.path.join(self.target_path, folder_name)):
                    parts.append(f"{folder_name}\n")

        return "".join(parts)

    def get_file_meta_batch(self, files, range=(0, 20)):
        """Collect prompt metadata for a slice of ``files``.

        Args:
            files: file names relative to the target path.
            range: ``(start, stop)`` slice bounds into ``files``; a stop past
                the end is clamped by slicing. The argument is never modified.
                (The name shadows the builtin but is kept for callers that
                pass it by keyword.)

        Returns:
            A list of dicts with ``file_ext``, ``file_name``, ``creation_date``
            and, for image files, ``description``.
        """
        start, stop = range[0], range[1]
        file_metadata = []
        for file in files[start:stop]:
            file_path = os.path.join(self.target_path, file)
            file_name, file_ext = os.path.splitext(file)
            # NOTE: getctime is the creation time on Windows but the last
            # metadata change on Unix.
            creation_time = os.path.getctime(file_path)
            metadata = {
                'file_ext': file_ext,
                'file_name': file_name,
                # Month name, day and year, e.g. "January 05, 2024".
                'creation_date': time.strftime(
                    "%B %d, %Y", time.localtime(creation_time)),
            }
            if self._is_image(file_ext):
                # Whatever the describer returns for the image is used as its
                # description in the prompt.
                metadata['description'] = self.image_descriptor.classify_image(
                    file_path, "Description:")
            file_metadata.append(metadata)
        return file_metadata

    # Unused for now, but will likely be leveraged in the future
    def get_folder_meta_batch(self, folders, range=(0, 20)):
        """Collect metadata for a slice of ``folders``.

        Args:
            folders: folder names relative to the target path.
            range: ``(start, stop)`` slice bounds into ``folders``; never
                modified, and a stop past the end is clamped by slicing.

        Returns:
            A list of dicts with ``folder_name``, ``creation_date`` and
            ``is_empty``.
        """
        start, stop = range[0], range[1]
        folder_metadata = []
        for folder in folders[start:stop]:
            folder_path = os.path.join(self.target_path, folder)
            creation_time = os.path.getctime(folder_path)
            folder_metadata.append({
                'folder_name': folder,
                'creation_date': time.strftime(
                    "%B %d, %Y", time.localtime(creation_time)),
                'is_empty': len(os.listdir(folder_path)) == 0,
            })
        return folder_metadata

    def move_and_rename_files(self, mapping):
        """Apply a ``file_remap`` mapping by moving files under the target path.

        Args:
            mapping: JSON text (or an already-parsed dict) of the form
                ``{"file_remap": {"old_name": "Folder/new_name", ...}}`` with
                paths relative to the target path.

        Problems are reported with ``print`` and the affected entry is skipped;
        nothing is raised for malformed input, existing destinations, missing
        sources, paths that leave the target directory, or failed moves.
        """
        if isinstance(mapping, dict):
            parsed = mapping
        else:
            try:
                parsed = json.loads(mapping)
            except (TypeError, ValueError) as e:
                print(f'Error parsing JSON: {e}')
                return

        file_remap = parsed.get('file_remap') if isinstance(
            parsed, dict) else None
        if not isinstance(file_remap, dict):
            print("Error parsing JSON: expected an object with a 'file_remap' mapping")
            return

        for old_name, new_name in file_remap.items():
            if not isinstance(old_name, str) or not isinstance(new_name, str):
                print(f'Skipping invalid mapping entry {old_name!r} -> {new_name!r}')
                continue

            old_file_path = os.path.join(self.target_path, old_name)
            new_file_path = os.path.join(self.target_path, new_name)

            # The mapping comes from the model: never let "../" or an absolute
            # path move files out of (or pull files into) the target directory.
            if not (self._is_inside_target(old_file_path)
                    and self._is_inside_target(new_file_path)):
                print(
                    f'Refusing to move {old_file_path} -> {new_file_path}: path is outside {self.target_path}')
                continue

            try:
                if os.path.isfile(new_file_path):
                    print(
                        f'File {new_file_path} already exists, skipping, so we don\'t overwrite it!')
                    continue
                if not os.path.isfile(old_file_path):
                    print(f'File {old_file_path} does not exist')
                    continue

                # We need to keep track of the folders we create so we can
                # include them in the prompt (due to batching). Stored relative
                # to the target path, in the same form as self.folders.
                relative_folder = os.path.dirname(os.path.normpath(new_name))
                if relative_folder:
                    self.created_folders.add(relative_folder)

                # Create new directories if they don't exist
                os.makedirs(os.path.dirname(new_file_path), exist_ok=True)
                print(f'Moving file {old_file_path} -> {new_file_path}')
                # Rename and move the file
                shutil.move(old_file_path, new_file_path)
            except (OSError, ValueError) as e:
                # Couldn't move the file
                print(
                    f'Error moving file {old_file_path} to {new_file_path}: {e}')

    def organize_files(self, batch_size=20, max_retries=None):
        """Organize every file found at construction time, batch by batch.

        Args:
            batch_size: number of files described to the model per request.
            max_retries: how many times a failed request is retried before the
                batch is skipped; ``None`` (default) retries indefinitely.
        """
        print("Organizing files...")
        total = len(self.files)
        for i in range(0, total, batch_size):
            batch_end = min(i + batch_size, total)
            print(f"Batch {i} to {batch_end}:")
            batch = self.get_file_meta_batch(
                self.files, range=(i, batch_end))
            prompt = self.generate_prompt(batch, {})
            print("\n")

            result = None
            answered = False
            failures = 0
            while not answered:
                try:
                    response = Prompt(
                        prompt,
                        self.system_prompt
                    )
                    result = response.call(schema=self.schema, tokens=2048)
                    answered = True
                except Exception as e:
                    # Boundary with the model client: report and retry.
                    print(f'Failed to reach GPT: {e}')
                    failures += 1
                    if max_retries is not None and failures > max_retries:
                        break
                    time.sleep(2)

            if not answered:
                print(f"Skipping batch {i} to {batch_end} after {failures} failed attempts")
                continue

            self.move_and_rename_files(mapping=json.dumps(result))
            # Short pause between batches.
            time.sleep(0.5)

        print("Created Folders:")
        print(self.created_folders)
        print("Files organized!")