-
Notifications
You must be signed in to change notification settings - Fork 4
/
csv_to_md-file.py
384 lines (344 loc) · 19.9 KB
/
csv_to_md-file.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
import csv
import re
from settings import Settings
import os
# make sure saved_settings.py exists before it is imported below;
# a fresh file only contains a module docstring
if not os.path.isfile("saved_settings.py"):
    with open("saved_settings.py", "w", encoding='utf-8') as f:
        f.write( "\"\"\"This is your settings file.\"\"\"\n\n")
else:
    print("saved_settings.py is already present")

# every saved settings dictionary becomes a module-level name here;
# GetInput.choices() relies on that to find a dictionary by the name the user types
from saved_settings import *
class GetInput:
    """Interactive front end of the script.

    Asks whether saved settings should be loaded or new ones entered, then
    hands the resulting settings dictionary to ReadCreate, which writes the
    Markdown files.
    """

    def __init__(self):
        # settings dictionary that is passed on to ReadCreate
        self.settings:dict = {}
        # "y" if saved settings were loaded, "n" if they were entered by hand
        self.loadSettings:str = ""

    @staticmethod
    def _findSavedSettings(name):
        """Return the saved settings dictionary called *name*, or None.

        The dictionaries of saved_settings.py are star-imported into this
        module, so they are looked up in the module globals.

        NOTE(security): the original code ran eval() on the raw user input,
        which executes arbitrary expressions and raises NameError/SyntaxError
        on a typo. A plain name lookup is used instead.

        The exact spelling is tried first and the lowercased spelling second;
        the latter keeps the behaviour of the original code, which always
        lowercased the entered name. Names that are not dictionaries are
        rejected.
        """
        namespace = globals()
        for candidate in (name, name.lower()):
            found = namespace.get(candidate)
            if isinstance(found, dict):
                return found
        return None

    def choices(self):
        """Ask for the settings source, store the settings and start the conversion.

        Loops until the user answers "y" (load a dictionary from
        saved_settings.py) or "n" (enter the general settings through
        Settings.setGeneralSettings). An unknown dictionary name is reported
        and the question is asked again instead of crashing.
        """
        self.loadSettings = ""
        while True:
            self.loadSettings = input("Do you want to load saved settings? Enter \"y\" for yes and \"n\" for no: ")
            self.loadSettings = self.loadSettings.lower().strip()
            if self.loadSettings == "y":
                choice = input("Which settings do you want to choose? Please enter the name of the dictionary in saved_settings.py: ")
                choice = choice.strip()
                savedSettings = self._findSavedSettings(choice)
                if savedSettings is None:
                    # start over so that the user can retry or switch to manual entry
                    print(f"No settings dictionary named \"{choice}\" was found in saved_settings.py.")
                    continue
                self.settings = savedSettings
                print(f"These are your selected settings: {self.settings}")
                break
            elif self.loadSettings == "n":
                # setGeneralSettings lets the user choose the settings which are
                # not related to the formatting of the content
                self.settings = Settings().setGeneralSettings()
                print(f"These are your current general settings: {self.settings}")
                break
        self.mdChoices()

    def mdChoices(self):
        """Create the ReadCreate instance for the chosen YAML mode and write the files.

        "addYAML" takes precedence over "inlineYAML"; the latter is only read
        when "addYAML" is not "y". The column formatting is only asked for when
        the settings were entered by hand, because loaded settings already
        contain it.
        """
        useYaml = self.settings["addYAML"] == "y"
        useInlineYaml = not useYaml and self.settings["inlineYAML"] == "y"

        # instantiating ReadCreate also searches for the csv files
        instanceFile = ReadCreate(self.settings, useYaml, useInlineYaml)
        if not instanceFile.csvFiles:
            # nothing to convert; the steps below would fail on csvFiles[0]
            print("No csv files were found in the current working directory.")
            return

        if self.loadSettings == "n":
            print("Now you will set the formatting for all of the columns one by one.")
            instanceFile.getCellSettings()
        if useYaml or useInlineYaml:
            # the column names of the first csv file become the YAML keys
            instanceFile.getYamlKeys()
        # creates the md files
        instanceFile.makeMdFiles()
class ReadCreate:
    """Find the csv files below the working directory and write one md file per row.

    The settings dictionary is read with these keys: "delimiter", "column"
    (column index -> [formatting] or [formatting, "y", separator]),
    "fileNameCol", "fileNameColSeparator" and "fileNameLength".
    """

    # characters that are removed from the file name
    _INVALID_FILENAME_CHARS = re.compile(r'[<>:"/\\|?*\[\]]')

    # formatting code -> template; {0} is replaced with the cell content
    _FORMATS = {
        "n": "{0}",
        "wl": "[[{0}]]",
        "ml": "[{0}]({0})",
        "hl": "=={0}==",
        "it": "*{0}*",
        "bo": "**{0}**",
        "st": "~~{0}~~",
        "co": "`{0}`",
        "cb": "```\n{0}\n```",
        "h1": "# {0}",
        "h2": "## {0}",
        "h3": "### {0}",
        "h4": "#### {0}",
        "h5": "##### {0}",
        "h6": "###### {0}",
        "wlem": "![[{0}]]",
        "mlem": "![{0}]({0})",
        "ul": "- {0}",
        "ol": "1. {0}",
        "bq": ">{0}",
        "ut": "- [ ] {0}",
        "ct": "- [x] {0}",
        "ma": "${0}$",
        "mb": "$$\n{0}\n$$",
        "oc": "%%{0}%%",
        "ta": "#{0}",
    }

    def __init__(self, settings:dict, yaml:bool=False, inlineyaml:bool=False):
        """Store the settings and collect every *.csv file below the working directory.

        yaml selects YAML front matter, inlineyaml selects "key:: value" lines.
        """
        # kept for backward compatibility; the column names are stored in self.keys
        self.keyList:list = []
        # column names of the first csv file, filled in by getYamlKeys()
        self.keys:list = []
        self.yaml:bool = yaml
        self.inlineyaml:bool = inlineyaml
        self.csvFiles:list= []
        self.settings = settings
        # find the csv files in the current working directory and its subdirectories
        for dirpath, dirnames, files in os.walk("."):
            for file_name in files:
                if file_name.endswith(".csv"):
                    normalised_path = os.path.normpath(os.path.join(dirpath, file_name))
                    print(f"Found file: {file_name}")
                    self.csvFiles.append(normalised_path)

    def _readHeader(self):
        """Return the first record of the first csv file, or None if there is none.

        The file is decoded as "utf-8-sig" so that a UTF-8 byte order mark does
        not end up in the first column name.
        """
        if not self.csvFiles:
            print("No csv files were found in the current working directory.")
            return None
        with open(self.csvFiles[0], "r", encoding='utf-8-sig') as csvFile:
            csvFileReader = csv.reader(csvFile, delimiter=self.settings["delimiter"])
            # next() takes the first record even when it spans several lines
            return next(csvFileReader, None)

    def getYamlKeys(self):
        """Store the column names of the first csv file in self.keys."""
        header = self._readHeader()
        self.keys = list(header) if header is not None else []

    def getCellSettings(self):
        """Ask for the formatting of every column and store it in self.settings["column"].

        The value stored per column index is [formatting], or
        [formatting, "y", separator] when the cell holds several values that
        are formatted individually. Only the first csv file is used; the
        others are assumed to follow the same scheme. Afterwards the user may
        save the settings through Settings.saveSettings.
        """
        addMdSetting = Settings()
        header = self._readHeader()
        if header is None:
            return
        # create the "column" entry if the general settings did not provide it
        columnSettings = self.settings.setdefault("column", {})
        if header:
            # the list of available options is shown once, before the first column
            print(addMdSetting.availableSettings)
            print(f"The file used for setting the format for all csv files in the current working directory is \"{self.csvFiles[0]}\".")
        for el, columnName in enumerate(header):
            # repeat the question until a valid formatting code is entered
            while True:
                cellFormatting = input(f"How should the column \"{columnName}\" be formatted? ")
                cellFormatting = cellFormatting.lower().strip()
                if cellFormatting in addMdSetting.availableSettingsList:
                    break
            inCellList:str = ""
            separator:str = ""
            # don't ask about multiple values if there's no formatting to be applied
            while cellFormatting != "n":
                inCellList = input("Does this cell contain multiple values which should be separately formatted? \"y\" for yes and \"n\" for no: ")
                inCellList = inCellList.lower().strip()
                if inCellList == "n":
                    break
                if inCellList == "y":
                    separator = input("How is your list separated? Please enter the character: ")
                    break
            # an empty separator cannot be split on, so the column is then treated as a single value
            if inCellList == "y" and len(separator) > 0:
                columnSettings[el] = [cellFormatting, inCellList, separator]
            else:
                columnSettings[el] = [cellFormatting]
        # ask the user whether they want to save their settings in saved_settings.py
        while True:
            save_settings = input("Do you want to save these settings? Enter \"y\" for yes and \"n\" for no: ")
            save_settings = save_settings.lower().strip()
            if save_settings == "y":
                addMdSetting.saveSettings(self.settings)
                break
            if save_settings == "n":
                break

    def makeMdFiles(self):
        """Write one md file into ./data/ for every data row of every csv file.

        The first record of each file is skipped as the header and blank lines
        are ignored. A row that cannot be converted or written is recorded in
        log.txt and does not stop the run.
        """
        try:
            os.makedirs("./data/", exist_ok=True)
        except OSError:
            print ("Error: Creating directory.: ./data/")
        # the YAML keys are needed below; load them if the caller has not done so
        if (self.yaml or self.inlineyaml) and not self.keys:
            self.getYamlKeys()
        for file in self.csvFiles:
            with open(file, "r", encoding='utf-8-sig') as currentFile:
                fileReader = csv.reader(currentFile, delimiter=self.settings["delimiter"])
                # exclude the first record as it only includes the heading names
                next(fileReader, None)
                for row in fileReader:
                    # csv.reader yields an empty list for a blank line
                    if not row:
                        continue
                    self._writeRow(row, f"{file} (line {fileReader.line_num})")

    def _writeRow(self, row, source):
        """Convert one row and write its md file; failures are logged to log.txt."""
        try:
            lst, unformattedLst = self._formatRow(row)
            content = self._renderContent(lst, unformattedLst)
            fileName = self._uniquePath(self._buildFileName(row))
        except (KeyError, IndexError, TypeError, ValueError):
            # e.g. a row with more or fewer cells than there are configured columns
            self._logError(source + " -- This row could not be converted.\n")
            return
        try:
            # the path is unused at this point, so append mode creates a new file
            with open (fileName, "a", encoding='utf-8') as f:
                f.write(content)
        except (OSError, ValueError):
            # remove the file that was erroneously created
            if os.path.isfile(fileName):
                os.remove(fileName)
            self._logError(fileName + " -- The contents of this file could not be written.\n")

    @staticmethod
    def _logError(message):
        """Append message to log.txt."""
        with open("log.txt", "a", encoding='utf-8') as m:
            m.write(message)

    def _formatRow(self, row):
        """Return (formatted cells, unformatted cells) for one row.

        The unformatted cells are only collected when YAML front matter is written.
        """
        lst:list = []
        unformattedLst:list = []
        columns = self.settings["column"]
        for el, cell in enumerate(row):
            if self.yaml:
                unformattedLst.append(cell.strip("\"'"))
            columnSetting = columns[el]
            if len(columnSetting) > 1:
                # the cell holds a list: format every item individually
                sublist = cell.split(columnSetting[2])
                lst.append(self.splitSubList(sublist, columnSetting[0]))
                continue
            formattedText = self.returnFormatting(cell, columnSetting[0])
            if formattedText is not None:
                lst.append(formattedText)
            elif self.inlineyaml:
                # keeps the formatted cells aligned with the inline YAML keys
                lst.append("null")
        return lst, unformattedLst

    def _buildFileName(self, row):
        """Return "./data/<name>" (without extension) built from the file name column(s)."""
        fileNameCol = self.settings["fileNameCol"]
        if len(fileNameCol) > 1:
            fileName = self.settings["fileNameColSeparator"].join(row[el] for el in fileNameCol)
        else:
            # an empty selection falls back to the first column
            fileName = row[int(fileNameCol[0]) if fileNameCol else 0]
        # limit the file name to the specified length
        fileName = fileName[:self.settings["fileNameLength"]].strip()
        fileName = self._INVALID_FILENAME_CHARS.sub("", fileName)
        # an empty name would otherwise produce the file "./data/.md"
        return "./data/" + (fileName or "untitled")

    @staticmethod
    def _uniquePath(fileName):
        """Return fileName + ".md", with _{counter} inserted if that file already exists."""
        if os.path.isfile(fileName + ".md"):
            counter:int = 1
            while os.path.isfile(fileName + "_" + str(counter) + ".md"):
                counter += 1
            fileName += "_" + str(counter)
        return fileName + ".md"

    @staticmethod
    def _yamlEscape(value):
        """Escape value for use inside a double-quoted YAML string."""
        return value.replace("\\", "\\\\").replace("\"", "\\\"").replace("\n", "\\n")

    def _renderContent(self, lst, unformattedLst):
        """Return the complete text of the md file for one row."""
        if self.yaml:
            yamlLst:list = []
            for idx, key in enumerate(self.keys):
                # defensive: keys that were set from elsewhere may still carry a BOM
                key = key.replace("\ufeff", "")
                columnSetting = self.settings["column"][idx]
                value = unformattedLst[idx]
                if len(columnSetting) > 1:
                    # one YAML list item per non-empty value of the cell
                    items = [el.strip() for el in value.split(columnSetting[2])]
                    yamlSubStr = ", ".join("\"" + self._yamlEscape(el) + "\"" for el in items if el)
                    yamlLst.append(f"{key}: [{yamlSubStr}]")
                elif len(value) == 0:
                    # make value null if there is no value for the key
                    yamlLst.append(f"{key}: null")
                else:
                    yamlLst.append(f"{key}: [\"{self._yamlEscape(value)}\"]")
            return "---\n" + "\n".join(yamlLst) + "\n---\n" + "\n\n".join(lst)
        if self.inlineyaml:
            inlineyamlLst:list = []
            for idx, key in enumerate(self.keys):
                key = key.replace("\ufeff", "")
                # an inline field has to stay on one line
                value = lst[idx].replace("\n", " ")
                inlineyamlLst.append(f"{key}:: {value}")
            return "\n".join(inlineyamlLst)
        # neither YAML nor inline YAML: only the main content is written
        return "\n\n".join(lst)

    def splitSubList(self, sublist, formatting):
        """Format every item of sublist and return them joined by newlines.

        Quotes and spaces around an item are removed first; items that are
        empty afterwards are skipped.
        """
        formattedItems = []
        for el in sublist:
            formatted = self.returnFormatting(el.strip("\"' "), formatting)
            if formatted is not None:
                formattedItems.append(formatted)
        return "\n".join(formattedItems).strip("\n")

    def returnFormatting(self, string, formatting):
        """Return string wrapped in the Markdown syntax selected by formatting.

        Returns None if string is empty or formatting is not a known code.
        """
        if len(string) == 0:
            return None
        template = self._FORMATS.get(formatting)
        if template is None:
            return None
        return template.format(string)
# only start the interactive conversion when the file is run as a script,
# not when it is imported
if __name__ == "__main__":
    # instantiate GetInput class to start the script
    startScript = GetInput()
    startScript.choices()
    print("The program has now finished. Check log.txt to see if there are any errors.")