-
Notifications
You must be signed in to change notification settings - Fork 9
/
monitor.py
177 lines (133 loc) · 5.43 KB
/
monitor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
# Copyright (C) 2016 Robert Scott
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
import hashlib
import logging
import threading
from glob import glob
from os import listdir, remove
from os.path import isfile, isdir, join, exists, relpath
from time import sleep
class MonitorThread(threading.Thread):
    """Monitor the spreadsheet directory and keep LibreOffice in sync with it.

    On every pass the thread rescans ``spreadsheets_path``, unloads documents
    that have disappeared from disk, loads documents that are new and, when
    ``reload_on_disk_change`` is true, reloads documents whose MD5 hash has
    changed.

    Args:
        spreadsheets: Dict mapping a path relative to ``spreadsheets_path`` to
            the object returned by ``soffice.open_spreadsheet``.
        locks: Dict mapping the same relative path to a ``threading.Lock``.
        hashes: Dict mapping the same relative path to the MD5 hex digest of
            the file content at the time it was loaded.
        soffice: Object exposing ``open_spreadsheet(full_path)``; the returned
            document must expose ``close()``.
        spreadsheets_path: Root directory that is scanned recursively.
        monitor_frequency: Pause between two scans, in seconds.
        reload_on_disk_change: Reload a loaded document when its hash differs
            from the recorded one.

    NOTE(review): the three dicts are presumably shared with other threads
    (hence the per-document locks) - confirm against the callers.
    """

    def __init__(
        self,
        spreadsheets,
        locks,
        hashes,
        soffice,
        spreadsheets_path,
        monitor_frequency,
        reload_on_disk_change,
    ):
        super().__init__()
        self._stop_thread = threading.Event()
        self.spreadsheets = spreadsheets
        self.locks = locks
        self.hashes = hashes
        self.soffice = soffice
        self.spreadsheets_path = spreadsheets_path
        self.monitor_frequency = monitor_frequency
        self.reload_on_disk_change = reload_on_disk_change
        self.docs = []  # Result of the latest directory scan
        self.done_scan = False  # Done an initial scan or not
        self.__delete_lock_files()

    def stop_thread(self):
        """Ask the monitoring loop to terminate."""
        self._stop_thread.set()

    def stopped(self):
        """Return True once ``stop_thread`` has been called."""
        return self._stop_thread.is_set()

    def initial_scan(self):
        """Return True once the first full scan has completed."""
        return self.done_scan

    def __get_full_path(self, doc):
        """Return the path of ``doc`` joined onto ``spreadsheets_path``."""
        return join(self.spreadsheets_path, doc)

    def __delete_lock_files(self):
        """Delete stale lock files, which can cause issues opening documents.

        The ``**`` component is what makes ``recursive=True`` effective, so
        lock files in sub-directories are removed as well as those in the
        root directory.
        """
        pattern = join(self.spreadsheets_path, "**", ".~lock.*#")
        for lock_file in glob(pattern, recursive=True):
            remove(lock_file)

    def __load_spreadsheet(self, doc):
        """Open ``doc`` through ``soffice`` and record its lock and hash.

        Args:
            doc: Dict with the keys ``"path"`` (relative path) and ``"hash"``.
        """
        path = doc["path"]
        logging.info("Loading %s", path)
        self.spreadsheets[path] = self.soffice.open_spreadsheet(
            self.__get_full_path(path)
        )
        self.locks[path] = threading.Lock()
        self.hashes[path] = doc["hash"]

    def __unload_spreadsheet(self, doc_path):
        """Close the document at ``doc_path`` and forget everything about it.

        The per-document lock is held while the document is closed and its
        entries are removed, and is always released afterwards so that a
        thread already waiting on it cannot block forever.

        Raises:
            KeyError: If ``doc_path`` is not currently loaded.
        """
        logging.info("Removing %s", doc_path)
        with self.locks[doc_path]:
            self.spreadsheets[doc_path].close()
            self.spreadsheets.pop(doc_path, None)
            self.locks.pop(doc_path, None)
            self.hashes.pop(doc_path, None)

    def __check_added(self):
        """Load new spreadsheets and reload the ones modified on disk."""
        for doc in self.docs:
            path = doc["path"]
            if path.startswith("."):  # Ignore hidden files
                continue
            if path in self.spreadsheets:
                # Already loaded: only act when the file now has a different
                # hash and reloading was requested.
                modified = (
                    self.reload_on_disk_change
                    and doc["hash"] != self.hashes[path]
                )
                if not modified:
                    continue
                self.__unload_spreadsheet(path)
            self.__load_spreadsheet(doc)

    def __check_removed(self):
        """Unload every loaded spreadsheet that is missing from the scan."""
        on_disk = {doc["path"] for doc in self.docs}
        # Build the list first: unloading mutates ``self.spreadsheets``.
        removed_spreadsheets = [
            key for key in self.spreadsheets if key not in on_disk
        ]
        for doc_path in removed_spreadsheets:
            self.__unload_spreadsheet(doc_path)

    def __scan_directory(self, d):
        """Recursively scan directory ``d`` for spreadsheets.

        For every regular file found, a ``{"path": ..., "hash": ...}`` dict is
        appended to ``self.docs``, where ``path`` is relative to
        ``spreadsheets_path`` and ``hash`` is the MD5 hex digest of the file
        content. Lock files and ``.gitignore`` are skipped.
        """
        for f in listdir(d):
            # Ignore particular files
            if f.startswith(".~lock.") or f == ".gitignore":
                continue

            full_path = join(d, f)
            if isfile(full_path):
                # relpath copes with a trailing separator on the root path,
                # which naive string splitting does not.
                relative_path = relpath(full_path, self.spreadsheets_path)

                # Calculate the MD5 hash for the file, reading in chunks so
                # large files are not held in memory all at once.
                hasher = hashlib.md5()
                with open(full_path, "rb") as afile:
                    for chunk in iter(lambda: afile.read(65536), b""):
                        hasher.update(chunk)

                self.docs.append(
                    {"path": relative_path, "hash": hasher.hexdigest()}
                )
            elif isdir(full_path):
                self.__scan_directory(full_path)

    def run(self):
        """Scan the directory repeatedly until ``stop_thread`` is called."""
        while not self.stopped():
            self.docs = []
            self.__scan_directory(self.spreadsheets_path)
            self.__check_removed()
            self.__check_added()
            self.done_scan = True
            # Waiting on the event instead of sleeping lets stop_thread()
            # interrupt the pause immediately.
            self._stop_thread.wait(self.monitor_frequency)