forked from balrok/og_api
-
Notifications
You must be signed in to change notification settings - Fork 1
/
cache.py
320 lines (281 loc) · 10.6 KB
/
cache.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
import re
import os
import logging
import atexit
import cPickle as pickle
log = logging.getLogger('urlCache')
# contains a list of {'class'..,'check'..,'noDefault'} where class is the cacheclass and check is a function
# which decides if we use this specific cache. The decission is made based on configuration or wether a cache-folder/file
# already exists
# the last item in the list is the most important
# noDefault is a bool which defines if we should use this cache when no other caches match
cacheList = []
class BaseCache(object): # interface for all my caches
def remove(self, section):
pass
def lookup(self, section):
return None
def pyLookup(self, section):
return pickle.loads(self.lookup(section))
def pyLookupDefault(self, section, default):
r = self.lookup(section)
if r is None:
return default
return pickle.loads(r)
def lookupDefault(self, section, default):
r = self.lookup(section)
if r is None:
return default
return r
def write(self, section, data):
pass
def pyWrite(self, section, data):
data = pickle.dumps(data, 0)
self.write(section, data)
def allKeys(self):
return [i for i in self.iterKeys()]
def iterKeys(self):
for i in self.iterKeyValues():
yield i[0]
def iterKeyValues(self):
yield None
def count(self):
c = 0
for i in self.iterKeys():
c+=1
return c
def __repr__(self):
return self.__class__.__name__+':'+self.key
import codecs
FILENAME_MAX_LENGTH = 100 # maxlength of filenames
# the filecache has also some additional interface methods
class FileCache(BaseCache):
def __init__(self, dir, subdirs = []):
''' subdirs must be an array '''
for i in xrange(0, len(subdirs)):
dir = os.path.join(dir, self.create_filename(subdirs[i]))
self.path = dir
self.key = dir
# create the path only if we write something there, thats why those variables getting set
if os.path.isdir(self.path) is False:
self.create_path = True
else:
self.create_path = False
@staticmethod
def create_filename(s):
return re.sub('[^a-zA-Z0-9]','_',s)
def get_path(self, section, create = False):
if self.create_path:
if create:
try:
os.makedirs(self.path)
except:
pass
else:
return None
self.create_path = False
return os.path.join(self.path, section)
def iterKeys(self):
for root, subFolders, files in os.walk(self.path):
cleanRoot = root.replace(self.path+'/', '')
for file in files:
yield os.path.join(cleanRoot, file)
def iterKeyValues(self):
for root, subFolders, files in os.walk(self.path):
cleanRoot = root.replace(self.path+'/', '')
for file in files:
f = os.path.join(cleanRoot, file)
yield (f, codecs.open(self.path+"/"+f, 'r', 'utf-8').readlines())
def remove(self, section):
import shutil
file = self.get_path(section)
if file and os.path.isfile(file):
os.remove(file)
else:
shutil.rmtree(file)
def lookup(self, section):
file = self.get_path(section)
if file and os.path.isfile(file):
log.debug('using cache [%s] path: %s' % (section, file))
f = codecs.open(file, 'r', 'utf-8')
return ''.join(f.readlines())
return None
def lookup_size(self, section):
file = self.get_path(section)
if file and os.path.isfile(file):
return os.path.getsize(file)
return None
def read_stream(self, section):
file = self.get_path(section)
if file:
return open(file, 'rb')
return None
def truncate(self, section, x):
file = self.get_path(section)
if file:
a = open(file, 'r+b')
a.truncate(x)
def get_stream(self, section):
file = self.get_path(section, True)
return open(file, 'wb')
def get_append_stream(self, section):
file = self.get_path(section, True)
return open(file, 'ab')
def write(self, section, data):
file = self.get_path(section, True)
open(file, 'w').writelines(data.encode('utf-8'))
# below this i define several database caches - so they don't support append_stream and so on.. i won't store so big data inside
try:
from kyotocabinet import DB, Visitor
except:
pass
else:
dbList = {}
@atexit.register
def close():
for dir in dbList:
db = dbList[dir]
db.close()
class KyotoCache(BaseCache):
def __init__(self, dir, subdirs = []):
if dir not in dbList:
dbList[dir] = DB()
dbList[dir].open(dir+".kch", DB.OWRITER | DB.OCREATE)
self.db = dbList[dir]
self.key = "/".join(subdirs)
def lookup(self, section):
ret = self.db.get(self.key+"/"+section)
return ret
def write(self, section, data):
self.db.set(self.key+"/"+section, data)
def remove(self, section):
self.db.remove(self.key+"/"+section)
def iterKeys(self):
for i in self.db:
yield i
def iterKeyValues(self):
cur = self.db.cursor()
cur.jump()
def printproc(key, value):
return Visitor.NOP
while True:
cur.step()
if cur.get_key() == None:
break
yield (cur.get_key(), cur.get_value())
def count(self):
return self.db.count()
def isKyotoCache(namespace):
return os.path.exists(namespace+".kch")
cacheList.append({'class':KyotoCache, 'check':isKyotoCache})
class KyotoCacheComp(KyotoCache): # with compression
def __init__(self, dir, subdirs = []):
dir+="_zlib"
if dir not in dbList:
dbList[dir] = DB()
dbList[dir].open(dir+".kch#ops=c#log="+dir+".log#logkinds=debu#zcomp=zlib", DB.OWRITER | DB.OCREATE)
self.db = dbList[dir]
self.key = "/".join(subdirs)
def isKyotoCacheComp(namespace):
return os.path.exists(namespace+"_zlib.kch")
cacheList.append({'class':KyotoCacheComp, 'check':isKyotoCacheComp})
try:
import lib.leveldb as leveldb
except:
pass
else:
dbList = {}
class LevelCache(BaseCache):
def __init__(self, dir, subdirs = []):
dir+=".ldb"
if dir not in dbList:
dbList[dir] = leveldb.LevelDB(dir)
self.db = dbList[dir]
self.key = "/".join(subdirs)
def lookup(self, section):
ret = self.db.Get(self.key+"/"+section)
return ret
def write(self, section, data):
self.db.Put(self.key+"/"+section, data)
def remove(self, section):
self.db.Delete(self.key+"/"+section)
def iterKeys(self):
for i in self.db.RangeIter(include_value=False):
yield i
def iterKeyValues(self):
for i in self.db.RangeIter():
yield i
def isLevelCache(namespace):
return os.path.isdir(namespace+".ldb")
cacheList.append({'class':LevelCache, 'check':isLevelCache})
try:
from hypertable.thriftclient import ThriftClient
#from hyperthrift.gen.ttypes import *
except:
pass
else:
class HypertableCache(BaseCache):
clientCache = None
def __init__(self, dir, subdirs = []):
if HypertableCache.clientCache == None:
HypertableCache.clientCache = ThriftClient("localhost", 38080)
self.client = HypertableCache.clientCache
self.key = "/".join(subdirs)
if not self.client.exists_namespace("flashget_"+dir):
self.client.create_namespace("flashget_"+dir)
self.namespace = self.client.open_namespace("flashget_"+dir)
if not self.client.exists_table(self.namespace, "cache"):
sections = ['data', 'redirect']
self.client.hql_query(self.namespace, 'CREATE TABLE cache('+','.join(sections)+')');
def lookup(self, section):
key = self.key.replace('"', '\\"')
res = self.client.hql_query(self.namespace, 'select '+section+' FROM cache WHERE row="'+key+'" REVS 1 NO_ESCAPE')
if res.cells == []:
return None
return res.cells[0].value
def write(self, section, data):
key = self.key.replace('"', '\\"')
if key == '':
return None
self.client.hql_query(self.namespace, 'INSERT INTO cache VALUES ("%s", "%s", \'%s\')' % (key, section, data.replace("\\", "\\\\").replace("'", "\\'").replace("\x00", "")));
def remove(self, section):
key = self.key.replace('"', '\\"')
if section is None:
section = '*'
self.client.hql_query(self.namespace, 'DELETE %s FROM cache WHERE ROW="%s"' % (section, key))
def iterKeyValues(self):
res = self.client.hql_exec(self.namespace, 'select * FROM cache REVS 1', 0, 1)
scanner = res.scanner
while True:
cells = self.client.next_row_as_arrays(scanner)
if not len(cells): break
key = cells[0][0]
section = cells[0][1]
data = cells[0][3]
#timestamp = cells[0][4]
yield (key+"/"+section, data)
self.client.close_scanner(scanner)
# a factory, which will create the class based on cachelist
class Cache(object):
_dirToCache = {} # internal mapping from dir to cache
def __new__(cls, dir, subdirs=[]):
cls = None
if dir in Cache._dirToCache:
cls = Cache._dirToCache[dir]
if not cls:
for i in cacheList[::-1]:
if i['check'](dir):
cls = i['class']
break
else:
log.debug("no cache exists for %s" % dir)
# no cache found yet chose a default cache
for i in cacheList[::-1]:
if 'noDefault' in i and i['noDefault']:
continue
cls = i['class']
if cls == None:
raise Exception("No Cache is available")
log.debug("using cache %s" % cls.__name__)
Cache._dirToCache[dir] = cls
return cls(dir, subdirs)